diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def
index 7f2c8403410dd3ef491148c89c70c082ffc6b4df..95359a3fdc711d4dd7ed15a9fb4ba481980af588 100644
--- a/clang/include/clang/Basic/BuiltinsLoongArch.def
+++ b/clang/include/clang/Basic/BuiltinsLoongArch.def
@@ -15,47 +15,14 @@
 # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
 #endif
 
-// TODO: Support more builtins.
-// TODO: Added feature constraints.
-TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vLiULiLi", "nc", "64bit")
-TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit")
-TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "")
-TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "")
-TARGET_BUILTIN(__builtin_loongarch_movfcsr2gr, "UiIUi", "nc", "f")
-TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f")
-TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "")
-TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "")
-TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "")
-TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vLiLi", "nc", "64bit")
-TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vLiLi", "nc", "64bit")
+// Definition of LoongArch basic builtins.
+#include "clang/Basic/BuiltinsLoongArchBase.def"
 
-TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit")
-TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit")
-TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit")
-TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iLii", "nc", "64bit")
-TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit")
-TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit")
-TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit")
-TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iLii", "nc", "64bit")
+// Definition of LSX builtins.
+#include "clang/Basic/BuiltinsLoongArchLSX.def"
 
-TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "")
-TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "ULiIUi", "nc", "64bit")
-TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "")
-TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "ULiULiIUi", "nc", "64bit")
-TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "")
-TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "ULiULiULiIUi", "nc", "64bit")
-
-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "")
-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "")
-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "")
-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "ULiUi", "nc", "64bit")
-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "")
-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "")
-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "")
-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vULiUi", "nc", "64bit")
-
-TARGET_BUILTIN(__builtin_loongarch_lddir_d, "LiLiIULi", "nc", "64bit")
-TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vLiIULi", "nc", "64bit")
+// Definition of LASX builtins.
+#include "clang/Basic/BuiltinsLoongArchLASX.def"
 
 #undef BUILTIN
 #undef TARGET_BUILTIN
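The three .def files introduced below follow Clang's X-macro "database" pattern that the hunk above relies on: a client defines BUILTIN (TARGET_BUILTIN falls back to BUILTIN via the guard kept as context at the top of BuiltinsLoongArch.def if the client does not define it), includes the .def file, and both macros are #undef'ed on the way out. A minimal sketch of such a client, assuming nothing about Clang's real consumers; the BuiltinEntry type and table name are illustrative only:

  typedef struct {
    const char *Name;     // builtin identifier, e.g. "__builtin_loongarch_dbar"
    const char *Type;     // encoded prototype string, e.g. "vIUi"
    const char *Attrs;    // attribute string, e.g. "nc" (nothrow, const)
    const char *Features; // required target features, e.g. "64bit" or "lasx"
  } BuiltinEntry;

  static const BuiltinEntry LoongArchBuiltins[] = {
  #define BUILTIN(ID, TYPE, ATTRS) {#ID, TYPE, ATTRS, ""},
  #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) {#ID, TYPE, ATTRS, FEATURE},
  #include "clang/Basic/BuiltinsLoongArch.def"
  };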
+#include "clang/Basic/BuiltinsLoongArchLASX.def" #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/include/clang/Basic/BuiltinsLoongArchBase.def b/clang/include/clang/Basic/BuiltinsLoongArchBase.def new file mode 100644 index 0000000000000000000000000000000000000000..cbb239223aae3b22e8ef15ee4627c60825aeea39 --- /dev/null +++ b/clang/include/clang/Basic/BuiltinsLoongArchBase.def @@ -0,0 +1,53 @@ +//============------------ BuiltinsLoongArchBase.def -------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the LoongArch-specific basic builtin function database. +// Users of this file must define the BUILTIN macro to make use of this +// information. +// +//===----------------------------------------------------------------------===// + +TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vWiUWiWi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit") +TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_movfcsr2gr, "UiIUi", "nc", "f") +TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f") +TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vWiWi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vWiWi", "nc", "64bit") + +TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iWii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iWii", "nc", "64bit") + +TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "UWiIUi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "UWiUWiIUi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "UWiUWiUWiIUi", "nc", "64bit") + +TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "UWiUi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit") + +TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit") diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def new 
diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
new file mode 100644
index 0000000000000000000000000000000000000000..3de200f665b680afdebc08a57c79a844a0783998
--- /dev/null
+++ b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
@@ -0,0 +1,980 @@
+//===------------- BuiltinsLoongArchLASX.def -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LoongArch-specific LASX builtin function database.
+// Users of this file must define the BUILTIN macro to make use of this
+// information.
+//
+//===----------------------------------------------------------------------===//
+
+TARGET_BUILTIN(__builtin_lasx_xvadd_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvadd_h, "V16sV16sV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvadd_w, "V8iV8iV8i", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvadd_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvadd_q, "V4LLiV4LLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsub_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsub_h, "V16sV16sV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsub_w, "V8iV8iV8i", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsub_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsub_q, "V4LLiV4LLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvaddi_bu, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvaddi_hu, "V16sV16sIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvaddi_wu, "V8iV8iIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvaddi_du, "V4LLiV4LLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsubi_bu, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsubi_hu, "V16sV16sIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsubi_wu, "V8iV8iIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsubi_du, "V4LLiV4LLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvneg_b, "V32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvneg_h, "V16sV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvneg_w, "V8iV8i", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvneg_d, "V4LLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsadd_b, "V32ScV32ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsadd_h, "V16SsV16SsV16Ss", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsadd_w, "V8SiV8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsadd_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsadd_bu, "V32UcV32UcV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsadd_hu, "V16UsV16UsV16Us", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsadd_wu, "V8UiV8UiV8Ui", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsadd_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvssub_b, "V32ScV32ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssub_h, "V16SsV16SsV16Ss", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssub_w, "V8SiV8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssub_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvssub_bu, "V32UcV32UcV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssub_hu, "V16UsV16UsV16Us", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssub_wu, "V8UiV8UiV8Ui", "nc", "lasx")
"V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvhaddw_h_b, "V16SsV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhaddw_w_h, "V8SiV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhaddw_d_w, "V4SLLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhaddw_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvhaddw_hu_bu, "V16UsV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhaddw_wu_hu, "V8UiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhaddw_du_wu, "V4ULLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhaddw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvhsubw_h_b, "V16SsV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhsubw_w_h, "V8SiV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhsubw_d_w, "V4SLLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhsubw_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvhsubw_hu_bu, "V16UsV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhsubw_wu_hu, "V8UiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhsubw_du_wu, "V4ULLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhsubw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_h, "V8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_h, "V8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwev_w_h, "V8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_h, "V8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") 
+TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu_b, "V16sV32UcV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_hu_h, "V8SiV16UsV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu_b, "V16sV32UcV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvavg_b, "V32ScV32ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvavg_h, "V16SsV16SsV16Ss", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvavg_w, "V8SiV8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvavg_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvavg_bu, "V32UcV32UcV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvavg_hu, "V16UsV16UsV16Us", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvavg_wu, "V8UiV8UiV8Ui", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvavg_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvavgr_b, "V32ScV32ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvavgr_h, "V16SsV16SsV16Ss", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvavgr_w, "V8SiV8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvavgr_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvavgr_bu, "V32UcV32UcV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvavgr_hu, "V16UsV16UsV16Us", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvavgr_wu, "V8UiV8UiV8Ui", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvavgr_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvabsd_b, "V32ScV32ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvabsd_h, "V16SsV16SsV16Ss", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvabsd_w, "V8SiV8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvabsd_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvabsd_bu, "V32UcV32UcV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvabsd_hu, "V16UsV16UsV16Us", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvabsd_wu, "V8UiV8UiV8Ui", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvabsd_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvadda_b, "V32ScV32ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvadda_h, "V16SsV16SsV16Ss", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvadda_w, "V8SiV8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvadda_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvmax_b, "V32ScV32ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmax_h, "V16SsV16SsV16Ss", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmax_w, "V8SiV8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmax_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvmaxi_b, "V32ScV32ScIi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmaxi_h, "V16SsV16SsIi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmaxi_w, "V8SiV8SiIi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmaxi_d, "V4SLLiV4SLLiIi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvmax_bu, "V32UcV32UcV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmax_hu, "V16UsV16UsV16Us", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmax_wu, "V8UiV8UiV8Ui", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmax_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvmaxi_bu, "V32UcV32UcIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmaxi_hu, "V16UsV16UsIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmaxi_wu, "V8UiV8UiIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmaxi_du, "V4ULLiV4ULLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvmin_b, "V32ScV32ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmin_h, "V16SsV16SsV16Ss", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmin_w, "V8SiV8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmin_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvmini_b, "V32ScV32ScIi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmini_h, "V16SsV16SsIi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmini_w, "V8SiV8SiIi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmini_d, "V4SLLiV4SLLiIi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvmin_bu, "V32UcV32UcV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmin_hu, "V16UsV16UsV16Us", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmin_wu, "V8UiV8UiV8Ui", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmin_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvmini_bu, "V32UcV32UcIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmini_hu, "V16UsV16UsIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmini_wu, "V8UiV8UiIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmini_du, "V4ULLiV4ULLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvmul_b, "V32ScV32ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmul_h, "V16SsV16SsV16Ss", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmul_w, "V8SiV8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmul_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvmuh_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmuh_h, "V16sV16sV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmuh_w, "V8iV8iV8i", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmuh_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvmuh_bu, "V32UcV32UcV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmuh_hu, "V16UsV16UsV16Us", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmuh_wu, "V8UiV8UiV8Ui", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmuh_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_b, "V16sV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_h, "V8SiV16sV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_b, "V16sV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_h, "V8SiV16sV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx")
"nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu_b, "V16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu_b, "V16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmadd_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmadd_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmadd_w, "V8SiV8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmadd_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmsub_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmsub_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmsub_w, "V8SiV8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmsub_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_b, "V16sV16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_h, "V8SiV8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_b, "V16sV16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_h, "V8SiV8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu, "V16UsV16UsV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_hu, "V8UiV8UiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu, "V16UsV16UsV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu, "V8UiV8UiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_hu_h, "V8SiV8SiV16UsV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu_h, "V8SiV8SiV16UsV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx") 
+TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvdiv_b, "V32ScV32ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvdiv_h, "V16SsV16SsV16Ss", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvdiv_w, "V8SiV8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvdiv_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvdiv_bu, "V32UcV32UcV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvdiv_hu, "V16UsV16UsV16Us", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvdiv_wu, "V8UiV8UiV8Ui", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvdiv_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvmod_b, "V32ScV32ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmod_h, "V16SsV16SsV16Ss", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmod_w, "V8SiV8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmod_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvmod_bu, "V32UcV32UcV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmod_hu, "V16UsV16UsV16Us", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmod_wu, "V8UiV8UiV8Ui", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmod_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsat_b, "V32ScV32ScIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsat_h, "V16SsV16SsIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsat_w, "V8SiV8SiIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsat_d, "V4SLLiV4SLLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsat_bu, "V32UcV32UcIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsat_hu, "V16UsV16UsIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsat_wu, "V8UiV8UiIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsat_du, "V4ULLiV4ULLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvexth_h_b, "V16sV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvexth_w_h, "V8SiV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvexth_d_w, "V4LLiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvexth_q_d, "V4LLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvexth_hu_bu, "V16UsV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvexth_wu_hu, "V8UiV16Us", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvexth_du_wu, "V4ULLiV8Ui", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvexth_qu_du, "V4ULLiV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_vext2xv_h_b, "V16sV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_vext2xv_w_b, "V8SiV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_vext2xv_d_b, "V4LLiV32c", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_vext2xv_w_h, "V8SiV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_vext2xv_d_h, "V4LLiV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_vext2xv_d_w, "V4LLiV8Si", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_vext2xv_hu_bu, "V16sV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_bu, "V8SiV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_vext2xv_du_bu, "V4LLiV32c", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_hu, "V8SiV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_vext2xv_du_hu, "V4LLiV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_vext2xv_du_wu, "V4LLiV8Si", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsigncov_b, "V32ScV32ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsigncov_h, "V16SsV16SsV16Ss", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsigncov_w, "V8SiV8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsigncov_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvmskltz_b, "V32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmskltz_h, "V16sV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmskltz_w, "V8iV8i", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmskltz_d, "V4LLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvmskgez_b, "V32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmsknz_b, "V16sV16s", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvldi, "V4LLiIi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvrepli_b, "V32cIi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvrepli_h, "V16sIi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvrepli_w, "V8iIi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvrepli_d, "V4LLiIi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvand_v, "V32UcV32UcV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvor_v, "V32UcV32UcV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvxor_v, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvnor_v, "V32UcV32UcV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvandn_v, "V32UcV32UcV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvorn_v, "V32ScV32ScV32Sc", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvandi_b, "V32UcV32UcIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvori_b, "V32UcV32UcIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvxori_b, "V32UcV32UcIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvnori_b, "V32UcV32UcIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsll_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsll_h, "V16sV16sV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsll_w, "V8iV8iV8i", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsll_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvslli_b, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvslli_h, "V16sV16sIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvslli_w, "V8iV8iIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvslli_d, "V4LLiV4LLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsrl_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrl_h, "V16sV16sV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrl_w, "V8iV8iV8i", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrl_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsrli_b, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrli_h, "V16sV16sIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrli_w, "V8iV8iIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrli_d, "V4LLiV4LLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsra_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsra_h, "V16sV16sV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsra_w, "V8iV8iV8i", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsra_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsrai_b, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrai_h, "V16sV16sIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrai_w, "V8iV8iIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrai_d, "V4LLiV4LLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvrotr_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvrotr_h, "V16sV16sV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvrotr_w, "V8iV8iV8i", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvrotr_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvrotri_b, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvrotri_h, "V16sV16sIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvrotri_w, "V8iV8iIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvrotri_d, "V4LLiV4LLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsllwil_h_b, "V16sV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsllwil_w_h, "V8SiV16sIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsllwil_d_w, "V4LLiV8SiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvextl_q_d, "V4LLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsllwil_hu_bu, "V16UsV32UcIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsllwil_wu_hu, "V8UiV16UsIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsllwil_du_wu, "V4ULLiV8UiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvextl_qu_du, "V4LLiV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsrlr_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrlr_h, "V16sV16sV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrlr_w, "V8iV8iV8i", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrlr_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsrlri_b, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrlri_h, "V16sV16sIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrlri_w, "V8iV8iIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrlri_d, "V4LLiV4LLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsrar_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrar_h, "V16sV16sV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrar_w, "V8iV8iV8i", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrar_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsrari_b, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrari_h, "V16sV16sIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrari_w, "V8iV8iIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrari_d, "V4LLiV4LLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsrln_b_h, "V32ScV16sV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrln_h_w, "V16sV8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrln_w_d, "V8SiV4LLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsran_b_h, "V32ScV16sV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsran_h_w, "V16sV8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsran_w_d, "V8SiV4LLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsrani_b_h, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrani_h_w, "V16sV16sV16sIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrani_w_d, "V8iV8iV8iIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsrlrn_b_h, "V32ScV16sV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrlrn_h_w, "V16sV8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrlrn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvsrarn_b_h, "V32ScV16sV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrarn_h_w, "V16sV8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrarn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx")
"lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlrni_h_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlrni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrln_b_h, "V32ScV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrln_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrln_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssran_b_h, "V32ScV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssran_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssran_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrln_bu_h, "V32UcV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrln_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrln_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssran_bu_h, "V32UcV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssran_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssran_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrani_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_h_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrani_bu_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_hu_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_wu_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrlrn_b_h, "V32ScV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrn_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrarn_b_h, "V32ScV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarn_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrlrn_bu_h, "V32UcV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrn_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrn_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") + 
+TARGET_BUILTIN(__builtin_lasx_xvssrarn_bu_h, "V32UcV16UsV16Us", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrarn_hu_w, "V16UsV8UiV8Ui", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrarn_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvssrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrlrni_h_w, "V16sV16sV16sIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrlrni_w_d, "V8iV8iV8iIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvssrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvssrlni_bu_h, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrlni_hu_w, "V16sV16sV16sIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrlni_wu_d, "V8iV8iV8iIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrlni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvssrarni_bu_h, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrarni_hu_w, "V16sV16sV16sIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrarni_wu_d, "V8iV8iV8iIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrarni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvclo_b, "V32ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvclo_h, "V16SsV16Ss", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvclo_w, "V8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvclo_d, "V4SLLiV4SLLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvclz_b, "V32ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvclz_h, "V16SsV16Ss", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvclz_w, "V8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvclz_d, "V4SLLiV4SLLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvpcnt_b, "V32ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvpcnt_h, "V16SsV16Ss", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvpcnt_w, "V8SiV8Si", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvpcnt_d, "V4SLLiV4SLLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvbitclr_b, "V32UcV32UcV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbitclr_h, "V16UsV16UsV16Us", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbitclr_w, "V8UiV8UiV8Ui", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbitclr_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvbitclri_b, "V32UcV32UcIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbitclri_h, "V16UsV16UsIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbitclri_w, "V8UiV8UiIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbitclri_d, "V4ULLiV4ULLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvbitset_b, "V32UcV32UcV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbitset_h, "V16UsV16UsV16Us", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbitset_w, "V8UiV8UiV8Ui", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbitset_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvbitseti_b, "V32UcV32UcIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbitseti_h, "V16UsV16UsIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbitseti_w, "V8UiV8UiIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbitseti_d, "V4ULLiV4ULLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvbitrev_b, "V32UcV32UcV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbitrev_h, "V16UsV16UsV16Us", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbitrev_w, "V8UiV8UiV8Ui", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbitrev_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvbitrevi_b, "V32UcV32UcIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbitrevi_h, "V16UsV16UsIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbitrevi_w, "V8UiV8UiIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbitrevi_d, "V4ULLiV4ULLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvfrstp_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfrstp_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvfrstpi_b, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfrstpi_h, "V16sV16sV16sIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvfadd_s, "V8fV8fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfadd_d, "V4dV4dV4d", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvfsub_s, "V8fV8fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfsub_d, "V4dV4dV4d", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvfmul_s, "V8fV8fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfmul_d, "V4dV4dV4d", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvfdiv_s, "V8fV8fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfdiv_d, "V4dV4dV4d", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvfmadd_s, "V8fV8fV8fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfmadd_d, "V4dV4dV4dV4d", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvfmsub_s, "V8fV8fV8fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfmsub_d, "V4dV4dV4dV4d", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvfnmadd_s, "V8fV8fV8fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfnmadd_d, "V4dV4dV4dV4d", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvfnmsub_s, "V8fV8fV8fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfnmsub_d, "V4dV4dV4dV4d", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvfmax_s, "V8fV8fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfmax_d, "V4dV4dV4d", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvfmin_s, "V8fV8fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfmin_d, "V4dV4dV4d", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvfmaxa_s, "V8fV8fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfmaxa_d, "V4dV4dV4d", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvfmina_s, "V8fV8fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfmina_d, "V4dV4dV4d", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvflogb_s, "V8fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvflogb_d, "V4dV4d", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvfclass_s, "V8iV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfclass_d, "V4LLiV4d", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvfsqrt_s, "V8fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfsqrt_d, "V4dV4d", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvfrecip_s, "V8fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfrecip_d, "V4dV4d", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_s, "V8fV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_d, "V4dV4d", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvfcvtl_s_h, "V8fV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfcvth_s_h, "V8fV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfcvtl_d_s, "V4dV8f", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfcvth_d_s, "V4dV8f", "nc", "lasx")
"V4dV8f", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcvt_h_s, "V16sV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcvt_s_d, "V8fV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrintrne_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrintrne_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrintrz_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrintrz_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrintrp_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrintrp_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrintrm_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrintrm_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrint_s, "V8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrint_d, "V4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrne_w_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrne_l_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrz_w_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrz_l_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrp_w_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrp_l_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrm_w_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrm_l_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftint_w_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftint_l_d, "V4SLLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrz_wu_s, "V8UiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrz_lu_d, "V4ULLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftint_wu_s, "V8UiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftint_lu_d, "V4ULLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrne_w_d, "V8SiV4dV4d", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrz_w_d, "V8SiV4dV4d", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrp_w_d, "V8SiV4dV4d", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrm_w_d, "V8SiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftint_w_d, "V8SiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrnel_l_s, "V4LLiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrneh_l_s, "V4LLiV8f", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrzl_l_s, "V4LLiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrzh_l_s, "V4LLiV8f", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrpl_l_s, "V4LLiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrph_l_s, "V4LLiV8f", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrml_l_s, "V4LLiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrmh_l_s, "V4LLiV8f", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintl_l_s, "V4LLiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftinth_l_s, "V4LLiV8f", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvffint_s_w, "V8fV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvffint_d_l, "V4dV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvffint_s_wu, "V8fV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvffint_d_lu, "V4dV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvffintl_d_w, "V4dV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvffinth_d_w, "V4dV8Si", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvffint_s_l, "V8fV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvseq_b, 
"V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvseq_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvseq_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvseq_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvseqi_b, "V32ScV32ScISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvseqi_h, "V16SsV16SsISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvseqi_w, "V8SiV8SiISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvseqi_d, "V4SLLiV4SLLiISi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsle_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsle_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsle_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsle_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslei_b, "V32ScV32ScISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslei_h, "V16SsV16SsISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslei_w, "V8SiV8SiISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslei_d, "V4SLLiV4SLLiISi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsle_bu, "V32ScV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsle_hu, "V16SsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsle_wu, "V8SiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsle_du, "V4SLLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslei_bu, "V32ScV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslei_hu, "V16SsV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslei_wu, "V8SiV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslei_du, "V4SLLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslt_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslt_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslt_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslt_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslti_b, "V32ScV32ScISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslti_h, "V16SsV16SsISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslti_w, "V8SiV8SiISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslti_d, "V4SLLiV4SLLiISi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslt_bu, "V32ScV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslt_hu, "V16SsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslt_wu, "V8SiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslt_du, "V4SLLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslti_bu, "V32ScV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslti_hu, "V16SsV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslti_wu, "V8SiV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslti_du, "V4SLLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_caf_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_caf_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cun_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cun_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_ceq_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_ceq_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cueq_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cueq_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_clt_s, 
"V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_clt_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cult_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cult_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cle_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cle_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cule_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cule_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cne_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cne_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cor_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cor_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cune_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cune_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_saf_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_saf_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sun_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sun_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_seq_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_seq_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sueq_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sueq_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_slt_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_slt_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sult_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sult_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sle_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sle_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sule_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sule_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sne_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sne_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sor_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sor_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sune_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sune_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbitsel_v, "V32UcV32UcV32UcV32Uc", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbitseli_b, "V32UcV32UcV32UcIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_b, "V32Sci", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_h, "V16Ssi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_w, "V8Sii", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_d, "V4SLLiLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvinsgr2vr_w, "V8SiV8SiiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvinsgr2vr_d, "V4SLLiV4SLLiLLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_w, "iV8SiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_d, "LLiV4SLLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_wu, "iV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_du, 
"LLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvreplve_b, "V32cV32cUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve_h, "V16sV16sUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve_w, "V8iV8iUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve_d, "V4LLiV4LLiUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_h, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_d, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvreplve0_b, "V32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve0_h, "V16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve0_w, "V8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve0_d, "V4SLLiV4SLLi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve0_q, "V32ScV32Sc", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvinsve0_w, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvinsve0_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpickve_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickve_d, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpickve_w_f, "V8fV8fIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickve_d_f, "V4dV4dIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbsll_v, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbsrl_v, "V32cV32cIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpackev_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackev_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackev_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackev_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpackod_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackod_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackod_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackod_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpickev_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickev_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickev_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickev_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpickod_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickod_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickod_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickod_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvilvl_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvl_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvl_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvl_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvilvh_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvh_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvh_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvh_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvshuf_b, "V32UcV32UcV32UcV32Uc", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvshuf_h, "V16sV16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvshuf_w, "V8iV8iV8iV8i", "nc", "lasx") 
+TARGET_BUILTIN(__builtin_lasx_xvshuf_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvperm_w, "V8iV8iV8i", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvshuf4i_b, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvshuf4i_h, "V16sV16sIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvshuf4i_w, "V8iV8iIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvshuf4i_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvpermi_w, "V8iV8iV8iIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvpermi_d, "V4LLiV4LLiIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvpermi_q, "V32cV32cV32cIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvextrins_b, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvextrins_h, "V16sV16sV16sIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvextrins_w, "V8iV8iV8iIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvextrins_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvld, "V32ScvC*Ii", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvst, "vV32Scv*Ii", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvldx, "V32ScvC*LLi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvstx, "vV32Scv*LLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvldrepl_b, "V32cvC*Ii", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvldrepl_h, "V16svC*Ii", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvldrepl_w, "V8ivC*Ii", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvldrepl_d, "V4LLivC*Ii", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xvstelm_b, "vV32Scv*IiUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvstelm_h, "vV16Ssv*IiUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvstelm_w, "vV8Siv*IiUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvstelm_d, "vV4SLLiv*IiUi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xbz_v, "iV32Uc", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xbnz_v, "iV32Uc", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xbz_b, "iV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xbz_h, "iV16Us", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xbz_w, "iV8Ui", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xbz_d, "iV4ULLi", "nc", "lasx")
+
+TARGET_BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc", "lasx")
diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def
new file mode 100644
index 0000000000000000000000000000000000000000..8e6aec886c50cd912d5993809cd9bf26b1f92da6
--- /dev/null
+++ b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def
@@ -0,0 +1,953 @@
+//===-------------------- BuiltinsLoongArchLSX.def -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LoongArch-specific LSX builtin function database.
+// Users of this file must define the BUILTIN macro to make use of this
+// information.
+// +//===----------------------------------------------------------------------===// + +TARGET_BUILTIN(__builtin_lsx_vadd_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadd_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadd_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadd_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadd_q, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsub_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsub_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsub_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsub_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsub_q, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddi_bu, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddi_hu, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddi_wu, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddi_du, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsubi_bu, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubi_hu, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubi_wu, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubi_du, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vneg_b, "V16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vneg_h, "V8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vneg_w, "V4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vneg_d, "V2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsadd_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsadd_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsadd_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsadd_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsadd_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsadd_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsadd_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsadd_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssub_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssub_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssub_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssub_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssub_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssub_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssub_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssub_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vhaddw_h_b, "V8SsV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhaddw_w_h, "V4SiV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhaddw_d_w, "V2SLLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhaddw_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vhaddw_hu_bu, "V8UsV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhaddw_wu_hu, "V4UiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhaddw_du_wu, "V2ULLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhaddw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vhsubw_h_b, "V8SsV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhsubw_w_h, "V4SiV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhsubw_d_w, "V2SLLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhsubw_q_d, 
"V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vhsubw_hu_bu, "V8UsV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhsubw_wu_hu, "V4UiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhsubw_du_wu, "V2ULLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhsubw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddwev_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_w_h, "V4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddwod_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_w_h, "V4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsubwev_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwev_w_h, "V4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsubwod_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwod_w_h, "V4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsubwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsubwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vavg_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavg_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavg_w, "V4SiV4SiV4Si", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vavg_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vavg_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavg_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavg_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavg_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vavgr_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavgr_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavgr_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavgr_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vavgr_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavgr_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavgr_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavgr_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vabsd_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vabsd_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vabsd_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vabsd_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vabsd_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vabsd_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vabsd_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vabsd_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vadda_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadda_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadda_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadda_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmax_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmax_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmax_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmax_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaxi_b, "V16ScV16ScIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaxi_h, "V8SsV8SsIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaxi_w, "V4SiV4SiIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaxi_d, "V2SLLiV2SLLiIi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmax_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmax_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmax_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmax_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaxi_bu, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaxi_hu, "V8UsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaxi_wu, "V4UiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaxi_du, "V2ULLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmin_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmin_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmin_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmin_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmini_b, "V16ScV16ScIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmini_h, "V8SsV8SsIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmini_w, "V4SiV4SiIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmini_d, "V2SLLiV2SLLiIi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmin_bu, "V16UcV16UcV16Uc", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vmin_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmin_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmin_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmini_bu, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmini_hu, "V8UsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmini_wu, "V4UiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmini_du, "V2ULLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmul_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmul_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmul_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmul_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmuh_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmuh_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmuh_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmuh_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmuh_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmuh_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmuh_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmuh_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmulwev_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_w_h, "V4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmulwod_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_w_h, "V4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmadd_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmadd_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmadd_w, "V4SiV4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmadd_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmsub_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmsub_h, "V8SsV8SsV8SsV8Ss", 
"nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmsub_w, "V4SiV4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmsub_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_b, "V8sV8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_h, "V4SiV4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_b, "V8sV8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_h, "V4SiV4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu, "V8UsV8UsV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_hu, "V4UiV4UiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu, "V8UsV8UsV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu, "V4UiV4UiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vdiv_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vdiv_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vdiv_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vdiv_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vdiv_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vdiv_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vdiv_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vdiv_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmod_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmod_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmod_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmod_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + + +TARGET_BUILTIN(__builtin_lsx_vmod_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmod_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmod_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmod_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsat_b, "V16ScV16ScIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsat_h, "V8SsV8SsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsat_w, "V4SiV4SiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsat_d, "V2SLLiV2SLLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsat_bu, "V16UcV16UcIUi", 
"nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsat_hu, "V8UsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsat_wu, "V4UiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsat_du, "V2ULLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vexth_h_b, "V8sV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vexth_w_h, "V4SiV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vexth_d_w, "V2LLiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vexth_q_d, "V2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vexth_hu_bu, "V8UsV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vexth_wu_hu, "V4UiV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vexth_du_wu, "V2ULLiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vexth_qu_du, "V2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsigncov_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsigncov_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsigncov_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsigncov_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmskltz_b, "V16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmskltz_h, "V8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmskltz_w, "V4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmskltz_d, "V2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmskgez_b, "V16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmsknz_b, "V8sV8s", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vldi, "V2LLiIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrepli_b, "V16cIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrepli_h, "V8sIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrepli_w, "V4iIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrepli_d, "V2LLiIi", "nc", "lsx") + + +TARGET_BUILTIN(__builtin_lsx_vand_v, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vor_v, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vxor_v, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vnor_v, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vandn_v, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vorn_v, "V16ScV16ScV16Sc", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vandi_b, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vori_b, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vxori_b, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vnori_b, "V16UcV16UcIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsll_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsll_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsll_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsll_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslli_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslli_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslli_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslli_d, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrl_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrl_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrl_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrl_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrli_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrli_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrli_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrli_d, "V2LLiV2LLiIUi", "nc", "lsx") + 
+TARGET_BUILTIN(__builtin_lsx_vsra_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsra_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsra_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsra_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrai_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrai_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrai_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrai_d, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vrotr_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotr_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotr_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotr_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vrotri_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotri_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotri_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotri_d, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsllwil_h_b, "V8sV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsllwil_w_h, "V4SiV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsllwil_d_w, "V2LLiV4SiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vextl_q_d, "V2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsllwil_hu_bu, "V8UsV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsllwil_wu_hu, "V4UiV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsllwil_du_wu, "V2ULLiV4UiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vextl_qu_du, "V2LLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrlr_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlr_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlr_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlr_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrlri_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlri_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlri_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlri_d, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrar_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrar_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrar_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrar_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrari_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrari_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrari_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrari_d, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrln_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrln_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrln_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsran_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsran_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsran_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrani_b_h, "V16cV16cV16cIUi", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vsrani_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrani_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrlrn_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlrn_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlrn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrarn_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrarn_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrarn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrln_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrln_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrln_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssran_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssran_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssran_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrln_bu_h, "V16UcV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrln_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrln_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssran_bu_h, "V16UcV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssran_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssran_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrani_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrlrni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrani_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_hu_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_wu_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrlrn_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrn_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + 
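+// Naming in this block decodes as v-ss-r-l/a-[r]-n[i]: vector, saturating
+// shift right, logical or arithmetic, optionally rounding, narrowing (result
+// lanes are half the width of the source lanes), with an optional immediate
+// shift count; the _bu/_hu/_wu forms saturate to the unsigned lane range.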
+TARGET_BUILTIN(__builtin_lsx_vssrarn_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarn_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrlrn_bu_h, "V16UcV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrn_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrn_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrarn_bu_h, "V16UcV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarn_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarn_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrlni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrarni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vclo_b, "V16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vclo_h, "V8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vclo_w, "V4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vclo_d, "V2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vclz_b, "V16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vclz_h, "V8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vclz_w, "V4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vclz_d, "V2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpcnt_b, "V16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpcnt_h, "V8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpcnt_w, "V4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpcnt_d, "V2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitclr_b, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitclr_h, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitclr_w, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitclr_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitclri_b, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitclri_h, "V8UsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitclri_w, "V4UiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitclri_d, "V2ULLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitset_b, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitset_h, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitset_w, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitset_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + 
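+// vbitclr/vbitset/vbitrev clear, set, or flip one bit in every lane. The
+// register forms take the bit index from the matching lane of the second
+// operand, modulo the lane width; the immediate forms expect a constant
+// index within the lane width. An illustrative sketch, assuming a
+// hypothetical unsigned vector typedef:
+//   typedef unsigned int v4u32 __attribute__((vector_size(16)));
+//   v4u32 r = __builtin_lsx_vbitseti_w(a, 31); // set each lane's sign bit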
+TARGET_BUILTIN(__builtin_lsx_vbitseti_b, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitseti_h, "V8UsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitseti_w, "V4UiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitseti_d, "V2ULLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitrev_b, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitrev_h, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitrev_w, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitrev_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitrevi_b, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitrevi_h, "V8UsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitrevi_w, "V4UiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitrevi_d, "V2ULLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrstp_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrstp_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrstpi_b, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrstpi_h, "V8sV8sV8sIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfadd_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfadd_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfsub_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfsub_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmul_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmul_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfdiv_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfdiv_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmadd_s, "V4fV4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmadd_d, "V2dV2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmsub_s, "V4fV4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmsub_d, "V2dV2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfnmadd_s, "V4fV4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfnmadd_d, "V2dV2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfnmsub_s, "V4fV4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfnmsub_d, "V2dV2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmax_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmax_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmin_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmin_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmaxa_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmaxa_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmina_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmina_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vflogb_s, "V4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vflogb_d, "V2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfclass_s, "V4iV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfclass_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfsqrt_s, "V4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfsqrt_d, "V2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrecip_s, "V4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrecip_d, "V2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrsqrt_s, "V4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrsqrt_d, "V2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcvtl_s_h, "V4fV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcvtl_d_s, "V2dV4f", 
"nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcvth_s_h, "V4fV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcvth_d_s, "V2dV4f", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcvt_h_s, "V8sV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcvt_s_d, "V4fV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrintrne_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrintrne_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrintrz_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrintrz_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrintrp_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrintrp_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrintrm_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrintrm_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrint_s, "V4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrint_d, "V2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrne_w_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrne_l_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrz_w_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrz_l_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrp_w_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrp_l_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrm_w_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrm_l_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftint_w_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftint_l_d, "V2SLLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrz_wu_s, "V4UiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrz_lu_d, "V2ULLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftint_wu_s, "V4UiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftint_lu_d, "V2ULLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrne_w_d, "V4SiV2dV2d", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrz_w_d, "V4SiV2dV2d", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrp_w_d, "V4SiV2dV2d", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrm_w_d, "V4SiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftint_w_d, "V4SiV2dV2d", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrnel_l_s, "V2LLiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrneh_l_s, "V2LLiV4f", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrzl_l_s, "V2LLiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrzh_l_s, "V2LLiV4f", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrpl_l_s, "V2LLiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrph_l_s, "V2LLiV4f", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrml_l_s, "V2LLiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrmh_l_s, "V2LLiV4f", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintl_l_s, "V2LLiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftinth_l_s, "V2LLiV4f", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vffint_s_w, "V4fV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vffint_d_l, "V2dV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vffint_s_wu, "V4fV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vffint_d_lu, "V2dV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vffintl_d_w, "V2dV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vffinth_d_w, "V2dV4Si", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vffint_s_l, "V4fV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vseq_b, "V16ScV16ScV16Sc", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vseq_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vseq_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vseq_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vseqi_b, "V16ScV16ScISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vseqi_h, "V8SsV8SsISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vseqi_w, "V4SiV4SiISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vseqi_d, "V2SLLiV2SLLiISi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsle_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsle_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsle_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsle_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslei_b, "V16ScV16ScISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslei_h, "V8SsV8SsISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslei_w, "V4SiV4SiISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslei_d, "V2SLLiV2SLLiISi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsle_bu, "V16ScV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsle_hu, "V8SsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsle_wu, "V4SiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsle_du, "V2SLLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslei_bu, "V16ScV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslei_hu, "V8SsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslei_wu, "V4SiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslei_du, "V2SLLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslt_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslt_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslt_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslt_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslti_b, "V16ScV16ScISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslti_h, "V8SsV8SsISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslti_w, "V4SiV4SiISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslti_d, "V2SLLiV2SLLiISi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslt_bu, "V16ScV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslt_hu, "V8SsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslt_wu, "V4SiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslt_du, "V2SLLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslti_bu, "V16ScV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslti_hu, "V8SsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslti_wu, "V4SiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslti_du, "V2SLLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_caf_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_caf_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cun_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cun_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_ceq_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_ceq_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cueq_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cueq_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_clt_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_clt_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cult_s, "V4SiV4fV4f", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vfcmp_cult_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cle_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cle_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cule_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cule_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cne_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cne_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cor_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cor_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cune_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cune_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_saf_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_saf_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sun_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sun_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_seq_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_seq_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sueq_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sueq_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_slt_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_slt_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sult_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sult_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sle_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sle_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sule_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sule_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sne_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sne_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sor_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sor_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sune_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sune_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitsel_v, "V16UcV16UcV16UcV16Uc", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitseli_b, "V16UcV16UcV16UcIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_b, "V16Sci", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_h, "V8Ssi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_w, "V4Sii", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_d, "V2SLLiLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_b, "V16ScV16SciIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_h, "V8SsV8SsiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_w, "V4SiV4SiiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_d, "V2SLLiV2SLLiLLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_b, "iV16ScIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_h, "iV8SsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_w, "iV4SiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_d, "LLiV2SLLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_bu, "iV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_hu, "iV8UsIUi", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vpickve2gr_wu, "iV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_du, "LLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vreplve_b, "V16cV16cUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplve_h, "V8sV8sUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplve_w, "V4iV4iUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplve_d, "V2LLiV2LLiUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vreplvei_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplvei_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplvei_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplvei_d, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbsll_v, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbsrl_v, "V16cV16cIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpackev_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackev_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackev_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackev_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpackod_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackod_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackod_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackod_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpickev_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickev_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickev_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickev_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpickod_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickod_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickod_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickod_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vilvl_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vilvl_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vilvl_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vilvl_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vilvh_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vilvh_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vilvh_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vilvh_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vshuf_b, "V16UcV16UcV16UcV16Uc", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vshuf_h, "V8sV8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vshuf_w, "V4iV4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vshuf_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vshuf4i_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vshuf4i_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vshuf4i_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vshuf4i_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpermi_w, "V4iV4iV4iIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vextrins_b, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vextrins_h, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vextrins_w, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vextrins_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vld, "V16ScvC*Ii", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vst, "vV16Scv*Ii", "nc", "lsx") 
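+// vld/vst move a whole 128-bit vector between memory and a vector register,
+// addressing with a base pointer plus a constant displacement (the "Ii"
+// operand); the vldx/vstx pair below takes a runtime 64-bit byte offset
+// instead. A minimal sketch, reusing the v16i8 typedef assumed earlier:
+//   v16i8 row = __builtin_lsx_vld(buf, 16); // load 16 bytes from buf + 16
+//   __builtin_lsx_vst(row, out, 0);         // store them at out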
+
+TARGET_BUILTIN(__builtin_lsx_vldx, "V16ScvC*LLi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vstx, "vV16Scv*LLi", "nc", "lsx")
+
+TARGET_BUILTIN(__builtin_lsx_vldrepl_b, "V16cvC*Ii", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vldrepl_h, "V8svC*Ii", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vldrepl_w, "V4ivC*Ii", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vldrepl_d, "V2LLivC*Ii", "nc", "lsx")
+
+TARGET_BUILTIN(__builtin_lsx_vstelm_b, "vV16Scv*IiUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vstelm_h, "vV8Ssv*IiUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vstelm_w, "vV4Siv*IiUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vstelm_d, "vV2SLLiv*IiUi", "nc", "lsx")
+
+TARGET_BUILTIN(__builtin_lsx_bz_v, "iV16Uc", "nc", "lsx")
+
+TARGET_BUILTIN(__builtin_lsx_bnz_v, "iV16Uc", "nc", "lsx")
+
+TARGET_BUILTIN(__builtin_lsx_bz_b, "iV16Uc", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_bz_h, "iV8Us", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_bz_w, "iV4Ui", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_bz_d, "iV2ULLi", "nc", "lsx")
+
+TARGET_BUILTIN(__builtin_lsx_bnz_b, "iV16Uc", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_bnz_h, "iV8Us", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_bnz_w, "iV4Ui", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_bnz_d, "iV2ULLi", "nc", "lsx")
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 1b69324d073ab51ad12dccdeb8f2b539a4c3a4f5..8c751f2c4bdaa20e63fc1ab9e4d6e2c68e351564 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -732,6 +732,12 @@ def warn_drv_loongarch_conflicting_implied_val : Warning<
   InGroup;
 def err_drv_loongarch_invalid_mfpu_EQ : Error<
   "invalid argument '%0' to -mfpu=; must be one of: 64, 32, none, 0 (alias for none)">;
+def err_drv_loongarch_wrong_fpu_width_for_lsx : Error<
+  "wrong fpu width; LSX depends on 64-bit FPU">;
+def err_drv_loongarch_wrong_fpu_width_for_lasx : Error<
+  "wrong fpu width; LASX depends on 64-bit FPU">;
+def err_drv_loongarch_invalid_simd_option_combination : Error<
+  "invalid option combination; LASX depends on LSX">;
 def err_drv_expand_response_file : Error<
   "failed to expand response file: %0">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c88f25209fc0fa69bd59479adfdaf2562c0b3347..0e97620945af4841ab9a3720b5510b57d59a0119 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11853,10 +11853,6 @@ def err_non_designated_init_used : Error<
 def err_cast_from_randomized_struct : Error<
   "casting from randomized structure pointer type %0 to %1">;
 
-// LoongArch-specific Diagnostics
-def err_loongarch_builtin_requires_la64 : Error<
-  "this builtin requires target: loongarch64">;
-
 // Unsafe buffer usage diagnostics.
 def warn_unsafe_buffer_variable : Warning<
   "%0 is an %select{unsafe pointer used for buffer access|unsafe buffer that "
@@ -11872,9 +11868,6 @@ def note_unsafe_buffer_variable_fixit_group : Note<
   "change type of %0 to '%select{std::span|std::array|std::span::iterator}1' to preserve bounds information%select{|, and change %2 to '%select{std::span|std::array|std::span::iterator}1' to propagate bounds information between them}3">;
 def note_safe_buffer_usage_suggestions_disabled : Note<
   "pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions">;
-def err_loongarch_builtin_requires_la32 : Error<
-  "this builtin requires target: loongarch32">;
-
 def err_builtin_pass_in_regs_non_class : Error<
   "argument %0 is not an unqualified class type">;
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index d4f7315bf8cbeb6a8092015481997d3f208695d5..bb4928293c45a058a5ec02d0a78e7c977bc5f0d9 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -192,6 +192,8 @@ def m_x86_Features_Group : OptionGroup<"<m_x86_Features_Group>">,
                            Group<m_Group>, Flags<[CoreOption]>, DocName<"X86">;
 def m_riscv_Features_Group : OptionGroup<"<m_riscv_Features_Group>">,
                              Group<m_Group>, DocName<"RISC-V">;
+def m_loongarch_Features_Group : OptionGroup<"<m_loongarch_Features_Group>">,
+                                 Group<m_Group>, DocName<"LoongArch">;
 def m_libc_Group : OptionGroup<"<m_libc_Group>">, Group<m_mips_Features_Group>,
                    Flags<[HelpHidden]>;
@@ -4196,6 +4198,14 @@ def mstack_protector_guard_reg_EQ : Joined<["-"], "mstack-protector-guard-reg=">
 def mfentry : Flag<["-"], "mfentry">, HelpText<"Insert calls to fentry at function entry (x86/SystemZ only)">,
   Flags<[CC1Option]>, Group<m_Group>,
   MarshallingInfoFlag<CodeGenOpts<"CallFEntry">>;
+def mlsx : Flag<["-"], "mlsx">, Group<m_loongarch_Features_Group>,
+  HelpText<"Enable Loongson SIMD Extension (LSX).">;
+def mno_lsx : Flag<["-"], "mno-lsx">, Group<m_loongarch_Features_Group>,
+  HelpText<"Disable Loongson SIMD Extension (LSX).">;
+def mlasx : Flag<["-"], "mlasx">, Group<m_loongarch_Features_Group>,
+  HelpText<"Enable Loongson Advanced SIMD Extension (LASX).">;
+def mno_lasx : Flag<["-"], "mno-lasx">, Group<m_loongarch_Features_Group>,
+  HelpText<"Disable Loongson Advanced SIMD Extension (LASX).">;
 def mnop_mcount : Flag<["-"], "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. To activate they need to be patched in.">,
   Flags<[CC1Option]>, Group<m_Group>,
   MarshallingInfoFlag<CodeGenOpts<"MNopMCount">>;
diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp
index 4448a2ae10a1725e2ca8397200e76f970e58e8fb..88537989a05129f553b89ac2e74a444e9472bea0 100644
--- a/clang/lib/Basic/Targets/LoongArch.cpp
+++ b/clang/lib/Basic/Targets/LoongArch.cpp
@@ -208,6 +208,11 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts,
     TuneCPU = ArchName;
   Builder.defineMacro("__loongarch_tune", Twine('"') + TuneCPU + Twine('"'));
 
+  if (HasFeatureLSX)
+    Builder.defineMacro("__loongarch_sx", Twine(1));
+  if (HasFeatureLASX)
+    Builder.defineMacro("__loongarch_asx", Twine(1));
+
   StringRef ABI = getABI();
   if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s")
     Builder.defineMacro("__loongarch_lp64");
@@ -257,6 +262,8 @@ bool LoongArchTargetInfo::hasFeature(StringRef Feature) const {
       .Case("loongarch64", Is64Bit)
       .Case("32bit", !Is64Bit)
       .Case("64bit", Is64Bit)
+      .Case("lsx", HasFeatureLSX)
+      .Case("lasx", HasFeatureLASX)
       .Default(false);
 }
 
@@ -274,7 +281,10 @@ bool LoongArchTargetInfo::handleTargetFeatures(
       if (Feature == "+d") {
         HasFeatureD = true;
       }
-    }
+    } else if (Feature == "+lsx")
+      HasFeatureLSX = true;
+    else if (Feature == "+lasx")
+      HasFeatureLASX = true;
   }
   return true;
 }
diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h
index 34143f462a24722785c3adc6e6d1d01826a5388c..8f4150b2539de2791900be41a208f4e38ab806ee 100644
--- a/clang/lib/Basic/Targets/LoongArch.h
+++ b/clang/lib/Basic/Targets/LoongArch.h
@@ -27,12 +27,16 @@ protected:
   std::string CPU;
   bool HasFeatureD;
   bool HasFeatureF;
+  bool HasFeatureLSX;
+  bool HasFeatureLASX;
 
 public:
   LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
       : TargetInfo(Triple) {
     HasFeatureD = false;
     HasFeatureF = false;
+    HasFeatureLSX = false;
+    HasFeatureLASX = false;
     LongDoubleWidth = 128;
     LongDoubleAlign = 128;
     LongDoubleFormat = &llvm::APFloat::IEEEquad();
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 30f5f4e7061c05fda2b3e975dc57308c7a758d55..e512762fafaf00b41ded828d94ccda3e806e357b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -43,7 +43,6 @@
 #include "llvm/IR/IntrinsicsARM.h"
 #include "llvm/IR/IntrinsicsBPF.h"
 #include "llvm/IR/IntrinsicsHexagon.h"
-#include "llvm/IR/IntrinsicsLoongArch.h"
 #include "llvm/IR/IntrinsicsNVPTX.h"
 #include "llvm/IR/IntrinsicsPowerPC.h"
 #include "llvm/IR/IntrinsicsR600.h"
@@ -5588,9 +5587,6 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
   case llvm::Triple::riscv32:
   case llvm::Triple::riscv64:
     return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
-  case llvm::Triple::loongarch32:
-  case llvm::Triple::loongarch64:
-    return CGF->EmitLoongArchBuiltinExpr(BuiltinID, E);
   default:
     return nullptr;
   }
@@ -20418,129 +20414,3 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
   return Builder.CreateCall(F, Ops, "");
 }
-
-Value *CodeGenFunction::EmitLoongArchBuiltinExpr(unsigned BuiltinID,
-                                                 const CallExpr *E) {
-  SmallVector<Value *, 4> Ops;
-
-  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
-    Ops.push_back(EmitScalarExpr(E->getArg(i)));
-
-  Intrinsic::ID ID = Intrinsic::not_intrinsic;
-
-  switch (BuiltinID) {
-  default:
-    llvm_unreachable("unexpected builtin ID.");
-  case LoongArch::BI__builtin_loongarch_cacop_d:
-    ID = Intrinsic::loongarch_cacop_d;
-    break;
LoongArch::BI__builtin_loongarch_cacop_w: - ID = Intrinsic::loongarch_cacop_w; - break; - case LoongArch::BI__builtin_loongarch_dbar: - ID = Intrinsic::loongarch_dbar; - break; - case LoongArch::BI__builtin_loongarch_break: - ID = Intrinsic::loongarch_break; - break; - case LoongArch::BI__builtin_loongarch_ibar: - ID = Intrinsic::loongarch_ibar; - break; - case LoongArch::BI__builtin_loongarch_movfcsr2gr: - ID = Intrinsic::loongarch_movfcsr2gr; - break; - case LoongArch::BI__builtin_loongarch_movgr2fcsr: - ID = Intrinsic::loongarch_movgr2fcsr; - break; - case LoongArch::BI__builtin_loongarch_syscall: - ID = Intrinsic::loongarch_syscall; - break; - case LoongArch::BI__builtin_loongarch_crc_w_b_w: - ID = Intrinsic::loongarch_crc_w_b_w; - break; - case LoongArch::BI__builtin_loongarch_crc_w_h_w: - ID = Intrinsic::loongarch_crc_w_h_w; - break; - case LoongArch::BI__builtin_loongarch_crc_w_w_w: - ID = Intrinsic::loongarch_crc_w_w_w; - break; - case LoongArch::BI__builtin_loongarch_crc_w_d_w: - ID = Intrinsic::loongarch_crc_w_d_w; - break; - case LoongArch::BI__builtin_loongarch_crcc_w_b_w: - ID = Intrinsic::loongarch_crcc_w_b_w; - break; - case LoongArch::BI__builtin_loongarch_crcc_w_h_w: - ID = Intrinsic::loongarch_crcc_w_h_w; - break; - case LoongArch::BI__builtin_loongarch_crcc_w_w_w: - ID = Intrinsic::loongarch_crcc_w_w_w; - break; - case LoongArch::BI__builtin_loongarch_crcc_w_d_w: - ID = Intrinsic::loongarch_crcc_w_d_w; - break; - case LoongArch::BI__builtin_loongarch_csrrd_w: - ID = Intrinsic::loongarch_csrrd_w; - break; - case LoongArch::BI__builtin_loongarch_csrwr_w: - ID = Intrinsic::loongarch_csrwr_w; - break; - case LoongArch::BI__builtin_loongarch_csrxchg_w: - ID = Intrinsic::loongarch_csrxchg_w; - break; - case LoongArch::BI__builtin_loongarch_csrrd_d: - ID = Intrinsic::loongarch_csrrd_d; - break; - case LoongArch::BI__builtin_loongarch_csrwr_d: - ID = Intrinsic::loongarch_csrwr_d; - break; - case LoongArch::BI__builtin_loongarch_csrxchg_d: - ID = Intrinsic::loongarch_csrxchg_d; - break; - case LoongArch::BI__builtin_loongarch_iocsrrd_b: - ID = Intrinsic::loongarch_iocsrrd_b; - break; - case LoongArch::BI__builtin_loongarch_iocsrrd_h: - ID = Intrinsic::loongarch_iocsrrd_h; - break; - case LoongArch::BI__builtin_loongarch_iocsrrd_w: - ID = Intrinsic::loongarch_iocsrrd_w; - break; - case LoongArch::BI__builtin_loongarch_iocsrrd_d: - ID = Intrinsic::loongarch_iocsrrd_d; - break; - case LoongArch::BI__builtin_loongarch_iocsrwr_b: - ID = Intrinsic::loongarch_iocsrwr_b; - break; - case LoongArch::BI__builtin_loongarch_iocsrwr_h: - ID = Intrinsic::loongarch_iocsrwr_h; - break; - case LoongArch::BI__builtin_loongarch_iocsrwr_w: - ID = Intrinsic::loongarch_iocsrwr_w; - break; - case LoongArch::BI__builtin_loongarch_iocsrwr_d: - ID = Intrinsic::loongarch_iocsrwr_d; - break; - case LoongArch::BI__builtin_loongarch_cpucfg: - ID = Intrinsic::loongarch_cpucfg; - break; - case LoongArch::BI__builtin_loongarch_asrtle_d: - ID = Intrinsic::loongarch_asrtle_d; - break; - case LoongArch::BI__builtin_loongarch_asrtgt_d: - ID = Intrinsic::loongarch_asrtgt_d; - break; - case LoongArch::BI__builtin_loongarch_lddir_d: - ID = Intrinsic::loongarch_lddir_d; - break; - case LoongArch::BI__builtin_loongarch_ldpte_d: - ID = Intrinsic::loongarch_ldpte_d; - break; - // TODO: Support more Intrinsics. 
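(A sanity sketch, not from the patch: every case in the switch being deleted here was a pure one-to-one name mapping from a Clang builtin to the LLVM intrinsic of the same name, with the evaluated arguments passed through unchanged, so the deletion is behavior-preserving only if the generic builtin-lowering path that replaces it emits the same calls. For example, assuming -march=loongarch64:

  void fence(void) {
    /* was mapped by the deleted case to Intrinsic::loongarch_dbar;
       hint 0 requests a full memory barrier */
    __builtin_loongarch_dbar(0);
  }

should keep lowering to "call void @llvm.loongarch.dbar(i32 0)" as before.)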
- } - - assert(ID != Intrinsic::not_intrinsic); - - llvm::Function *F = CGM.getIntrinsic(ID); - return Builder.CreateCall(F, Ops); -} diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 8722fd4550e4a7023aa579fc0ef283dcebfe4f17..143e0707b9429ef46ea4b58a8e0aa7f216094e74 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4316,7 +4316,6 @@ public: llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue); - llvm::Value *EmitLoongArchBuiltinExpr(unsigned BuiltinID, const CallExpr *E); void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID); diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp index 65925e9ed610107603dac5322e67920aee4d3cdf..31153a67ad284033d7a63282d093a849d8ec176a 100644 --- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp @@ -175,6 +175,38 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, A->ignoreTargetSpecific(); if (Arg *A = Args.getLastArgNoClaim(options::OPT_mfpu_EQ)) A->ignoreTargetSpecific(); + + // Select lsx feature determined by -m[no-]lsx. + if (const Arg *A = Args.getLastArg(options::OPT_mlsx, options::OPT_mno_lsx)) { + // LSX depends on 64-bit FPU. + // -m*-float and -mfpu=none/0/32 conflict with -mlsx. + if (A->getOption().matches(options::OPT_mlsx)) { + if (llvm::find(Features, "-d") != Features.end()) + D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lsx); + else /*-mlsx*/ + Features.push_back("+lsx"); + } else /*-mno-lsx*/ { + Features.push_back("-lsx"); + } + } + + // Select lasx feature determined by -m[no-]lasx. + if (const Arg *A = + Args.getLastArg(options::OPT_mlasx, options::OPT_mno_lasx)) { + // LASX depends on 64-bit FPU and LSX. + // -mno-lsx conflicts with -mlasx. + if (A->getOption().matches(options::OPT_mlasx)) { + if (llvm::find(Features, "-d") != Features.end()) + D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lasx); + else if (llvm::find(Features, "-lsx") != Features.end()) + D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination); + else { /*-mlasx*/ + Features.push_back("+lsx"); + Features.push_back("+lasx"); + } + } else /*-mno-lasx*/ + Features.push_back("-lasx"); + } } std::string loongarch::postProcessTargetCPUString(const std::string &CPU, diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index db47de2ad9655f36d1a399d65bc17946488a9a06..356009ae9157c9231af761b8a78e9b85628d06fb 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -78,6 +78,8 @@ set(hlsl_files set(loongarch_files larchintrin.h + lasxintrin.h + lsxintrin.h ) set(mips_msa_files diff --git a/clang/lib/Headers/lasxintrin.h b/clang/lib/Headers/lasxintrin.h new file mode 100644 index 0000000000000000000000000000000000000000..6b4d5012a24b5893024424c6613265fbbe81c830 --- /dev/null +++ b/clang/lib/Headers/lasxintrin.h @@ -0,0 +1,3860 @@ +/*===------------ lasxintrin.h - LoongArch LASX intrinsics -----------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef _LOONGSON_ASXINTRIN_H +#define _LOONGSON_ASXINTRIN_H 1 + +#if defined(__loongarch_asx) + +typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); +typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); +typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); +typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); +typedef short v16i16 __attribute__((vector_size(32), aligned(32))); +typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); +typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); +typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); +typedef int v8i32 __attribute__((vector_size(32), aligned(32))); +typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); +typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); +typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); +typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); +typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); +typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); +typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); +typedef float v8f32 __attribute__((vector_size(32), aligned(32))); +typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); +typedef double v4f64 __attribute__((vector_size(32), aligned(32))); +typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); + +typedef float __m256 __attribute__((__vector_size__(32), __may_alias__)); +typedef long long __m256i __attribute__((__vector_size__(32), __may_alias__)); +typedef double __m256d __attribute__((__vector_size__(32), __may_alias__)); + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsll_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsll_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsll_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsll_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsll_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsll_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsll_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsll_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvslli_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_b((v32i8)(_1), (_2))) + +#define __lasx_xvslli_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_h((v16i16)(_1), (_2))) + +#define __lasx_xvslli_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_w((v8i32)(_1), (_2))) + +#define __lasx_xvslli_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsra_b(__m256i _1, __m256i _2) { + return
(__m256i)__builtin_lasx_xvsra_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsra_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsra_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsra_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsra_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsra_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsra_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvsrai_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_b((v32i8)(_1), (_2))) + +#define __lasx_xvsrai_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_h((v16i16)(_1), (_2))) + +#define __lasx_xvsrai_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_w((v8i32)(_1), (_2))) + +#define __lasx_xvsrai_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvsrari_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_b((v32i8)(_1), (_2))) + +#define __lasx_xvsrari_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_h((v16i16)(_1), (_2))) + +#define __lasx_xvsrari_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_w((v8i32)(_1), (_2))) + +#define __lasx_xvsrari_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrl_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrl_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrl_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrl_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvsrli_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_b((v32i8)(_1), (_2))) + +#define __lasx_xvsrli_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_h((v16i16)(_1), (_2))) + +#define __lasx_xvsrli_w(/*__m256i*/ _1, /*ui5*/ _2) \ + 
((__m256i)__builtin_lasx_xvsrli_w((v8i32)(_1), (_2))) + +#define __lasx_xvsrli_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvsrlri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_b((v32i8)(_1), (_2))) + +#define __lasx_xvsrlri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_h((v16i16)(_1), (_2))) + +#define __lasx_xvsrlri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_w((v8i32)(_1), (_2))) + +#define __lasx_xvsrlri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_b((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_d((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvbitclri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_b((v32u8)(_1), (_2))) + +#define __lasx_xvbitclri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_h((v16u16)(_1), (_2))) + +#define __lasx_xvbitclri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_w((v8u32)(_1), (_2))) + +#define __lasx_xvbitclri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_d((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_b((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_d(__m256i _1, 
__m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_d((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvbitseti_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_b((v32u8)(_1), (_2))) + +#define __lasx_xvbitseti_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_h((v16u16)(_1), (_2))) + +#define __lasx_xvbitseti_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_w((v8u32)(_1), (_2))) + +#define __lasx_xvbitseti_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_d((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_b((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_d((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvbitrevi_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_b((v32u8)(_1), (_2))) + +#define __lasx_xvbitrevi_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_h((v16u16)(_1), (_2))) + +#define __lasx_xvbitrevi_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_w((v8u32)(_1), (_2))) + +#define __lasx_xvbitrevi_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_d((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvaddi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_bu((v32i8)(_1), (_2))) + +#define __lasx_xvaddi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_hu((v16i16)(_1), (_2))) + +#define __lasx_xvaddi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_wu((v8i32)(_1), (_2))) + +#define __lasx_xvaddi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_du((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_h(__m256i _1, __m256i _2) { + return 
(__m256i)__builtin_lasx_xvsub_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvsubi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_bu((v32i8)(_1), (_2))) + +#define __lasx_xvsubi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_hu((v16i16)(_1), (_2))) + +#define __lasx_xvsubi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_wu((v8i32)(_1), (_2))) + +#define __lasx_xvsubi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_du((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvmaxi_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_b((v32i8)(_1), (_2))) + +#define __lasx_xvmaxi_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_h((v16i16)(_1), (_2))) + +#define __lasx_xvmaxi_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_w((v8i32)(_1), (_2))) + +#define __lasx_xvmaxi_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvmaxi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_bu((v32u8)(_1), (_2))) + +#define __lasx_xvmaxi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_hu((v16u16)(_1), (_2))) + +#define __lasx_xvmaxi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_wu((v8u32)(_1), (_2))) + +#define __lasx_xvmaxi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_du((v4u64)(_1), (_2))) + +extern __inline + 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvmini_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_b((v32i8)(_1), (_2))) + +#define __lasx_xvmini_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_h((v16i16)(_1), (_2))) + +#define __lasx_xvmini_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_w((v8i32)(_1), (_2))) + +#define __lasx_xvmini_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvmini_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_bu((v32u8)(_1), (_2))) + +#define __lasx_xvmini_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_hu((v16u16)(_1), (_2))) + +#define __lasx_xvmini_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_wu((v8u32)(_1), (_2))) + +#define __lasx_xvmini_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_du((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvseqi_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_b((v32i8)(_1), (_2))) + +#define __lasx_xvseqi_h(/*__m256i*/ _1, /*si5*/ _2) \ + 
((__m256i)__builtin_lasx_xvseqi_h((v16i16)(_1), (_2))) + +#define __lasx_xvseqi_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_w((v8i32)(_1), (_2))) + +#define __lasx_xvseqi_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvslti_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_b((v32i8)(_1), (_2))) + +#define __lasx_xvslti_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_h((v16i16)(_1), (_2))) + +#define __lasx_xvslti_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_w((v8i32)(_1), (_2))) + +#define __lasx_xvslti_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvslti_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_bu((v32u8)(_1), (_2))) + +#define __lasx_xvslti_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_hu((v16u16)(_1), (_2))) + +#define __lasx_xvslti_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_wu((v8u32)(_1), (_2))) + +#define __lasx_xvslti_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_du((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m256i + __lasx_xvsle_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvslei_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_b((v32i8)(_1), (_2))) + +#define __lasx_xvslei_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_h((v16i16)(_1), (_2))) + +#define __lasx_xvslei_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_w((v8i32)(_1), (_2))) + +#define __lasx_xvslei_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvslei_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_bu((v32u8)(_1), (_2))) + +#define __lasx_xvslei_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_hu((v16u16)(_1), (_2))) + +#define __lasx_xvslei_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_wu((v8u32)(_1), (_2))) + +#define __lasx_xvslei_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_du((v4u64)(_1), (_2))) + +#define __lasx_xvsat_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_b((v32i8)(_1), (_2))) + +#define __lasx_xvsat_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_h((v16i16)(_1), (_2))) + +#define __lasx_xvsat_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_w((v8i32)(_1), (_2))) + +#define __lasx_xvsat_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_d((v4i64)(_1), (_2))) + +#define __lasx_xvsat_bu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_bu((v32u8)(_1), (_2))) + +#define __lasx_xvsat_hu(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_hu((v16u16)(_1), (_2))) + +#define __lasx_xvsat_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_wu((v8u32)(_1), (_2))) + +#define __lasx_xvsat_du(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_du((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_d(__m256i _1, __m256i _2) { + 
return (__m256i)__builtin_lasx_xvadda_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_b(__m256i _1, __m256i _2) { + return 
(__m256i)__builtin_lasx_xvavgr_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_h(__m256i _1, __m256i _2) { + 
return (__m256i)__builtin_lasx_xvabsd_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_w((v8i32)_1, (v8i32)_2, (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +extern 
__inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_w((v8i32)_1, (v8i32)_2, (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_hu_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_hu_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_wu_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_wu_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_du_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_du_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_h_b(__m256i _1, __m256i _2) { 
+ return (__m256i)__builtin_lasx_xvhsubw_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_hu_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_hu_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_wu_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_wu_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_du_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_du_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvrepl128vei_b(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_b((v32i8)(_1), (_2))) + +#define __lasx_xvrepl128vei_h(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_h((v16i16)(_1), (_2))) + +#define __lasx_xvrepl128vei_w(/*__m256i*/ _1, /*ui2*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_w((v8i32)(_1), (_2))) + +#define __lasx_xvrepl128vei_d(/*__m256i*/ _1, /*ui1*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_h((v16i16)_1, 
(v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_w((v8i32)_1, (v8i32)_2, (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvand_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvand_v((v32u8)_1, (v32u8)_2); +} + +#define __lasx_xvandi_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvandi_b((v32u8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvor_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvor_v((v32u8)_1, (v32u8)_2); +} + +#define __lasx_xvori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvori_b((v32u8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvnor_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvnor_v((v32u8)_1, (v32u8)_2); +} + +#define __lasx_xvnori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvnori_b((v32u8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvxor_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvxor_v((v32u8)_1, (v32u8)_2); +} + +#define __lasx_xvxori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvxori_b((v32u8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, 
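+/* Illustrative usage: the _v bitwise operations treat the whole 256-bit
+ * register as raw bits, while the immediate _b forms apply an 8-bit constant
+ * to every byte lane. A sketch, assuming `a` and `b` are __m256i values:
+ *
+ *   __m256i both = __lasx_xvand_v(a, b);     // a & b
+ *   __m256i nota = __lasx_xvxori_b(a, 0xFF); // bytewise a ^ 0xFF, i.e. ~a
+ */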
__always_inline__, __artificial__)) __m256i + __lasx_xvbitsel_v(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvbitsel_v((v32u8)_1, (v32u8)_2, (v32u8)_3); +} + +#define __lasx_xvbitseli_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvbitseli_b((v32u8)(_1), (v32u8)(_2), (_3))) + +#define __lasx_xvshuf4i_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_b((v32i8)(_1), (_2))) + +#define __lasx_xvshuf4i_h(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_h((v16i16)(_1), (_2))) + +#define __lasx_xvshuf4i_w(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_w((v8i32)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_b(int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_b((int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_h(int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_h((int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_w(int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_w((int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_d(long int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_d((long int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, 
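+/* Illustrative usage: xvreplgr2vr_* broadcasts a general-purpose register
+ * into every lane, and the xvpcnt/xvclo/xvclz helpers count set bits,
+ * leading ones, and leading zeros per element. A sketch with a hypothetical
+ * int `x`:
+ *
+ *   __m256i splat = __lasx_xvreplgr2vr_w(x); // eight copies of x
+ *   __m256i bits  = __lasx_xvpcnt_w(splat);  // per-lane population count
+ */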
__artificial__)) __m256i + __lasx_xvclz_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfadd_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfadd_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfadd_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfadd_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfsub_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfsub_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfsub_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfsub_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmul_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmul_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmul_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmul_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfdiv_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfdiv_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfdiv_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfdiv_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcvt_h_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcvt_h_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfcvt_s_d(__m256d _1, __m256d _2) { + return (__m256)__builtin_lasx_xvfcvt_s_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmin_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmin_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmin_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmin_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmina_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmina_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmina_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmina_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmax_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmax_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmax_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmax_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmaxa_s(__m256 _1, __m256 
_2) { + return (__m256)__builtin_lasx_xvfmaxa_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmaxa_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmaxa_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfclass_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvfclass_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfclass_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvfclass_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfsqrt_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfsqrt_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfsqrt_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfsqrt_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrecip_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrecip_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrecip_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrint_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrint_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrint_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrint_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrsqrt_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrsqrt_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrsqrt_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvflogb_s(__m256 _1) { + return (__m256)__builtin_lasx_xvflogb_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvflogb_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvflogb_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfcvth_s_h(__m256i _1) { + return (__m256)__builtin_lasx_xvfcvth_s_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfcvth_d_s(__m256 _1) { + return (__m256d)__builtin_lasx_xvfcvth_d_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfcvtl_s_h(__m256i _1) { + return (__m256)__builtin_lasx_xvfcvtl_s_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfcvtl_d_s(__m256 _1) { + return (__m256d)__builtin_lasx_xvfcvtl_d_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftint_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, 
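+/* Illustrative usage: the xvf* helpers mirror scalar float operations
+ * lane-wise, with _s acting on eight floats (__m256) and _d on four doubles
+ * (__m256d). A sketch, assuming `v` and `w` are __m256 values:
+ *
+ *   __m256 sum  = __lasx_xvfadd_s(v, w);  // v + w per lane
+ *   __m256 root = __lasx_xvfsqrt_s(sum);  // sqrtf per lane
+ */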
__artificial__)) __m256i + __lasx_xvftint_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftint_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_wu_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftint_wu_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_lu_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftint_lu_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrz_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrz_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_wu_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrz_wu_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_lu_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrz_lu_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvffint_s_w(__m256i _1) { + return (__m256)__builtin_lasx_xvffint_s_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffint_d_l(__m256i _1) { + return (__m256d)__builtin_lasx_xvffint_d_l((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvffint_s_wu(__m256i _1) { + return (__m256)__builtin_lasx_xvffint_s_wu((v8u32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffint_d_lu(__m256i _1) { + return (__m256d)__builtin_lasx_xvffint_d_lu((v4u64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_b(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_b((v32i8)_1, (int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_h(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_h((v16i16)_1, (int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_w(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_w((v8i32)_1, (int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_d(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_d((v4i64)_1, (int)_2); +} + +#define __lasx_xvpermi_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvpermi_w((v8i32)(_1), (v8i32)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvandn_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvandn_v((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_h(__m256i _1) { + return 
(__m256i)__builtin_lasx_xvneg_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvsllwil_h_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_h_b((v32i8)(_1), (_2))) + +#define __lasx_xvsllwil_w_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_w_h((v16i16)(_1), (_2))) + +#define __lasx_xvsllwil_d_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_d_w((v8i32)(_1), (_2))) + +#define __lasx_xvsllwil_hu_bu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_hu_bu((v32u8)(_1), (_2))) + +#define __lasx_xvsllwil_wu_hu(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_wu_hu((v16u16)(_1), (_2))) + +#define __lasx_xvsllwil_du_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_du_wu((v8u32)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsran_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsran_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsran_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsran_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsran_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsran_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + 
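+/* Illustrative usage: xvsran_* shifts each wide element right by a per-lane
+ * amount taken from the second operand and narrows it to the next smaller
+ * type; the xvssran_* forms that follow saturate instead of truncating. A
+ * sketch, assuming `a` and `shift` are __m256i values:
+ *
+ *   __m256i narrow = __lasx_xvsran_b_h(a, shift); // 16-bit lanes -> bytes
+ */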
__lasx_xvssran_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_bu_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_hu_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_wu_d((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrarn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrarn_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrarn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrarn_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrarn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrarn_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_bu_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_hu_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_wu_d((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrln_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrln_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrln_h_w(__m256i _1, __m256i _2) { + return 
(__m256i)__builtin_lasx_xvsrln_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrln_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrln_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_bu_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_hu_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_wu_d((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlrn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlrn_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlrn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlrn_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlrn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlrn_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_bu_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_hu_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_wu_d((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvfrstpi_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvfrstpi_b((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvfrstpi_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvfrstpi_h((v16i16)(_1), (v16i16)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfrstp_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvfrstp_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfrstp_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvfrstp_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +#define __lasx_xvshuf4i_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvshuf4i_d((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvbsrl_v(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbsrl_v((v32i8)(_1), (_2))) + +#define __lasx_xvbsll_v(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbsll_v((v32i8)(_1), (_2))) + +#define __lasx_xvextrins_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_b((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvextrins_h(/*__m256i*/ _1, 
/*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_h((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvextrins_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_w((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvextrins_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_d((v4i64)(_1), (v4i64)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmadd_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmadd_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmsub_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmsub_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfnmadd_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfnmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfnmadd_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfnmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfnmsub_s(__m256 _1, __m256 _2, __m256 _3) { + 
return (__m256)__builtin_lasx_xvfnmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfnmsub_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfnmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrne_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrne_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrne_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrne_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrp_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrp_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrp_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrp_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrm_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrm_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrm_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrm_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftint_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvffint_s_l(__m256i _1, __m256i _2) { + return (__m256)__builtin_lasx_xvffint_s_l((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrz_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrp_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrp_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrm_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrm_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrne_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrne_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftinth_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftinth_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintl_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintl_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffinth_d_w(__m256i _1) { + return (__m256d)__builtin_lasx_xvffinth_d_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffintl_d_w(__m256i _1) { + return (__m256d)__builtin_lasx_xvffintl_d_w((v8i32)_1); +} 
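+/* Illustrative usage: the xvftint* conversions encode the rounding mode in
+ * the name (rz = toward zero, rne = to nearest even, rp = up, rm = down),
+ * and xvffint* converts back from integers to floats. A sketch, assuming
+ * `v` is a __m256 of eight floats:
+ *
+ *   __m256i trunc = __lasx_xvftintrz_w_s(v); // like a C (int) cast per lane
+ *   __m256  back  = __lasx_xvffint_s_w(trunc);
+ */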
+ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrzh_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrzh_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrzl_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrzl_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrph_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrph_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrpl_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrpl_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrmh_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrmh_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrml_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrml_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrneh_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrneh_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrnel_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrnel_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrne_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrne_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrne_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrne_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrz_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrz_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrz_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrz_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrp_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrp_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrp_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrp_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrm_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrm_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrm_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrm_d((v4f64)_1); +} + +#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) \ + ((__m256i)__builtin_lasx_xvld((void const *)(_1), (_2))) + +#define __lasx_xvst(/*__m256i*/ _1, /*void **/ _2, /*si12*/ _3) \ + ((void)__builtin_lasx_xvst((v32i8)(_1), (void *)(_2), (_3))) + +#define __lasx_xvstelm_b(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_b((v32i8)(_1), (void *)(_2), (_3), (_4))) + +#define __lasx_xvstelm_h(/*__m256i*/ _1, /*void **/ 
_2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_h((v16i16)(_1), (void *)(_2), (_3), (_4))) + +#define __lasx_xvstelm_w(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_w((v8i32)(_1), (void *)(_2), (_3), (_4))) + +#define __lasx_xvstelm_d(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_d((v4i64)(_1), (void *)(_2), (_3), (_4))) + +#define __lasx_xvinsve0_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui3*/ _3) \ + ((__m256i)__builtin_lasx_xvinsve0_w((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvinsve0_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui2*/ _3) \ + ((__m256i)__builtin_lasx_xvinsve0_d((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvpickve_w(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvpickve_w((v8i32)(_1), (_2))) + +#define __lasx_xvpickve_d(/*__m256i*/ _1, /*ui2*/ _2) \ + ((__m256i)__builtin_lasx_xvpickve_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvorn_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvorn_v((v32i8)_1, (v32i8)_2); +} + +#define __lasx_xvldi(/*i13*/ _1) ((__m256i)__builtin_lasx_xvldi((_1))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvldx(void const *_1, long int _2) { + return (__m256i)__builtin_lasx_xvldx((void const *)_1, (long int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void + __lasx_xvstx(__m256i _1, void *_2, long int _3) { + return (void)__builtin_lasx_xvstx((v32i8)_1, (void *)_2, (long int)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvextl_qu_du(__m256i _1) { + return (__m256i)__builtin_lasx_xvextl_qu_du((v4u64)_1); +} + +#define __lasx_xvinsgr2vr_w(/*__m256i*/ _1, /*int*/ _2, /*ui3*/ _3) \ + ((__m256i)__builtin_lasx_xvinsgr2vr_w((v8i32)(_1), (int)(_2), (_3))) + +#define __lasx_xvinsgr2vr_d(/*__m256i*/ _1, /*long int*/ _2, /*ui2*/ _3) \ + ((__m256i)__builtin_lasx_xvinsgr2vr_d((v4i64)(_1), (long int)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
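+/* Illustrative usage: __lasx_xvld/__lasx_xvst address memory with a base
+ * pointer plus a signed 12-bit immediate, while xvldx/xvstx take the offset
+ * in a register. A sketch, assuming `buf` points at 32 accessible bytes:
+ *
+ *   __m256i v = __lasx_xvld(buf, 0); // load 32 bytes
+ *   __lasx_xvst(v, buf, 0);          // store them back
+ */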
__m256i + __lasx_xvreplve0_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_q(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_q((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_h_b(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_h_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_w_h(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_w_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_d_w(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_d_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_w_b(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_w_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_d_h(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_d_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_d_b(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_d_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_hu_bu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_hu_bu((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_wu_hu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_wu_hu((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_du_wu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_du_wu((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_wu_bu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_wu_bu((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_du_hu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_du_hu((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_du_bu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_du_bu((v32i8)_1); +} + +#define __lasx_xvpermi_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvpermi_q((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvpermi_d(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvpermi_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + 
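+/* Illustrative usage: the vext2xv_* family widens the lowest elements of
+ * the source across the full 256-bit register, sign-extending in the plain
+ * forms and zero-extending in the _u forms. A sketch with a __m256i `a`:
+ *
+ *   __m256i sw = __lasx_vext2xv_w_b(a);   // low 8 bytes -> 8 int32 lanes
+ *   __m256i uw = __lasx_vext2xv_wu_bu(a); // same lanes, zero-extended
+ */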
__lasx_xvperm_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvperm_w((v8i32)_1, (v8i32)_2); +} + +#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_b((void const *)(_1), (_2))) + +#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_h((void const *)(_1), (_2))) + +#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_w((void const *)(_1), (_2))) + +#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_d((void const *)(_1), (_2))) + +#define __lasx_xvpickve2gr_w(/*__m256i*/ _1, /*ui3*/ _2) \ + ((int)__builtin_lasx_xvpickve2gr_w((v8i32)(_1), (_2))) + +#define __lasx_xvpickve2gr_wu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((unsigned int)__builtin_lasx_xvpickve2gr_wu((v8i32)(_1), (_2))) + +#define __lasx_xvpickve2gr_d(/*__m256i*/ _1, /*ui2*/ _2) \ + ((long int)__builtin_lasx_xvpickve2gr_d((v4i64)(_1), (_2))) + +#define __lasx_xvpickve2gr_du(/*__m256i*/ _1, /*ui2*/ _2) \ + ((unsigned long int)__builtin_lasx_xvpickve2gr_du((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, 
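+/* Illustrative usage: the *wev forms combine the even-indexed source lanes
+ * into double-width results and the *wod forms below use the odd-indexed
+ * lanes, so a full widening add takes one of each. A sketch:
+ *
+ *   __m256i even = __lasx_xvaddwev_d_w(a, b); // a[0]+b[0], a[2]+b[2], ...
+ *   __m256i odd  = __lasx_xvaddwod_d_w(a, b); // a[1]+b[1], a[3]+b[3], ...
+ */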
__always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_h_b(__m256i 
_1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_h_b((v32i8)_1, 
(v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_d_wu_w((v8u32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_w_hu_h((v16u16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_h_bu_b((v32u8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_d_wu_w((v8u32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_w_hu_h((v16u16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_h_bu_b((v32u8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_d_wu_w((v8u32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_w_hu_h((v16u16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_h_bu_b((v32u8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_d_wu_w((v8u32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_w_hu_h((v16u16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_h_bu_b((v32u8)_1, 
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvhaddw_q_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvhaddw_q_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvhaddw_qu_du(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvhaddw_qu_du((v4u64)_1, (v4u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvhsubw_q_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvhsubw_q_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvhsubw_qu_du(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvhsubw_qu_du((v4u64)_1, (v4u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwev_q_d(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwev_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwev_d_w(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwev_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwev_w_h(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwev_w_h((v8i32)_1, (v16i16)_2,
+                                               (v16i16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwev_h_b(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwev_h_b((v16i16)_1, (v32i8)_2,
+                                               (v32i8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwev_q_du(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwev_q_du((v4u64)_1, (v4u64)_2,
+                                                (v4u64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwev_d_wu(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwev_d_wu((v4u64)_1, (v8u32)_2,
+                                                (v8u32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwev_w_hu(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwev_w_hu((v8u32)_1, (v16u16)_2,
+                                                (v16u16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwev_h_bu(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwev_h_bu((v16u16)_1, (v32u8)_2,
+                                                (v32u8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwod_q_d(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwod_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwod_d_w(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwod_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwod_w_h(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwod_w_h((v8i32)_1, (v16i16)_2,
+                                               (v16i16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwod_h_b(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwod_h_b((v16i16)_1, (v32i8)_2,
+                                               (v32i8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwod_q_du(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwod_q_du((v4u64)_1, (v4u64)_2,
+                                                (v4u64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwod_d_wu(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwod_d_wu((v4u64)_1, (v8u32)_2,
+                                                (v8u32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwod_w_hu(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwod_w_hu((v8u32)_1, (v16u16)_2,
+                                                (v16u16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwod_h_bu(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwod_h_bu((v16u16)_1, (v32u8)_2,
+                                                (v32u8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwev_q_du_d(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwev_q_du_d((v4i64)_1, (v4u64)_2,
+                                                  (v4i64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwev_d_wu_w(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwev_d_wu_w((v4i64)_1, (v8u32)_2,
+                                                  (v8i32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwev_w_hu_h(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwev_w_hu_h((v8i32)_1, (v16u16)_2,
+                                                  (v16i16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwev_h_bu_b(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwev_h_bu_b((v16i16)_1, (v32u8)_2,
+                                                  (v32i8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwod_q_du_d(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwod_q_du_d((v4i64)_1, (v4u64)_2,
+                                                  (v4i64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwod_d_wu_w(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwod_d_wu_w((v4i64)_1, (v8u32)_2,
+                                                  (v8i32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwod_w_hu_h(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwod_w_hu_h((v8i32)_1, (v16u16)_2,
+                                                  (v16i16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmaddwod_h_bu_b(__m256i _1, __m256i _2, __m256i _3) {
+  return (__m256i)__builtin_lasx_xvmaddwod_h_bu_b((v16i16)_1, (v32u8)_2,
+                                                  (v32i8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvrotr_b(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvrotr_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvrotr_h(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvrotr_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvrotr_w(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvrotr_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvrotr_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvrotr_d((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvadd_q(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvadd_q((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsub_q(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsub_q((v4i64)_1, (v4i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvaddwev_q_du_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvaddwev_q_du_d((v4u64)_1, (v4i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvaddwod_q_du_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvaddwod_q_du_d((v4u64)_1, (v4i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmulwev_q_du_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvmulwev_q_du_d((v4u64)_1, (v4i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmulwod_q_du_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvmulwod_q_du_d((v4u64)_1, (v4i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmskgez_b(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvmskgez_b((v32i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvmsknz_b(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvmsknz_b((v32i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvexth_h_b(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvexth_h_b((v32i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvexth_w_h(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvexth_w_h((v16i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvexth_d_w(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvexth_d_w((v8i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvexth_q_d(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvexth_q_d((v4i64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvexth_hu_bu(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvexth_hu_bu((v32u8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvexth_wu_hu(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvexth_wu_hu((v16u16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvexth_du_wu(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvexth_du_wu((v8u32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvexth_qu_du(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvexth_qu_du((v4u64)_1);
+}
+
+#define __lasx_xvrotri_b(/*__m256i*/ _1, /*ui3*/ _2) \
+  ((__m256i)__builtin_lasx_xvrotri_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvrotri_h(/*__m256i*/ _1, /*ui4*/ _2) \
+  ((__m256i)__builtin_lasx_xvrotri_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvrotri_w(/*__m256i*/ _1, /*ui5*/ _2) \
+  ((__m256i)__builtin_lasx_xvrotri_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvrotri_d(/*__m256i*/ _1, /*ui6*/ _2) \
+  ((__m256i)__builtin_lasx_xvrotri_d((v4i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvextl_q_d(__m256i _1) {
+  return (__m256i)__builtin_lasx_xvextl_q_d((v4i64)_1);
+}
+
+#define __lasx_xvsrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+  ((__m256i)__builtin_lasx_xvsrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvsrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+  ((__m256i)__builtin_lasx_xvsrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvsrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+  ((__m256i)__builtin_lasx_xvsrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvsrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+  ((__m256i)__builtin_lasx_xvsrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvsrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+  ((__m256i)__builtin_lasx_xvsrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvsrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+  ((__m256i)__builtin_lasx_xvsrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvsrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+  ((__m256i)__builtin_lasx_xvsrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvsrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+  ((__m256i)__builtin_lasx_xvsrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvssrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvssrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvssrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvssrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvssrlni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrlni_bu_h((v32u8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvssrlni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrlni_hu_w((v16u16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvssrlni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrlni_wu_d((v8u32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvssrlni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrlni_du_q((v4u64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvssrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvssrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3)))
+
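/* Illustrative sketch, not part of the patch: the immediate (_i-suffixed)
 * forms above are macros rather than inline functions because the underlying
 * builtins require the shift amount to be a compile-time constant (the
 * ui4/ui5/ui6/ui7 annotations give the immediate's unsigned bit-width). A
 * minimal use, assuming a LASX-enabled build compiled against this header: */
__m256i narrow_halfwords(__m256i hi, __m256i lo) {
  /* Shift each 16-bit lane right by the literal 4 and narrow the results of
     both operands into the 8-bit lanes of one vector. */
  return __lasx_xvsrlni_b_h(hi, lo, 4);
}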
+#define __lasx_xvssrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvssrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvssrlrni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrlrni_bu_h((v32u8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvssrlrni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrlrni_hu_w((v16u16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvssrlrni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrlrni_wu_d((v8u32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvssrlrni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrlrni_du_q((v4u64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvsrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+  ((__m256i)__builtin_lasx_xvsrani_b_h((v32i8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvsrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+  ((__m256i)__builtin_lasx_xvsrani_h_w((v16i16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvsrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+  ((__m256i)__builtin_lasx_xvsrani_w_d((v8i32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvsrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+  ((__m256i)__builtin_lasx_xvsrani_d_q((v4i64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvsrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+  ((__m256i)__builtin_lasx_xvsrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvsrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+  ((__m256i)__builtin_lasx_xvsrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvsrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+  ((__m256i)__builtin_lasx_xvsrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvsrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+  ((__m256i)__builtin_lasx_xvsrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvssrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrani_b_h((v32i8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvssrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrani_h_w((v16i16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvssrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrani_w_d((v8i32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvssrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrani_d_q((v4i64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvssrani_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrani_bu_h((v32u8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvssrani_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrani_hu_w((v16u16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvssrani_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrani_wu_d((v8u32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvssrani_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrani_du_q((v4u64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvssrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvssrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvssrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvssrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xvssrarni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrarni_bu_h((v32u8)(_1), (v32i8)(_2), (_3)))
+
+#define __lasx_xvssrarni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrarni_hu_w((v16u16)(_1), (v16i16)(_2), (_3)))
+
+#define __lasx_xvssrarni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrarni_wu_d((v8u32)(_1), (v8i32)(_2), (_3)))
+
+#define __lasx_xvssrarni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \
+  ((__m256i)__builtin_lasx_xvssrarni_du_q((v4u64)(_1), (v4i64)(_2), (_3)))
+
+#define __lasx_xbnz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_b((v32u8)(_1)))
+
+#define __lasx_xbnz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_d((v4u64)(_1)))
+
+#define __lasx_xbnz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_h((v16u16)(_1)))
+
+#define __lasx_xbnz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_v((v32u8)(_1)))
+
+#define __lasx_xbnz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_w((v8u32)(_1)))
+
+#define __lasx_xbz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_b((v32u8)(_1)))
+
+#define __lasx_xbz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_d((v4u64)(_1)))
+
+#define __lasx_xbz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_h((v16u16)(_1)))
+
+#define __lasx_xbz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_v((v32u8)(_1)))
+
+#define __lasx_xbz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_w((v8u32)(_1)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_caf_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_caf_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_caf_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_caf_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_ceq_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_ceq_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_ceq_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_ceq_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_cle_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_cle_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_cle_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_cle_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_clt_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_clt_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_clt_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_clt_s((v8f32)_1, (v8f32)_2);
+}
+
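/* Illustrative sketch, not part of the patch: unlike the vector-returning
 * intrinsics, the __lasx_xbz_* / __lasx_xbnz_* macros above yield an int, so
 * they can feed ordinary branches. The semantics assumed here mirror the
 * set-on-zero instructions: xbz_b is true when any 8-bit lane is zero, and
 * xbz_v when the whole 256-bit register is zero. */
int any_byte_lane_zero(__m256i v) { return __lasx_xbz_b(v); }
int whole_vector_zero(__m256i v) { return __lasx_xbz_v(v); }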
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_cne_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_cne_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_cne_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_cne_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_cor_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_cor_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_cor_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_cor_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_cueq_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_cueq_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_cueq_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_cueq_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_cule_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_cule_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_cule_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_cule_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_cult_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_cult_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_cult_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_cult_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_cun_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_cun_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_cune_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_cune_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_cune_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_cune_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_cun_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_cun_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_saf_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_saf_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_saf_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_saf_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_seq_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_seq_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_seq_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_seq_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_sle_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_sle_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_sle_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_sle_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_slt_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_slt_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_slt_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_slt_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_sne_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_sne_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_sne_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_sne_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_sor_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_sor_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_sor_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_sor_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_sueq_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_sueq_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_sueq_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_sueq_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_sule_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_sule_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_sule_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_sule_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_sult_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_sult_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_sult_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_sult_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_sun_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_sun_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_sune_d(__m256d _1, __m256d _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_sune_d((v4f64)_1, (v4f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_sune_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_sune_s((v8f32)_1, (v8f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_sun_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_sun_s((v8f32)_1, (v8f32)_2);
+}
+
+#define __lasx_xvpickve_d_f(/*__m256d*/ _1, /*ui2*/ _2) \
+  ((__m256d)__builtin_lasx_xvpickve_d_f((v4f64)(_1), (_2)))
+
+#define __lasx_xvpickve_w_f(/*__m256*/ _1, /*ui3*/ _2) \
+  ((__m256)__builtin_lasx_xvpickve_w_f((v8f32)(_1), (_2)))
+
+#define __lasx_xvrepli_b(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_b((_1)))
+
+#define __lasx_xvrepli_d(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_d((_1)))
+
+#define __lasx_xvrepli_h(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_h((_1)))
+
+#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1)))
+
+#endif /* defined(__loongarch_asx). */
+#endif /* _LOONGSON_ASXINTRIN_H. */
diff --git a/clang/lib/Headers/lsxintrin.h b/clang/lib/Headers/lsxintrin.h
new file mode 100644
index 0000000000000000000000000000000000000000..a29bc7757ab5680e733561da9700716512885f71
--- /dev/null
+++ b/clang/lib/Headers/lsxintrin.h
@@ -0,0 +1,3726 @@
+/*===------------- lsxintrin.h - LoongArch LSX intrinsics ------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef _LOONGSON_SXINTRIN_H
+#define _LOONGSON_SXINTRIN_H 1
+
+#if defined(__loongarch_sx)
+typedef signed char v16i8 __attribute__((vector_size(16), aligned(16)));
+typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1)));
+typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16)));
+typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1)));
+typedef short v8i16 __attribute__((vector_size(16), aligned(16)));
+typedef short v8i16_h __attribute__((vector_size(16), aligned(2)));
+typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16)));
+typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2)));
+typedef int v4i32 __attribute__((vector_size(16), aligned(16)));
+typedef int v4i32_w __attribute__((vector_size(16), aligned(4)));
+typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16)));
+typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4)));
+typedef long long v2i64 __attribute__((vector_size(16), aligned(16)));
+typedef long long v2i64_d __attribute__((vector_size(16), aligned(8)));
+typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16)));
+typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8)));
+typedef float v4f32 __attribute__((vector_size(16), aligned(16)));
+typedef float v4f32_w __attribute__((vector_size(16), aligned(4)));
+typedef double v2f64 __attribute__((vector_size(16), aligned(16)));
+typedef double v2f64_d __attribute__((vector_size(16), aligned(8)));
+
+typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__));
+typedef float __m128 __attribute__((__vector_size__(16), __may_alias__));
+typedef double __m128d __attribute__((__vector_size__(16), __may_alias__));
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsll_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsll_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsll_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsll_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsll_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsll_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsll_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsll_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vslli_b(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((__m128i)__builtin_lsx_vslli_b((v16i8)(_1), (_2)))
+
+#define __lsx_vslli_h(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((__m128i)__builtin_lsx_vslli_h((v8i16)(_1), (_2)))
+
+#define __lsx_vslli_w(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vslli_w((v4i32)(_1), (_2)))
+
+#define __lsx_vslli_d(/*__m128i*/ _1, /*ui6*/ _2) \
+  ((__m128i)__builtin_lsx_vslli_d((v2i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsra_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsra_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsra_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsra_h((v8i16)_1, (v8i16)_2);
+}
+
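/* Illustrative sketch, not part of the patch: callers pass the generic
 * __m128i type and the wrappers perform the lane-type casts internally; the
 * immediate (_i-suffixed) forms need a literal shift count. For example,
 * assuming an LSX-enabled build compiled against this header: */
__m128i shift_bytes(__m128i v, __m128i counts) {
  /* Per-8-bit-lane logical left shift; counts come from the second vector. */
  return __lsx_vsll_b(v, counts);
}
__m128i shift_bytes_by2(__m128i v) {
  /* Immediate form: the ui3 count must be a compile-time constant 0..7. */
  return __lsx_vslli_b(v, 2);
}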
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsra_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsra_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsra_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsra_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vsrai_b(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((__m128i)__builtin_lsx_vsrai_b((v16i8)(_1), (_2)))
+
+#define __lsx_vsrai_h(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((__m128i)__builtin_lsx_vsrai_h((v8i16)(_1), (_2)))
+
+#define __lsx_vsrai_w(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vsrai_w((v4i32)(_1), (_2)))
+
+#define __lsx_vsrai_d(/*__m128i*/ _1, /*ui6*/ _2) \
+  ((__m128i)__builtin_lsx_vsrai_d((v2i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrar_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrar_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrar_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrar_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrar_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrar_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrar_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrar_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vsrari_b(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((__m128i)__builtin_lsx_vsrari_b((v16i8)(_1), (_2)))
+
+#define __lsx_vsrari_h(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((__m128i)__builtin_lsx_vsrari_h((v8i16)(_1), (_2)))
+
+#define __lsx_vsrari_w(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vsrari_w((v4i32)(_1), (_2)))
+
+#define __lsx_vsrari_d(/*__m128i*/ _1, /*ui6*/ _2) \
+  ((__m128i)__builtin_lsx_vsrari_d((v2i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrl_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrl_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrl_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrl_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrl_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrl_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrl_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrl_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vsrli_b(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((__m128i)__builtin_lsx_vsrli_b((v16i8)(_1), (_2)))
+
+#define __lsx_vsrli_h(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((__m128i)__builtin_lsx_vsrli_h((v8i16)(_1), (_2)))
+
+#define __lsx_vsrli_w(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vsrli_w((v4i32)(_1), (_2)))
+
+#define __lsx_vsrli_d(/*__m128i*/ _1, /*ui6*/ _2) \
+  ((__m128i)__builtin_lsx_vsrli_d((v2i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrlr_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrlr_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrlr_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrlr_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrlr_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrlr_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrlr_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrlr_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vsrlri_b(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((__m128i)__builtin_lsx_vsrlri_b((v16i8)(_1), (_2)))
+
+#define __lsx_vsrlri_h(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((__m128i)__builtin_lsx_vsrlri_h((v8i16)(_1), (_2)))
+
+#define __lsx_vsrlri_w(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vsrlri_w((v4i32)(_1), (_2)))
+
+#define __lsx_vsrlri_d(/*__m128i*/ _1, /*ui6*/ _2) \
+  ((__m128i)__builtin_lsx_vsrlri_d((v2i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vbitclr_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vbitclr_b((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vbitclr_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vbitclr_h((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vbitclr_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vbitclr_w((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vbitclr_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vbitclr_d((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vbitclri_b(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((__m128i)__builtin_lsx_vbitclri_b((v16u8)(_1), (_2)))
+
+#define __lsx_vbitclri_h(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((__m128i)__builtin_lsx_vbitclri_h((v8u16)(_1), (_2)))
+
+#define __lsx_vbitclri_w(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vbitclri_w((v4u32)(_1), (_2)))
+
+#define __lsx_vbitclri_d(/*__m128i*/ _1, /*ui6*/ _2) \
+  ((__m128i)__builtin_lsx_vbitclri_d((v2u64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vbitset_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vbitset_b((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vbitset_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vbitset_h((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vbitset_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vbitset_w((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vbitset_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vbitset_d((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vbitseti_b(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((__m128i)__builtin_lsx_vbitseti_b((v16u8)(_1), (_2)))
+
+#define __lsx_vbitseti_h(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((__m128i)__builtin_lsx_vbitseti_h((v8u16)(_1), (_2)))
+
+#define __lsx_vbitseti_w(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vbitseti_w((v4u32)(_1), (_2)))
+
+#define __lsx_vbitseti_d(/*__m128i*/ _1, /*ui6*/ _2) \
+  ((__m128i)__builtin_lsx_vbitseti_d((v2u64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vbitrev_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vbitrev_b((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vbitrev_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vbitrev_h((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vbitrev_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vbitrev_w((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vbitrev_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vbitrev_d((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vbitrevi_b(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((__m128i)__builtin_lsx_vbitrevi_b((v16u8)(_1), (_2)))
+
+#define __lsx_vbitrevi_h(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((__m128i)__builtin_lsx_vbitrevi_h((v8u16)(_1), (_2)))
+
+#define __lsx_vbitrevi_w(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vbitrevi_w((v4u32)(_1), (_2)))
+
+#define __lsx_vbitrevi_d(/*__m128i*/ _1, /*ui6*/ _2) \
+  ((__m128i)__builtin_lsx_vbitrevi_d((v2u64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vadd_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vadd_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vadd_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vadd_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vadd_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vadd_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vadd_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vadd_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vaddi_bu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vaddi_bu((v16i8)(_1), (_2)))
+
+#define __lsx_vaddi_hu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vaddi_hu((v8i16)(_1), (_2)))
+
+#define __lsx_vaddi_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vaddi_wu((v4i32)(_1), (_2)))
+
+#define __lsx_vaddi_du(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vaddi_du((v2i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsub_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsub_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsub_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsub_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsub_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsub_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsub_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsub_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vsubi_bu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vsubi_bu((v16i8)(_1), (_2)))
+
+#define __lsx_vsubi_hu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vsubi_hu((v8i16)(_1), (_2)))
+
+#define __lsx_vsubi_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vsubi_wu((v4i32)(_1), (_2)))
+
+#define __lsx_vsubi_du(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vsubi_du((v2i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmax_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmax_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmax_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmax_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmax_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmax_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmax_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmax_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vmaxi_b(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vmaxi_b((v16i8)(_1), (_2)))
+
+#define __lsx_vmaxi_h(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vmaxi_h((v8i16)(_1), (_2)))
+
+#define __lsx_vmaxi_w(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vmaxi_w((v4i32)(_1), (_2)))
+
+#define __lsx_vmaxi_d(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vmaxi_d((v2i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmax_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmax_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmax_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmax_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmax_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmax_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmax_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmax_du((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vmaxi_bu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vmaxi_bu((v16u8)(_1), (_2)))
+
+#define __lsx_vmaxi_hu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vmaxi_hu((v8u16)(_1), (_2)))
+
+#define __lsx_vmaxi_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vmaxi_wu((v4u32)(_1), (_2)))
+
+#define __lsx_vmaxi_du(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vmaxi_du((v2u64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmin_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmin_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmin_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmin_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmin_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmin_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmin_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmin_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vmini_b(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vmini_b((v16i8)(_1), (_2)))
+
+#define __lsx_vmini_h(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vmini_h((v8i16)(_1), (_2)))
+
+#define __lsx_vmini_w(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vmini_w((v4i32)(_1), (_2)))
+
+#define __lsx_vmini_d(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vmini_d((v2i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmin_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmin_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmin_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmin_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmin_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmin_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmin_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmin_du((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vmini_bu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vmini_bu((v16u8)(_1), (_2)))
+
+#define __lsx_vmini_hu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vmini_hu((v8u16)(_1), (_2)))
+
+#define __lsx_vmini_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vmini_wu((v4u32)(_1), (_2)))
+
+#define __lsx_vmini_du(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vmini_du((v2u64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vseq_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vseq_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vseq_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vseq_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vseq_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vseq_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vseq_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vseq_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vseqi_b(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vseqi_b((v16i8)(_1), (_2)))
+
+#define __lsx_vseqi_h(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vseqi_h((v8i16)(_1), (_2)))
+
+#define __lsx_vseqi_w(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vseqi_w((v4i32)(_1), (_2)))
+
+#define __lsx_vseqi_d(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vseqi_d((v2i64)(_1), (_2)))
+
+#define __lsx_vslti_b(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vslti_b((v16i8)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vslt_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vslt_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vslt_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vslt_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vslt_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vslt_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vslt_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vslt_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vslti_h(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vslti_h((v8i16)(_1), (_2)))
+
+#define __lsx_vslti_w(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vslti_w((v4i32)(_1), (_2)))
+
+#define __lsx_vslti_d(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vslti_d((v2i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vslt_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vslt_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vslt_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vslt_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vslt_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vslt_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vslt_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vslt_du((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vslti_bu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vslti_bu((v16u8)(_1), (_2)))
+
+#define __lsx_vslti_hu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vslti_hu((v8u16)(_1), (_2)))
+
+#define __lsx_vslti_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vslti_wu((v4u32)(_1), (_2)))
+
+#define __lsx_vslti_du(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vslti_du((v2u64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsle_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsle_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsle_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsle_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsle_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsle_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsle_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsle_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vslei_b(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vslei_b((v16i8)(_1), (_2)))
+
+#define __lsx_vslei_h(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vslei_h((v8i16)(_1), (_2)))
+
+#define __lsx_vslei_w(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vslei_w((v4i32)(_1), (_2)))
+
+#define __lsx_vslei_d(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vslei_d((v2i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsle_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsle_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsle_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsle_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsle_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsle_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsle_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsle_du((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vslei_bu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vslei_bu((v16u8)(_1), (_2)))
+
+#define __lsx_vslei_hu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vslei_hu((v8u16)(_1), (_2)))
+
+#define __lsx_vslei_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vslei_wu((v4u32)(_1), (_2)))
+
+#define __lsx_vslei_du(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vslei_du((v2u64)(_1), (_2)))
+
+#define __lsx_vsat_b(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((__m128i)__builtin_lsx_vsat_b((v16i8)(_1), (_2)))
+
+#define __lsx_vsat_h(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((__m128i)__builtin_lsx_vsat_h((v8i16)(_1), (_2)))
+
+#define __lsx_vsat_w(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vsat_w((v4i32)(_1), (_2)))
+
+#define __lsx_vsat_d(/*__m128i*/ _1, /*ui6*/ _2) \
+  ((__m128i)__builtin_lsx_vsat_d((v2i64)(_1), (_2)))
+
+#define __lsx_vsat_bu(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((__m128i)__builtin_lsx_vsat_bu((v16u8)(_1), (_2)))
+
+#define __lsx_vsat_hu(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((__m128i)__builtin_lsx_vsat_hu((v8u16)(_1), (_2)))
+
+#define __lsx_vsat_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vsat_wu((v4u32)(_1), (_2)))
+
+#define __lsx_vsat_du(/*__m128i*/ _1, /*ui6*/ _2) \
+  ((__m128i)__builtin_lsx_vsat_du((v2u64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vadda_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vadda_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vadda_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vadda_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vadda_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vadda_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vadda_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vadda_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsadd_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsadd_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsadd_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsadd_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsadd_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsadd_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsadd_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsadd_d((v2i64)_1, (v2i64)_2);
+}
+
__inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m128i + __lsx_vavgr_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_wu(__m128i _1, __m128i _2) { + return 
(__m128i)__builtin_lsx_vabsd_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_b((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_h((v8i16)_1, (v8i16)_2, (v8i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_w((v4i32)_1, (v4i32)_2, (v4i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_d((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_b((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_h((v8i16)_1, (v8i16)_2, (v8i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_w((v4i32)_1, (v4i32)_2, (v4i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_d((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i + __lsx_vdiv_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_hu_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_hu_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_wu_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_wu_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_du_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_du_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_hu_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_hu_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_wu_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_wu_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_du_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_du_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplve_b(__m128i _1, int _2) { + return (__m128i)__builtin_lsx_vreplve_b((v16i8)_1, (int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplve_h(__m128i _1, int _2) { + return (__m128i)__builtin_lsx_vreplve_h((v8i16)_1, (int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplve_w(__m128i _1, int _2) { + return (__m128i)__builtin_lsx_vreplve_w((v4i32)_1, (int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplve_d(__m128i _1, int _2) { + return (__m128i)__builtin_lsx_vreplve_d((v2i64)_1, (int)_2); +} + +#define __lsx_vreplvei_b(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vreplvei_b((v16i8)(_1), (_2))) + +#define __lsx_vreplvei_h(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vreplvei_h((v8i16)(_1), (_2))) + +#define __lsx_vreplvei_w(/*__m128i*/ _1, /*ui2*/ _2) \ + ((__m128i)__builtin_lsx_vreplvei_w((v4i32)(_1), (_2))) + +#define __lsx_vreplvei_d(/*__m128i*/ _1, /*ui1*/ _2) \ + ((__m128i)__builtin_lsx_vreplvei_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickev_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickev_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickev_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickev_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickev_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickev_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i + __lsx_vpickev_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickev_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickod_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickod_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickod_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickod_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickod_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickod_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickod_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickod_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvh_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvh_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvh_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvh_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvh_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvh_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvh_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvh_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvl_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvl_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvl_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvl_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvl_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvl_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvl_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvl_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackev_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackev_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackev_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackev_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackev_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackev_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackev_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackev_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + 
__lsx_vpackod_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackod_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpackod_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackod_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpackod_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackod_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpackod_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackod_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vshuf_h(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vshuf_h((v8i16)_1, (v8i16)_2, (v8i16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vshuf_w(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vshuf_w((v4i32)_1, (v4i32)_2, (v4i32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vshuf_d(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vshuf_d((v2i64)_1, (v2i64)_2, (v2i64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vand_v(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vand_v((v16u8)_1, (v16u8)_2);
+}
+
+#define __lsx_vandi_b(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vandi_b((v16u8)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vor_v(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vor_v((v16u8)_1, (v16u8)_2);
+}
+
+#define __lsx_vori_b(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vori_b((v16u8)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vnor_v(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vnor_v((v16u8)_1, (v16u8)_2);
+}
+
+#define __lsx_vnori_b(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vnori_b((v16u8)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vxor_v(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vxor_v((v16u8)_1, (v16u8)_2);
+}
+
+#define __lsx_vxori_b(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vxori_b((v16u8)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vbitsel_v(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vbitsel_v((v16u8)_1, (v16u8)_2, (v16u8)_3);
+}
+
+#define __lsx_vbitseli_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \
+  ((__m128i)__builtin_lsx_vbitseli_b((v16u8)(_1), (v16u8)(_2), (_3)))
+
+#define __lsx_vshuf4i_b(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vshuf4i_b((v16i8)(_1), (_2)))
+
+#define __lsx_vshuf4i_h(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vshuf4i_h((v8i16)(_1), (_2)))
+
+#define __lsx_vshuf4i_w(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vshuf4i_w((v4i32)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplgr2vr_b(int _1) {
+  return (__m128i)__builtin_lsx_vreplgr2vr_b((int)_1);
+}
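+
+/* Usage sketch (editorial illustration, not from the original source; a, b
+ * and c are hypothetical, assumed-initialized vectors on a LoongArch target
+ * built with LSX enabled, e.g. -mlsx): the integer comparisons above return
+ * all-ones lanes where the predicate holds and all-zero lanes elsewhere, so
+ * their results feed the bitwise/select ops directly as masks.
+ *
+ *   __m128i m = __lsx_vslt_h(a, b);        // 0xffff in lanes where a < b
+ *   __m128i r = __lsx_vbitsel_v(c, a, m);  // per bit: m ? a : c
+ */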
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplgr2vr_h(int _1) {
+  return (__m128i)__builtin_lsx_vreplgr2vr_h((int)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplgr2vr_w(int _1) {
+  return (__m128i)__builtin_lsx_vreplgr2vr_w((int)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplgr2vr_d(long int _1) {
+  return (__m128i)__builtin_lsx_vreplgr2vr_d((long int)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpcnt_b(__m128i _1) {
+  return (__m128i)__builtin_lsx_vpcnt_b((v16i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpcnt_h(__m128i _1) {
+  return (__m128i)__builtin_lsx_vpcnt_h((v8i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpcnt_w(__m128i _1) {
+  return (__m128i)__builtin_lsx_vpcnt_w((v4i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpcnt_d(__m128i _1) {
+  return (__m128i)__builtin_lsx_vpcnt_d((v2i64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclo_b(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclo_b((v16i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclo_h(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclo_h((v8i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclo_w(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclo_w((v4i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclo_d(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclo_d((v2i64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclz_b(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclz_b((v16i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclz_h(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclz_h((v8i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclz_w(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclz_w((v4i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclz_d(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclz_d((v2i64)_1);
+}
+
+#define __lsx_vpickve2gr_b(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((int)__builtin_lsx_vpickve2gr_b((v16i8)(_1), (_2)))
+
+#define __lsx_vpickve2gr_h(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((int)__builtin_lsx_vpickve2gr_h((v8i16)(_1), (_2)))
+
+#define __lsx_vpickve2gr_w(/*__m128i*/ _1, /*ui2*/ _2) \
+  ((int)__builtin_lsx_vpickve2gr_w((v4i32)(_1), (_2)))
+
+#define __lsx_vpickve2gr_d(/*__m128i*/ _1, /*ui1*/ _2) \
+  ((long int)__builtin_lsx_vpickve2gr_d((v2i64)(_1), (_2)))
+
+#define __lsx_vpickve2gr_bu(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((unsigned int)__builtin_lsx_vpickve2gr_bu((v16i8)(_1), (_2)))
+
+#define __lsx_vpickve2gr_hu(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((unsigned int)__builtin_lsx_vpickve2gr_hu((v8i16)(_1), (_2)))
+
+#define __lsx_vpickve2gr_wu(/*__m128i*/ _1, /*ui2*/ _2) \
+  ((unsigned int)__builtin_lsx_vpickve2gr_wu((v4i32)(_1), (_2)))
+
+#define __lsx_vpickve2gr_du(/*__m128i*/ _1, /*ui1*/ _2) \
+  ((unsigned long int)__builtin_lsx_vpickve2gr_du((v2i64)(_1), (_2)))
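+
+/* Usage sketch (editorial illustration, not from the original source; v is a
+ * hypothetical __m128i value): vpickve2gr/vinsgr2vr move a single element
+ * between an LSX register and a general-purpose register, and the element
+ * index must be an immediate constant expression.
+ *
+ *   int     e2 = __lsx_vpickve2gr_w(v, 2);         // read element 2
+ *   __m128i w  = __lsx_vinsgr2vr_w(v, e2 + 1, 3);  // replace element 3
+ */
+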
+#define __lsx_vinsgr2vr_b(/*__m128i*/ _1, /*int*/ _2, /*ui4*/ _3) \
+  ((__m128i)__builtin_lsx_vinsgr2vr_b((v16i8)(_1), (int)(_2), (_3)))
+
+#define __lsx_vinsgr2vr_h(/*__m128i*/ _1, /*int*/ _2, /*ui3*/ _3) \
+  ((__m128i)__builtin_lsx_vinsgr2vr_h((v8i16)(_1), (int)(_2), (_3)))
+
+#define __lsx_vinsgr2vr_w(/*__m128i*/ _1, /*int*/ _2, /*ui2*/ _3) \
+  ((__m128i)__builtin_lsx_vinsgr2vr_w((v4i32)(_1), (int)(_2), (_3)))
+
+#define __lsx_vinsgr2vr_d(/*__m128i*/ _1, /*long int*/ _2, /*ui1*/ _3) \
+  ((__m128i)__builtin_lsx_vinsgr2vr_d((v2i64)(_1), (long int)(_2), (_3)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfadd_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfadd_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfadd_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfadd_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfsub_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfsub_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfsub_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfsub_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfmul_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfmul_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfmul_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfmul_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfdiv_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfdiv_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfdiv_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfdiv_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vfcvt_h_s(__m128 _1, __m128 _2) {
+  return (__m128i)__builtin_lsx_vfcvt_h_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfcvt_s_d(__m128d _1, __m128d _2) {
+  return (__m128)__builtin_lsx_vfcvt_s_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfmin_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfmin_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfmin_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfmin_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfmina_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfmina_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfmina_d(__m128d _1, __m128d _2) {
+  return
(__m128d)__builtin_lsx_vfmina_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmax_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfmax_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmax_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfmax_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmaxa_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfmaxa_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmaxa_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfmaxa_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfclass_s(__m128 _1) { + return (__m128i)__builtin_lsx_vfclass_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfclass_d(__m128d _1) { + return (__m128i)__builtin_lsx_vfclass_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfsqrt_s(__m128 _1) { + return (__m128)__builtin_lsx_vfsqrt_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfsqrt_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfsqrt_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrecip_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrecip_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrecip_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrint_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrint_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrint_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrint_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrsqrt_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrsqrt_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrsqrt_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vflogb_s(__m128 _1) { + return (__m128)__builtin_lsx_vflogb_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vflogb_d(__m128d _1) { + return (__m128d)__builtin_lsx_vflogb_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfcvth_s_h(__m128i _1) { + return (__m128)__builtin_lsx_vfcvth_s_h((v8i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfcvth_d_s(__m128 _1) { + return (__m128d)__builtin_lsx_vfcvth_d_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfcvtl_s_h(__m128i 
_1) { + return (__m128)__builtin_lsx_vfcvtl_s_h((v8i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfcvtl_d_s(__m128 _1) { + return (__m128d)__builtin_lsx_vfcvtl_d_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftint_w_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftint_l_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_wu_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftint_wu_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_lu_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftint_lu_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrz_w_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrz_l_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_wu_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrz_wu_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_lu_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrz_lu_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vffint_s_w(__m128i _1) { + return (__m128)__builtin_lsx_vffint_s_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vffint_d_l(__m128i _1) { + return (__m128d)__builtin_lsx_vffint_d_l((v2i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vffint_s_wu(__m128i _1) { + return (__m128)__builtin_lsx_vffint_s_wu((v4u32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vffint_d_lu(__m128i _1) { + return (__m128d)__builtin_lsx_vffint_d_lu((v2u64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vandn_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vandn_v((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vneg_b(__m128i _1) { + return (__m128i)__builtin_lsx_vneg_b((v16i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vneg_h(__m128i _1) { + return (__m128i)__builtin_lsx_vneg_h((v8i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vneg_w(__m128i _1) { + return (__m128i)__builtin_lsx_vneg_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vneg_d(__m128i _1) { + return (__m128i)__builtin_lsx_vneg_d((v2i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_b(__m128i 
_1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_du((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vsllwil_h_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_h_b((v16i8)(_1), (_2))) + +#define __lsx_vsllwil_w_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_w_h((v8i16)(_1), (_2))) + +#define __lsx_vsllwil_d_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_d_w((v4i32)(_1), (_2))) + +#define __lsx_vsllwil_hu_bu(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_hu_bu((v16u8)(_1), (_2))) + +#define __lsx_vsllwil_wu_hu(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_wu_hu((v8u16)(_1), (_2))) + +#define __lsx_vsllwil_du_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_du_wu((v4u32)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsran_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsran_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsran_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsran_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsran_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsran_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m128i + __lsx_vssran_bu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_bu_h((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_hu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_hu_w((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_wu_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_wu_d((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrarn_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrarn_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrarn_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrarn_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrarn_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrarn_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_bu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_bu_h((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_hu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_hu_w((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_wu_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_wu_d((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrln_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrln_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrln_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrln_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrln_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrln_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_bu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_bu_h((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_hu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_hu_w((v4u32)_1, (v4u32)_2); +} + +extern __inline + 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_wu_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_wu_d((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlrn_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlrn_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlrn_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlrn_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlrn_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlrn_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_bu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_bu_h((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_hu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_hu_w((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_wu_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_wu_d((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vfrstpi_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vfrstpi_b((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vfrstpi_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vfrstpi_h((v8i16)(_1), (v8i16)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfrstp_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vfrstp_b((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfrstp_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vfrstp_h((v8i16)_1, (v8i16)_2, (v8i16)_3); +} + +#define __lsx_vshuf4i_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vshuf4i_d((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vbsrl_v(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbsrl_v((v16i8)(_1), (_2))) + +#define __lsx_vbsll_v(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbsll_v((v16i8)(_1), (_2))) + +#define __lsx_vextrins_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_b((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vextrins_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_h((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vextrins_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_w((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vextrins_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_d((v2i64)(_1), (v2i64)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskltz_b(__m128i _1) { + return (__m128i)__builtin_lsx_vmskltz_b((v16i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskltz_h(__m128i _1) { + return (__m128i)__builtin_lsx_vmskltz_h((v8i16)_1); +} + +extern __inline + 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskltz_w(__m128i _1) { + return (__m128i)__builtin_lsx_vmskltz_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskltz_d(__m128i _1) { + return (__m128i)__builtin_lsx_vmskltz_d((v2i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmadd_s(__m128 _1, __m128 _2, __m128 _3) { + return (__m128)__builtin_lsx_vfmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmadd_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmsub_s(__m128 _1, __m128 _2, __m128 _3) { + return (__m128)__builtin_lsx_vfmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmsub_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfnmadd_s(__m128 _1, __m128 _2, __m128 _3) { + return (__m128)__builtin_lsx_vfnmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfnmadd_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfnmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfnmsub_s(__m128 _1, __m128 _2, __m128 _3) { + return (__m128)__builtin_lsx_vfnmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfnmsub_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfnmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrne_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrne_w_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrne_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrne_l_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrp_w_s(__m128 _1) { + return 
(__m128i)__builtin_lsx_vftintrp_w_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrp_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrp_l_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrm_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrm_w_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrm_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrm_l_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftint_w_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vffint_s_l(__m128i _1, __m128i _2) { + return (__m128)__builtin_lsx_vffint_s_l((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftintrz_w_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrp_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftintrp_w_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrm_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftintrm_w_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrne_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftintrne_w_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintl_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintl_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftinth_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftinth_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vffinth_d_w(__m128i _1) { + return (__m128d)__builtin_lsx_vffinth_d_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vffintl_d_w(__m128i _1) { + return (__m128d)__builtin_lsx_vffintl_d_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrzl_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrzl_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrzh_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrzh_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrpl_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrpl_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrph_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrph_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrml_l_s(__m128 _1) { + 
return (__m128i)__builtin_lsx_vftintrml_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrmh_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrmh_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrnel_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrnel_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrneh_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrneh_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrintrne_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrne_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrne_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrne_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrintrz_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrz_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrz_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrz_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrintrp_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrp_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrp_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrp_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrintrm_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrm_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrm_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrm_d((v2f64)_1); +} + +#define __lsx_vstelm_b(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_b((v16i8)(_1), (void *)(_2), (_3), (_4))) + +#define __lsx_vstelm_h(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_h((v8i16)(_1), (void *)(_2), (_3), (_4))) + +#define __lsx_vstelm_w(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_w((v4i32)(_1), (void *)(_2), (_3), (_4))) + +#define __lsx_vstelm_d(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_d((v2i64)(_1), (void *)(_2), (_3), (_4))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_d_w(__m128i _1, __m128i _2) { + return 
(__m128i)__builtin_lsx_vaddwod_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_d_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_w_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_h_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_d_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_w_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_h_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_d_wu_w((v4u32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_w_hu_h((v8u16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_h_bu_b((v16u8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_d_wu_w((v4u32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_w_hu_h((v8u16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_h_bu_b((v16u8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i + __lsx_vsubwev_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_d_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_w_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_h_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_d_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_w_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_h_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_q_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_q_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_q_du((v2u64)_1, 
(v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_q_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_q_du_d((v2u64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_q_du_d((v2u64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_d_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_w_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_h_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_d_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_w_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_h_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_d_wu_w((v4u32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + 
__lsx_vmulwev_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_w_hu_h((v8u16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_h_bu_b((v16u8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_d_wu_w((v4u32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_w_hu_h((v8u16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_h_bu_b((v16u8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_q_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_q_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_q_du_d((v2u64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_q_du_d((v2u64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_qu_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_qu_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_qu_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_qu_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_d_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_w_h(__m128i _1, __m128i _2, __m128i _3) { + return 
(__m128i)__builtin_lsx_vmaddwev_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_h_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_d_wu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_w_hu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_h_bu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_d_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_w_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_h_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_d_wu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_w_hu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_h_bu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_d_wu_w((v2i64)_1, (v4u32)_2, + (v4i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_w_hu_h((v4i32)_1, (v8u16)_2, + (v8i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_h_bu_b((v8i16)_1, (v16u8)_2, + (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_d_wu_w((v2i64)_1, (v4u32)_2, + (v4i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + 
__lsx_vmaddwod_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_w_hu_h((v4i32)_1, (v8u16)_2, + (v8i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_h_bu_b((v8i16)_1, (v16u8)_2, + (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_q_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_q_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_q_du(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_q_du(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_q_du_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_q_du_d((v2i64)_1, (v2u64)_2, + (v2i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_q_du_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_q_du_d((v2i64)_1, (v2u64)_2, + (v2i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_q(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_q((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_q(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_q((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_b((void const *)(_1), (_2))) + +#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_h((void const *)(_1), (_2))) + +#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_w((void const *)(_1), (_2))) + +#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_d((void const *)(_1), (_2))) + +extern __inline + 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskgez_b(__m128i _1) { + return (__m128i)__builtin_lsx_vmskgez_b((v16i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsknz_b(__m128i _1) { + return (__m128i)__builtin_lsx_vmsknz_b((v16i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_h_b(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_h_b((v16i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_w_h(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_w_h((v8i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_d_w(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_d_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_q_d(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_q_d((v2i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_hu_bu(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_hu_bu((v16u8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_wu_hu(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_wu_hu((v8u16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_du_wu(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_du_wu((v4u32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_qu_du(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_qu_du((v2u64)_1); +} + +#define __lsx_vrotri_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_b((v16i8)(_1), (_2))) + +#define __lsx_vrotri_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_h((v8i16)(_1), (_2))) + +#define __lsx_vrotri_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_w((v4i32)(_1), (_2))) + +#define __lsx_vrotri_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vextl_q_d(__m128i _1) { + return (__m128i)__builtin_lsx_vextl_q_d((v2i64)_1); +} + +#define __lsx_vsrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vsrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vsrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vsrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vsrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vsrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vsrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vsrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ 
_3) \ + ((__m128i)__builtin_lsx_vsrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrlni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrlni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrlni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrlni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrlrni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrlrni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrlrni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrlrni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vsrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vsrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vsrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vsrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vsrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vsrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vsrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vsrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + 
((__m128i)__builtin_lsx_vsrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrani_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrani_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrani_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrani_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrarni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrarni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrarni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrarni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vpermi_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vpermi_w((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vld(/*void **/ _1, /*si12*/ _2) \ + ((__m128i)__builtin_lsx_vld((void const *)(_1), (_2))) + +#define __lsx_vst(/*__m128i*/ _1, /*void **/ _2, /*si12*/ _3) \ + ((void)__builtin_lsx_vst((v16i8)(_1), (void *)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + 
__lsx_vssrln_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vorn_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vorn_v((v16i8)_1, (v16i8)_2); +} + +#define __lsx_vldi(/*i13*/ _1) ((__m128i)__builtin_lsx_vldi((_1))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vshuf_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vshuf_b((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vldx(void const *_1, long int _2) { + return (__m128i)__builtin_lsx_vldx((void const *)_1, (long int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void + __lsx_vstx(__m128i _1, void *_2, long int _3) { + return (void)__builtin_lsx_vstx((v16i8)_1, (void *)_2, (long int)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vextl_qu_du(__m128i _1) { + return (__m128i)__builtin_lsx_vextl_qu_du((v2u64)_1); +} + +#define __lsx_bnz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_b((v16u8)(_1))) + +#define __lsx_bnz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_d((v2u64)(_1))) + +#define __lsx_bnz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_h((v8u16)(_1))) + +#define __lsx_bnz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_v((v16u8)(_1))) + +#define __lsx_bnz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_w((v4u32)(_1))) + +#define __lsx_bz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bz_b((v16u8)(_1))) + +#define __lsx_bz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bz_d((v2u64)(_1))) + +#define __lsx_bz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bz_h((v8u16)(_1))) + +#define __lsx_bz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bz_v((v16u8)(_1))) + +#define __lsx_bz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bz_w((v4u32)(_1))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_caf_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_caf_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_caf_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_caf_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_ceq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_ceq_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_ceq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_ceq_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cle_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cle_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cle_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cle_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_clt_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_clt_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_clt_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_clt_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cne_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cne_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cne_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cne_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cor_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cor_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cor_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cor_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cueq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cueq_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cueq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cueq_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cule_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cule_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cule_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cule_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cult_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cult_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cult_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cult_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cun_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cun_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cune_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cune_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cune_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cune_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cun_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cun_s((v4f32)_1, (v4f32)_2); +} + 
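[Editor's aside, illustrative only, not part of the patch.] The __lsx_vfcmp_c* wrappers above are the quiet (non-trapping) compares; the __lsx_vfcmp_s* group that follows is the signaling variant. Each returns a per-lane all-ones/all-zeros mask in an __m128i, which composes naturally with bit-select. A minimal usage sketch, assuming __lsx_vbitsel_v from earlier in this header with the usual vbitsel.v semantics (bits taken from the second operand where the mask bit is set):

    #include <lsxintrin.h>

    /* Per-lane float minimum: where a[i] < b[i] take a[i], else b[i]. */
    static __m128 lane_min_f32(__m128 a, __m128 b) {
      __m128i lt = __lsx_vfcmp_clt_s(a, b);  /* all-ones lanes where a < b */
      return (__m128)__lsx_vbitsel_v((__m128i)b, (__m128i)a, lt);
    }
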
+extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_saf_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_saf_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_saf_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_saf_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_seq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_seq_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_seq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_seq_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sle_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sle_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sle_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sle_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_slt_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_slt_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_slt_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_slt_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sne_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sne_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sne_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sne_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sor_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sor_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sor_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sor_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sueq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sueq_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sueq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sueq_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sule_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sule_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sule_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sule_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sult_d(__m128d _1, __m128d _2) { + return 
(__m128i)__builtin_lsx_vfcmp_sult_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sult_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sult_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sun_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sun_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sune_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sune_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sune_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sune_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sun_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sun_s((v4f32)_1, (v4f32)_2); +} + +#define __lsx_vrepli_b(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_b((_1))) + +#define __lsx_vrepli_d(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_d((_1))) + +#define __lsx_vrepli_h(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_h((_1))) + +#define __lsx_vrepli_w(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_w((_1))) + +#endif /* defined(__loongarch_sx) */ +#endif /* _LOONGSON_SXINTRIN_H */ diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index a94f009f3fa6975f04a5f24e9dd55bb19a9b4b1d..5ee20554c4cf32fb529250ab22ee89d0da2dc179 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3826,40 +3826,14 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, switch (BuiltinID) { default: break; + // Basic intrinsics. 
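[Editor's aside, illustrative only, not part of the patch.] The TI.hasFeature("64bit")/("32bit") checks deleted in this hunk are not lost: each builtin now carries a feature string ("64bit", "32bit", "f") in BuiltinsLoongArchBase.def, so Clang's generic per-builtin target-feature machinery rejects a mismatched call without any hand-written Sema code. A sketch of the user-visible effect, with the diagnostic wording assumed from Clang's generic needs-target-feature error rather than taken from this patch:

    /* Compiled for 32-bit LoongArch, where the "64bit" feature is absent: */
    int crc(long b, int a) {
      /* expected: error: '__builtin_loongarch_crc_w_d_w' needs target
         feature 64bit */
      return __builtin_loongarch_crc_w_d_w(b, a);
    }
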
case LoongArch::BI__builtin_loongarch_cacop_d: - if (!TI.hasFeature("64bit")) - return Diag(TheCall->getBeginLoc(), - diag::err_loongarch_builtin_requires_la64) - << TheCall->getSourceRange(); - [[fallthrough]]; case LoongArch::BI__builtin_loongarch_cacop_w: { - if (BuiltinID == LoongArch::BI__builtin_loongarch_cacop_w && - !TI.hasFeature("32bit")) - return Diag(TheCall->getBeginLoc(), - diag::err_loongarch_builtin_requires_la32) - << TheCall->getSourceRange(); SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(5)); SemaBuiltinConstantArgRange(TheCall, 2, llvm::minIntN(12), llvm::maxIntN(12)); break; } - case LoongArch::BI__builtin_loongarch_crc_w_b_w: - case LoongArch::BI__builtin_loongarch_crc_w_h_w: - case LoongArch::BI__builtin_loongarch_crc_w_w_w: - case LoongArch::BI__builtin_loongarch_crc_w_d_w: - case LoongArch::BI__builtin_loongarch_crcc_w_b_w: - case LoongArch::BI__builtin_loongarch_crcc_w_h_w: - case LoongArch::BI__builtin_loongarch_crcc_w_w_w: - case LoongArch::BI__builtin_loongarch_crcc_w_d_w: - case LoongArch::BI__builtin_loongarch_iocsrrd_d: - case LoongArch::BI__builtin_loongarch_iocsrwr_d: - case LoongArch::BI__builtin_loongarch_asrtle_d: - case LoongArch::BI__builtin_loongarch_asrtgt_d: - if (!TI.hasFeature("64bit")) - return Diag(TheCall->getBeginLoc(), - diag::err_loongarch_builtin_requires_la64) - << TheCall->getSourceRange(); - break; case LoongArch::BI__builtin_loongarch_break: case LoongArch::BI__builtin_loongarch_dbar: case LoongArch::BI__builtin_loongarch_ibar: @@ -3867,41 +3841,475 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, // Check if immediate is in [0, 32767]. return SemaBuiltinConstantArgRange(TheCall, 0, 0, 32767); case LoongArch::BI__builtin_loongarch_csrrd_w: - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 16383); - case LoongArch::BI__builtin_loongarch_csrwr_w: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 16383); - case LoongArch::BI__builtin_loongarch_csrxchg_w: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 16383); case LoongArch::BI__builtin_loongarch_csrrd_d: - if (!TI.hasFeature("64bit")) - return Diag(TheCall->getBeginLoc(), - diag::err_loongarch_builtin_requires_la64) - << TheCall->getSourceRange(); return SemaBuiltinConstantArgRange(TheCall, 0, 0, 16383); + case LoongArch::BI__builtin_loongarch_csrwr_w: case LoongArch::BI__builtin_loongarch_csrwr_d: - if (!TI.hasFeature("64bit")) - return Diag(TheCall->getBeginLoc(), - diag::err_loongarch_builtin_requires_la64) - << TheCall->getSourceRange(); return SemaBuiltinConstantArgRange(TheCall, 1, 0, 16383); + case LoongArch::BI__builtin_loongarch_csrxchg_w: case LoongArch::BI__builtin_loongarch_csrxchg_d: - if (!TI.hasFeature("64bit")) - return Diag(TheCall->getBeginLoc(), - diag::err_loongarch_builtin_requires_la64) - << TheCall->getSourceRange(); return SemaBuiltinConstantArgRange(TheCall, 2, 0, 16383); case LoongArch::BI__builtin_loongarch_lddir_d: case LoongArch::BI__builtin_loongarch_ldpte_d: - if (!TI.hasFeature("64bit")) - return Diag(TheCall->getBeginLoc(), - diag::err_loongarch_builtin_requires_la64) - << TheCall->getSourceRange(); return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); case LoongArch::BI__builtin_loongarch_movfcsr2gr: case LoongArch::BI__builtin_loongarch_movgr2fcsr: return SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(2)); - } + // LSX intrinsics. 
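[Editor's aside, illustrative only, not part of the patch.] Every LSX/LASX case that follows funnels into SemaBuiltinConstantArgRange(TheCall, ArgNum, Low, High), which requires the selected argument to be an integer constant expression within [Low, High] and diagnoses it otherwise; the ranges simply mirror the width of each instruction's immediate field (ui3, ui4, si5, and so on). A small sketch of what this buys the user, assuming Clang's usual out-of-range wording:

    #include <lsxintrin.h>

    __m128i shift_each_byte(__m128i v) {
      return __lsx_vslli_b(v, 7);  /* ok: ui3 immediate, checked against [0, 7] */
    }
    /* __lsx_vslli_b(v, 8) would instead be rejected with something like:
       error: argument value 8 is outside the valid range [0, 7] */
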
+ case LoongArch::BI__builtin_lsx_vbitclri_b: + case LoongArch::BI__builtin_lsx_vbitrevi_b: + case LoongArch::BI__builtin_lsx_vbitseti_b: + case LoongArch::BI__builtin_lsx_vsat_b: + case LoongArch::BI__builtin_lsx_vsat_bu: + case LoongArch::BI__builtin_lsx_vslli_b: + case LoongArch::BI__builtin_lsx_vsrai_b: + case LoongArch::BI__builtin_lsx_vsrari_b: + case LoongArch::BI__builtin_lsx_vsrli_b: + case LoongArch::BI__builtin_lsx_vsllwil_h_b: + case LoongArch::BI__builtin_lsx_vsllwil_hu_bu: + case LoongArch::BI__builtin_lsx_vrotri_b: + case LoongArch::BI__builtin_lsx_vsrlri_b: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); + case LoongArch::BI__builtin_lsx_vbitclri_h: + case LoongArch::BI__builtin_lsx_vbitrevi_h: + case LoongArch::BI__builtin_lsx_vbitseti_h: + case LoongArch::BI__builtin_lsx_vsat_h: + case LoongArch::BI__builtin_lsx_vsat_hu: + case LoongArch::BI__builtin_lsx_vslli_h: + case LoongArch::BI__builtin_lsx_vsrai_h: + case LoongArch::BI__builtin_lsx_vsrari_h: + case LoongArch::BI__builtin_lsx_vsrli_h: + case LoongArch::BI__builtin_lsx_vsllwil_w_h: + case LoongArch::BI__builtin_lsx_vsllwil_wu_hu: + case LoongArch::BI__builtin_lsx_vrotri_h: + case LoongArch::BI__builtin_lsx_vsrlri_h: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); + case LoongArch::BI__builtin_lsx_vssrarni_b_h: + case LoongArch::BI__builtin_lsx_vssrarni_bu_h: + case LoongArch::BI__builtin_lsx_vssrani_b_h: + case LoongArch::BI__builtin_lsx_vssrani_bu_h: + case LoongArch::BI__builtin_lsx_vsrarni_b_h: + case LoongArch::BI__builtin_lsx_vsrlni_b_h: + case LoongArch::BI__builtin_lsx_vsrlrni_b_h: + case LoongArch::BI__builtin_lsx_vssrlni_b_h: + case LoongArch::BI__builtin_lsx_vssrlni_bu_h: + case LoongArch::BI__builtin_lsx_vssrlrni_b_h: + case LoongArch::BI__builtin_lsx_vssrlrni_bu_h: + case LoongArch::BI__builtin_lsx_vsrani_b_h: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); + case LoongArch::BI__builtin_lsx_vslei_bu: + case LoongArch::BI__builtin_lsx_vslei_hu: + case LoongArch::BI__builtin_lsx_vslei_wu: + case LoongArch::BI__builtin_lsx_vslei_du: + case LoongArch::BI__builtin_lsx_vslti_bu: + case LoongArch::BI__builtin_lsx_vslti_hu: + case LoongArch::BI__builtin_lsx_vslti_wu: + case LoongArch::BI__builtin_lsx_vslti_du: + case LoongArch::BI__builtin_lsx_vmaxi_bu: + case LoongArch::BI__builtin_lsx_vmaxi_hu: + case LoongArch::BI__builtin_lsx_vmaxi_wu: + case LoongArch::BI__builtin_lsx_vmaxi_du: + case LoongArch::BI__builtin_lsx_vmini_bu: + case LoongArch::BI__builtin_lsx_vmini_hu: + case LoongArch::BI__builtin_lsx_vmini_wu: + case LoongArch::BI__builtin_lsx_vmini_du: + case LoongArch::BI__builtin_lsx_vaddi_bu: + case LoongArch::BI__builtin_lsx_vaddi_hu: + case LoongArch::BI__builtin_lsx_vaddi_wu: + case LoongArch::BI__builtin_lsx_vaddi_du: + case LoongArch::BI__builtin_lsx_vbitclri_w: + case LoongArch::BI__builtin_lsx_vbitrevi_w: + case LoongArch::BI__builtin_lsx_vbitseti_w: + case LoongArch::BI__builtin_lsx_vsat_w: + case LoongArch::BI__builtin_lsx_vsat_wu: + case LoongArch::BI__builtin_lsx_vslli_w: + case LoongArch::BI__builtin_lsx_vsrai_w: + case LoongArch::BI__builtin_lsx_vsrari_w: + case LoongArch::BI__builtin_lsx_vsrli_w: + case LoongArch::BI__builtin_lsx_vsllwil_d_w: + case LoongArch::BI__builtin_lsx_vsllwil_du_wu: + case LoongArch::BI__builtin_lsx_vsrlri_w: + case LoongArch::BI__builtin_lsx_vrotri_w: + case LoongArch::BI__builtin_lsx_vsubi_bu: + case LoongArch::BI__builtin_lsx_vsubi_hu: + case LoongArch::BI__builtin_lsx_vbsrl_v: + case LoongArch::BI__builtin_lsx_vbsll_v: + case 
LoongArch::BI__builtin_lsx_vsubi_wu: + case LoongArch::BI__builtin_lsx_vsubi_du: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); + case LoongArch::BI__builtin_lsx_vssrarni_h_w: + case LoongArch::BI__builtin_lsx_vssrarni_hu_w: + case LoongArch::BI__builtin_lsx_vssrani_h_w: + case LoongArch::BI__builtin_lsx_vssrani_hu_w: + case LoongArch::BI__builtin_lsx_vsrarni_h_w: + case LoongArch::BI__builtin_lsx_vsrani_h_w: + case LoongArch::BI__builtin_lsx_vfrstpi_b: + case LoongArch::BI__builtin_lsx_vfrstpi_h: + case LoongArch::BI__builtin_lsx_vsrlni_h_w: + case LoongArch::BI__builtin_lsx_vsrlrni_h_w: + case LoongArch::BI__builtin_lsx_vssrlni_h_w: + case LoongArch::BI__builtin_lsx_vssrlni_hu_w: + case LoongArch::BI__builtin_lsx_vssrlrni_h_w: + case LoongArch::BI__builtin_lsx_vssrlrni_hu_w: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); + case LoongArch::BI__builtin_lsx_vbitclri_d: + case LoongArch::BI__builtin_lsx_vbitrevi_d: + case LoongArch::BI__builtin_lsx_vbitseti_d: + case LoongArch::BI__builtin_lsx_vsat_d: + case LoongArch::BI__builtin_lsx_vsat_du: + case LoongArch::BI__builtin_lsx_vslli_d: + case LoongArch::BI__builtin_lsx_vsrai_d: + case LoongArch::BI__builtin_lsx_vsrli_d: + case LoongArch::BI__builtin_lsx_vsrari_d: + case LoongArch::BI__builtin_lsx_vrotri_d: + case LoongArch::BI__builtin_lsx_vsrlri_d: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 63); + case LoongArch::BI__builtin_lsx_vssrarni_w_d: + case LoongArch::BI__builtin_lsx_vssrarni_wu_d: + case LoongArch::BI__builtin_lsx_vssrani_w_d: + case LoongArch::BI__builtin_lsx_vssrani_wu_d: + case LoongArch::BI__builtin_lsx_vsrarni_w_d: + case LoongArch::BI__builtin_lsx_vsrlni_w_d: + case LoongArch::BI__builtin_lsx_vsrlrni_w_d: + case LoongArch::BI__builtin_lsx_vssrlni_w_d: + case LoongArch::BI__builtin_lsx_vssrlni_wu_d: + case LoongArch::BI__builtin_lsx_vssrlrni_w_d: + case LoongArch::BI__builtin_lsx_vssrlrni_wu_d: + case LoongArch::BI__builtin_lsx_vsrani_w_d: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63); + case LoongArch::BI__builtin_lsx_vssrarni_d_q: + case LoongArch::BI__builtin_lsx_vssrarni_du_q: + case LoongArch::BI__builtin_lsx_vssrani_d_q: + case LoongArch::BI__builtin_lsx_vssrani_du_q: + case LoongArch::BI__builtin_lsx_vsrarni_d_q: + case LoongArch::BI__builtin_lsx_vssrlni_d_q: + case LoongArch::BI__builtin_lsx_vssrlni_du_q: + case LoongArch::BI__builtin_lsx_vssrlrni_d_q: + case LoongArch::BI__builtin_lsx_vssrlrni_du_q: + case LoongArch::BI__builtin_lsx_vsrani_d_q: + case LoongArch::BI__builtin_lsx_vsrlrni_d_q: + case LoongArch::BI__builtin_lsx_vsrlni_d_q: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 127); + case LoongArch::BI__builtin_lsx_vseqi_b: + case LoongArch::BI__builtin_lsx_vseqi_h: + case LoongArch::BI__builtin_lsx_vseqi_w: + case LoongArch::BI__builtin_lsx_vseqi_d: + case LoongArch::BI__builtin_lsx_vslti_b: + case LoongArch::BI__builtin_lsx_vslti_h: + case LoongArch::BI__builtin_lsx_vslti_w: + case LoongArch::BI__builtin_lsx_vslti_d: + case LoongArch::BI__builtin_lsx_vslei_b: + case LoongArch::BI__builtin_lsx_vslei_h: + case LoongArch::BI__builtin_lsx_vslei_w: + case LoongArch::BI__builtin_lsx_vslei_d: + case LoongArch::BI__builtin_lsx_vmaxi_b: + case LoongArch::BI__builtin_lsx_vmaxi_h: + case LoongArch::BI__builtin_lsx_vmaxi_w: + case LoongArch::BI__builtin_lsx_vmaxi_d: + case LoongArch::BI__builtin_lsx_vmini_b: + case LoongArch::BI__builtin_lsx_vmini_h: + case LoongArch::BI__builtin_lsx_vmini_w: + case LoongArch::BI__builtin_lsx_vmini_d: + return 
SemaBuiltinConstantArgRange(TheCall, 1, -16, 15); + case LoongArch::BI__builtin_lsx_vandi_b: + case LoongArch::BI__builtin_lsx_vnori_b: + case LoongArch::BI__builtin_lsx_vori_b: + case LoongArch::BI__builtin_lsx_vshuf4i_b: + case LoongArch::BI__builtin_lsx_vshuf4i_h: + case LoongArch::BI__builtin_lsx_vshuf4i_w: + case LoongArch::BI__builtin_lsx_vxori_b: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 255); + case LoongArch::BI__builtin_lsx_vbitseli_b: + case LoongArch::BI__builtin_lsx_vshuf4i_d: + case LoongArch::BI__builtin_lsx_vextrins_b: + case LoongArch::BI__builtin_lsx_vextrins_h: + case LoongArch::BI__builtin_lsx_vextrins_w: + case LoongArch::BI__builtin_lsx_vextrins_d: + case LoongArch::BI__builtin_lsx_vpermi_w: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 255); + case LoongArch::BI__builtin_lsx_vpickve2gr_b: + case LoongArch::BI__builtin_lsx_vpickve2gr_bu: + case LoongArch::BI__builtin_lsx_vreplvei_b: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); + case LoongArch::BI__builtin_lsx_vinsgr2vr_b: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); + case LoongArch::BI__builtin_lsx_vpickve2gr_h: + case LoongArch::BI__builtin_lsx_vpickve2gr_hu: + case LoongArch::BI__builtin_lsx_vreplvei_h: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); + case LoongArch::BI__builtin_lsx_vinsgr2vr_h: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7); + case LoongArch::BI__builtin_lsx_vpickve2gr_w: + case LoongArch::BI__builtin_lsx_vpickve2gr_wu: + case LoongArch::BI__builtin_lsx_vreplvei_w: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); + case LoongArch::BI__builtin_lsx_vinsgr2vr_w: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); + case LoongArch::BI__builtin_lsx_vpickve2gr_d: + case LoongArch::BI__builtin_lsx_vpickve2gr_du: + case LoongArch::BI__builtin_lsx_vreplvei_d: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); + case LoongArch::BI__builtin_lsx_vinsgr2vr_d: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 1); + case LoongArch::BI__builtin_lsx_vstelm_b: + return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); + case LoongArch::BI__builtin_lsx_vstelm_h: + return SemaBuiltinConstantArgRange(TheCall, 2, -256, 254) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); + case LoongArch::BI__builtin_lsx_vstelm_w: + return SemaBuiltinConstantArgRange(TheCall, 2, -512, 508) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); + case LoongArch::BI__builtin_lsx_vstelm_d: + return SemaBuiltinConstantArgRange(TheCall, 2, -1024, 1016) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 1); + case LoongArch::BI__builtin_lsx_vldrepl_b: + case LoongArch::BI__builtin_lsx_vld: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2047); + case LoongArch::BI__builtin_lsx_vldrepl_h: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2046); + case LoongArch::BI__builtin_lsx_vldrepl_w: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2044); + case LoongArch::BI__builtin_lsx_vldrepl_d: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2040); + case LoongArch::BI__builtin_lsx_vst: + return SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); + case LoongArch::BI__builtin_lsx_vldi: + return SemaBuiltinConstantArgRange(TheCall, 0, -4096, 4095); + case LoongArch::BI__builtin_lsx_vrepli_b: + case LoongArch::BI__builtin_lsx_vrepli_h: + case LoongArch::BI__builtin_lsx_vrepli_w: + case LoongArch::BI__builtin_lsx_vrepli_d: + return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); + + // LASX 
+  // LASX intrinsics.
+  case LoongArch::BI__builtin_lasx_xvbitclri_b:
+  case LoongArch::BI__builtin_lasx_xvbitrevi_b:
+  case LoongArch::BI__builtin_lasx_xvbitseti_b:
+  case LoongArch::BI__builtin_lasx_xvsat_b:
+  case LoongArch::BI__builtin_lasx_xvsat_bu:
+  case LoongArch::BI__builtin_lasx_xvslli_b:
+  case LoongArch::BI__builtin_lasx_xvsrai_b:
+  case LoongArch::BI__builtin_lasx_xvsrari_b:
+  case LoongArch::BI__builtin_lasx_xvsrli_b:
+  case LoongArch::BI__builtin_lasx_xvsllwil_h_b:
+  case LoongArch::BI__builtin_lasx_xvsllwil_hu_bu:
+  case LoongArch::BI__builtin_lasx_xvrotri_b:
+  case LoongArch::BI__builtin_lasx_xvsrlri_b:
+    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7);
+  case LoongArch::BI__builtin_lasx_xvbitclri_h:
+  case LoongArch::BI__builtin_lasx_xvbitrevi_h:
+  case LoongArch::BI__builtin_lasx_xvbitseti_h:
+  case LoongArch::BI__builtin_lasx_xvsat_h:
+  case LoongArch::BI__builtin_lasx_xvsat_hu:
+  case LoongArch::BI__builtin_lasx_xvslli_h:
+  case LoongArch::BI__builtin_lasx_xvsrai_h:
+  case LoongArch::BI__builtin_lasx_xvsrari_h:
+  case LoongArch::BI__builtin_lasx_xvsrli_h:
+  case LoongArch::BI__builtin_lasx_xvsllwil_w_h:
+  case LoongArch::BI__builtin_lasx_xvsllwil_wu_hu:
+  case LoongArch::BI__builtin_lasx_xvrotri_h:
+  case LoongArch::BI__builtin_lasx_xvsrlri_h:
+    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15);
+  case LoongArch::BI__builtin_lasx_xvssrarni_b_h:
+  case LoongArch::BI__builtin_lasx_xvssrarni_bu_h:
+  case LoongArch::BI__builtin_lasx_xvssrani_b_h:
+  case LoongArch::BI__builtin_lasx_xvssrani_bu_h:
+  case LoongArch::BI__builtin_lasx_xvsrarni_b_h:
+  case LoongArch::BI__builtin_lasx_xvsrlni_b_h:
+  case LoongArch::BI__builtin_lasx_xvsrlrni_b_h:
+  case LoongArch::BI__builtin_lasx_xvssrlni_b_h:
+  case LoongArch::BI__builtin_lasx_xvssrlni_bu_h:
+  case LoongArch::BI__builtin_lasx_xvssrlrni_b_h:
+  case LoongArch::BI__builtin_lasx_xvssrlrni_bu_h:
+  case LoongArch::BI__builtin_lasx_xvsrani_b_h:
+    return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15);
+  case LoongArch::BI__builtin_lasx_xvslei_bu:
+  case LoongArch::BI__builtin_lasx_xvslei_hu:
+  case LoongArch::BI__builtin_lasx_xvslei_wu:
+  case LoongArch::BI__builtin_lasx_xvslei_du:
+  case LoongArch::BI__builtin_lasx_xvslti_bu:
+  case LoongArch::BI__builtin_lasx_xvslti_hu:
+  case LoongArch::BI__builtin_lasx_xvslti_wu:
+  case LoongArch::BI__builtin_lasx_xvslti_du:
+  case LoongArch::BI__builtin_lasx_xvmaxi_bu:
+  case LoongArch::BI__builtin_lasx_xvmaxi_hu:
+  case LoongArch::BI__builtin_lasx_xvmaxi_wu:
+  case LoongArch::BI__builtin_lasx_xvmaxi_du:
+  case LoongArch::BI__builtin_lasx_xvmini_bu:
+  case LoongArch::BI__builtin_lasx_xvmini_hu:
+  case LoongArch::BI__builtin_lasx_xvmini_wu:
+  case LoongArch::BI__builtin_lasx_xvmini_du:
+  case LoongArch::BI__builtin_lasx_xvaddi_bu:
+  case LoongArch::BI__builtin_lasx_xvaddi_hu:
+  case LoongArch::BI__builtin_lasx_xvaddi_wu:
+  case LoongArch::BI__builtin_lasx_xvaddi_du:
+  case LoongArch::BI__builtin_lasx_xvbitclri_w:
+  case LoongArch::BI__builtin_lasx_xvbitrevi_w:
+  case LoongArch::BI__builtin_lasx_xvbitseti_w:
+  case LoongArch::BI__builtin_lasx_xvsat_w:
+  case LoongArch::BI__builtin_lasx_xvsat_wu:
+  case LoongArch::BI__builtin_lasx_xvslli_w:
+  case LoongArch::BI__builtin_lasx_xvsrai_w:
+  case LoongArch::BI__builtin_lasx_xvsrari_w:
+  case LoongArch::BI__builtin_lasx_xvsrli_w:
+  case LoongArch::BI__builtin_lasx_xvsllwil_d_w:
+  case LoongArch::BI__builtin_lasx_xvsllwil_du_wu:
+  case LoongArch::BI__builtin_lasx_xvsrlri_w:
+  case LoongArch::BI__builtin_lasx_xvrotri_w:
+  case LoongArch::BI__builtin_lasx_xvsubi_bu:
+  case LoongArch::BI__builtin_lasx_xvsubi_hu:
+  case LoongArch::BI__builtin_lasx_xvsubi_wu:
+  case LoongArch::BI__builtin_lasx_xvsubi_du:
+  case LoongArch::BI__builtin_lasx_xvbsrl_v:
+  case LoongArch::BI__builtin_lasx_xvbsll_v:
+    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31);
+  case LoongArch::BI__builtin_lasx_xvssrarni_h_w:
+  case LoongArch::BI__builtin_lasx_xvssrarni_hu_w:
+  case LoongArch::BI__builtin_lasx_xvssrani_h_w:
+  case LoongArch::BI__builtin_lasx_xvssrani_hu_w:
+  case LoongArch::BI__builtin_lasx_xvsrarni_h_w:
+  case LoongArch::BI__builtin_lasx_xvsrani_h_w:
+  case LoongArch::BI__builtin_lasx_xvfrstpi_b:
+  case LoongArch::BI__builtin_lasx_xvfrstpi_h:
+  case LoongArch::BI__builtin_lasx_xvsrlni_h_w:
+  case LoongArch::BI__builtin_lasx_xvsrlrni_h_w:
+  case LoongArch::BI__builtin_lasx_xvssrlni_h_w:
+  case LoongArch::BI__builtin_lasx_xvssrlni_hu_w:
+  case LoongArch::BI__builtin_lasx_xvssrlrni_h_w:
+  case LoongArch::BI__builtin_lasx_xvssrlrni_hu_w:
+    return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31);
+  case LoongArch::BI__builtin_lasx_xvbitclri_d:
+  case LoongArch::BI__builtin_lasx_xvbitrevi_d:
+  case LoongArch::BI__builtin_lasx_xvbitseti_d:
+  case LoongArch::BI__builtin_lasx_xvsat_d:
+  case LoongArch::BI__builtin_lasx_xvsat_du:
+  case LoongArch::BI__builtin_lasx_xvslli_d:
+  case LoongArch::BI__builtin_lasx_xvsrai_d:
+  case LoongArch::BI__builtin_lasx_xvsrli_d:
+  case LoongArch::BI__builtin_lasx_xvsrari_d:
+  case LoongArch::BI__builtin_lasx_xvrotri_d:
+  case LoongArch::BI__builtin_lasx_xvsrlri_d:
+    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 63);
+  case LoongArch::BI__builtin_lasx_xvssrarni_w_d:
+  case LoongArch::BI__builtin_lasx_xvssrarni_wu_d:
+  case LoongArch::BI__builtin_lasx_xvssrani_w_d:
+  case LoongArch::BI__builtin_lasx_xvssrani_wu_d:
+  case LoongArch::BI__builtin_lasx_xvsrarni_w_d:
+  case LoongArch::BI__builtin_lasx_xvsrlni_w_d:
+  case LoongArch::BI__builtin_lasx_xvsrlrni_w_d:
+  case LoongArch::BI__builtin_lasx_xvssrlni_w_d:
+  case LoongArch::BI__builtin_lasx_xvssrlni_wu_d:
+  case LoongArch::BI__builtin_lasx_xvssrlrni_w_d:
+  case LoongArch::BI__builtin_lasx_xvssrlrni_wu_d:
+  case LoongArch::BI__builtin_lasx_xvsrani_w_d:
+    return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63);
+  case LoongArch::BI__builtin_lasx_xvssrarni_d_q:
+  case LoongArch::BI__builtin_lasx_xvssrarni_du_q:
+  case LoongArch::BI__builtin_lasx_xvssrani_d_q:
+  case LoongArch::BI__builtin_lasx_xvssrani_du_q:
+  case LoongArch::BI__builtin_lasx_xvsrarni_d_q:
+  case LoongArch::BI__builtin_lasx_xvssrlni_d_q:
+  case LoongArch::BI__builtin_lasx_xvssrlni_du_q:
+  case LoongArch::BI__builtin_lasx_xvssrlrni_d_q:
+  case LoongArch::BI__builtin_lasx_xvssrlrni_du_q:
+  case LoongArch::BI__builtin_lasx_xvsrani_d_q:
+  case LoongArch::BI__builtin_lasx_xvsrlni_d_q:
+  case LoongArch::BI__builtin_lasx_xvsrlrni_d_q:
+    return SemaBuiltinConstantArgRange(TheCall, 2, 0, 127);
+  case LoongArch::BI__builtin_lasx_xvseqi_b:
+  case LoongArch::BI__builtin_lasx_xvseqi_h:
+  case LoongArch::BI__builtin_lasx_xvseqi_w:
+  case LoongArch::BI__builtin_lasx_xvseqi_d:
+  case LoongArch::BI__builtin_lasx_xvslti_b:
+  case LoongArch::BI__builtin_lasx_xvslti_h:
+  case LoongArch::BI__builtin_lasx_xvslti_w:
+  case LoongArch::BI__builtin_lasx_xvslti_d:
+  case LoongArch::BI__builtin_lasx_xvslei_b:
+  case LoongArch::BI__builtin_lasx_xvslei_h:
+  case LoongArch::BI__builtin_lasx_xvslei_w:
+  case LoongArch::BI__builtin_lasx_xvslei_d:
+  case LoongArch::BI__builtin_lasx_xvmaxi_b:
+  case LoongArch::BI__builtin_lasx_xvmaxi_h:
+  case LoongArch::BI__builtin_lasx_xvmaxi_w:
+  case LoongArch::BI__builtin_lasx_xvmaxi_d:
+  case LoongArch::BI__builtin_lasx_xvmini_b:
+  case LoongArch::BI__builtin_lasx_xvmini_h:
+  case LoongArch::BI__builtin_lasx_xvmini_w:
+  case LoongArch::BI__builtin_lasx_xvmini_d:
+    return SemaBuiltinConstantArgRange(TheCall, 1, -16, 15);
+  case LoongArch::BI__builtin_lasx_xvandi_b:
+  case LoongArch::BI__builtin_lasx_xvnori_b:
+  case LoongArch::BI__builtin_lasx_xvori_b:
+  case LoongArch::BI__builtin_lasx_xvshuf4i_b:
+  case LoongArch::BI__builtin_lasx_xvshuf4i_h:
+  case LoongArch::BI__builtin_lasx_xvshuf4i_w:
+  case LoongArch::BI__builtin_lasx_xvxori_b:
+  case LoongArch::BI__builtin_lasx_xvpermi_d:
+    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 255);
+  case LoongArch::BI__builtin_lasx_xvbitseli_b:
+  case LoongArch::BI__builtin_lasx_xvshuf4i_d:
+  case LoongArch::BI__builtin_lasx_xvextrins_b:
+  case LoongArch::BI__builtin_lasx_xvextrins_h:
+  case LoongArch::BI__builtin_lasx_xvextrins_w:
+  case LoongArch::BI__builtin_lasx_xvextrins_d:
+  case LoongArch::BI__builtin_lasx_xvpermi_q:
+  case LoongArch::BI__builtin_lasx_xvpermi_w:
+    return SemaBuiltinConstantArgRange(TheCall, 2, 0, 255);
+  case LoongArch::BI__builtin_lasx_xvrepl128vei_b:
+    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15);
+  case LoongArch::BI__builtin_lasx_xvrepl128vei_h:
+  case LoongArch::BI__builtin_lasx_xvpickve2gr_w:
+  case LoongArch::BI__builtin_lasx_xvpickve2gr_wu:
+  case LoongArch::BI__builtin_lasx_xvpickve_w_f:
+  case LoongArch::BI__builtin_lasx_xvpickve_w:
+    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7);
+  case LoongArch::BI__builtin_lasx_xvinsgr2vr_w:
+  case LoongArch::BI__builtin_lasx_xvinsve0_w:
+    return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7);
+  case LoongArch::BI__builtin_lasx_xvrepl128vei_w:
+  case LoongArch::BI__builtin_lasx_xvpickve2gr_d:
+  case LoongArch::BI__builtin_lasx_xvpickve2gr_du:
+  case LoongArch::BI__builtin_lasx_xvpickve_d_f:
+  case LoongArch::BI__builtin_lasx_xvpickve_d:
+    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3);
+  case LoongArch::BI__builtin_lasx_xvinsve0_d:
+  case LoongArch::BI__builtin_lasx_xvinsgr2vr_d:
+    return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3);
+  case LoongArch::BI__builtin_lasx_xvstelm_b:
+    return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) ||
+           SemaBuiltinConstantArgRange(TheCall, 3, 0, 31);
+  case LoongArch::BI__builtin_lasx_xvstelm_h:
+    return SemaBuiltinConstantArgRange(TheCall, 2, -256, 254) ||
+           SemaBuiltinConstantArgRange(TheCall, 3, 0, 15);
+  case LoongArch::BI__builtin_lasx_xvstelm_w:
+    return SemaBuiltinConstantArgRange(TheCall, 2, -512, 508) ||
+           SemaBuiltinConstantArgRange(TheCall, 3, 0, 7);
+  case LoongArch::BI__builtin_lasx_xvstelm_d:
+    return SemaBuiltinConstantArgRange(TheCall, 2, -1024, 1016) ||
+           SemaBuiltinConstantArgRange(TheCall, 3, 0, 3);
+  case LoongArch::BI__builtin_lasx_xvrepl128vei_d:
+    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1);
+  case LoongArch::BI__builtin_lasx_xvldrepl_b:
+  case LoongArch::BI__builtin_lasx_xvld:
+    return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2047);
+  case LoongArch::BI__builtin_lasx_xvldrepl_h:
+    return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2046);
+  case LoongArch::BI__builtin_lasx_xvldrepl_w:
+    return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2044);
+  case LoongArch::BI__builtin_lasx_xvldrepl_d:
+    return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2040);
+  case LoongArch::BI__builtin_lasx_xvst:
+    return SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047);
+  case LoongArch::BI__builtin_lasx_xvldi:
+    return SemaBuiltinConstantArgRange(TheCall, 0, -4096, 4095);
+  case LoongArch::BI__builtin_lasx_xvrepli_b:
+  case LoongArch::BI__builtin_lasx_xvrepli_h:
+  case LoongArch::BI__builtin_lasx_xvrepli_w:
+  case LoongArch::BI__builtin_lasx_xvrepli_d:
+    return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511);
+  }
   return false;
 }
diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c b/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c
index 0264c2948934e66dd97ad3d039ec6fbba62003d6..db113a13eb5a9510db61105800a1cdbae906d251 100644
--- a/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c
+++ b/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c
@@ -1,9 +1,58 @@
 // RUN: %clang_cc1 -triple loongarch32 -emit-llvm -S -verify %s -o /dev/null
+// RUN: not %clang_cc1 -triple loongarch32 -DFEATURE_CHECK -emit-llvm %s 2>&1 \
+// RUN:   | FileCheck %s
 
 #include <larchintrin.h>
 
+#ifdef FEATURE_CHECK
+void test_feature(long *v_l, unsigned long *v_ul, int *v_i, unsigned ui, char c, short s) {
+// CHECK: error: '__builtin_loongarch_cacop_d' needs target feature 64bit
+  __builtin_loongarch_cacop_d(1, v_ul[0], 1024);
+
+// CHECK: error: '__builtin_loongarch_crc_w_b_w' needs target feature 64bit
+  v_i[0] = __builtin_loongarch_crc_w_b_w(c, v_i[0]);
+// CHECK: error: '__builtin_loongarch_crc_w_h_w' needs target feature 64bit
+  v_i[1] = __builtin_loongarch_crc_w_h_w(c, v_i[0]);
+// CHECK: error: '__builtin_loongarch_crc_w_w_w' needs target feature 64bit
+  v_i[2] = __builtin_loongarch_crc_w_w_w(c, v_i[0]);
+// CHECK: error: '__builtin_loongarch_crc_w_d_w' needs target feature 64bit
+  v_i[3] = __builtin_loongarch_crc_w_d_w(c, v_i[0]);
+
+// CHECK: error: '__builtin_loongarch_crcc_w_b_w' needs target feature 64bit
+  v_i[4] = __builtin_loongarch_crcc_w_b_w(c, v_i[0]);
+// CHECK: error: '__builtin_loongarch_crcc_w_h_w' needs target feature 64bit
+  v_i[5] = __builtin_loongarch_crcc_w_h_w(s, v_i[0]);
+// CHECK: error: '__builtin_loongarch_crcc_w_w_w' needs target feature 64bit
+  v_i[6] = __builtin_loongarch_crcc_w_w_w(v_i[0], v_i[1]);
+// CHECK: error: '__builtin_loongarch_crcc_w_d_w' needs target feature 64bit
+  v_i[7] = __builtin_loongarch_crcc_w_d_w(v_l[0], v_i[0]);
+
+// CHECK: error: '__builtin_loongarch_csrrd_d' needs target feature 64bit
+  v_ul[0] = __builtin_loongarch_csrrd_d(1);
+// CHECK: error: '__builtin_loongarch_csrwr_d' needs target feature 64bit
+  v_ul[1] = __builtin_loongarch_csrwr_d(v_ul[0], 1);
+// CHECK: error: '__builtin_loongarch_csrxchg_d' needs target feature 64bit
+  v_ul[2] = __builtin_loongarch_csrxchg_d(v_ul[0], v_ul[1], 1);
+
+
+// CHECK: error: '__builtin_loongarch_iocsrrd_d' needs target feature 64bit
+  v_ul[3] = __builtin_loongarch_iocsrrd_d(ui);
+// CHECK: error: '__builtin_loongarch_iocsrwr_d' needs target feature 64bit
+  __builtin_loongarch_iocsrwr_d(v_ul[0], ui);
+
+// CHECK: error: '__builtin_loongarch_asrtle_d' needs target feature 64bit
+  __builtin_loongarch_asrtle_d(v_l[0], v_l[1]);
+// CHECK: error: '__builtin_loongarch_asrtgt_d' needs target feature 64bit
+  __builtin_loongarch_asrtgt_d(v_l[0], v_l[1]);
+
+// CHECK: error: '__builtin_loongarch_lddir_d' needs target feature 64bit
+  v_ul[4] = __builtin_loongarch_lddir_d(v_l[0], 1);
+// CHECK: error: '__builtin_loongarch_ldpte_d' needs target feature 64bit
+  __builtin_loongarch_ldpte_d(v_l[0], 1);
+}
+#endif
+
 void cacop_d(unsigned long int a) {
-  __builtin_loongarch_cacop_d(1, a, 1024); // expected-error {{this builtin requires target: loongarch64}}
   __builtin_loongarch_cacop_w(-1, a, 1024); // expected-error {{argument value -1 is outside the valid range [0, 31]}}
   __builtin_loongarch_cacop_w(32, a, 1024); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
   __builtin_loongarch_cacop_w(1, a, -4096); // expected-error {{argument value -4096 is outside the valid range [-2048, 2047]}}
@@ -47,49 +96,6 @@ void syscall(int a) {
   __builtin_loongarch_syscall(a); // expected-error {{argument to '__builtin_loongarch_syscall' must be a constant integer}}
 }
 
-int crc_w_b_w(char a, int b) {
-  return __builtin_loongarch_crc_w_b_w(a, b); // expected-error {{this builtin requires target: loongarch64}}
-}
-
-int crc_w_h_w(short a, int b) {
-  return __builtin_loongarch_crc_w_h_w(a, b); // expected-error {{this builtin requires target: loongarch64}}
-}
-
-int crc_w_w_w(int a, int b) {
-  return __builtin_loongarch_crc_w_w_w(a, b); // expected-error {{this builtin requires target: loongarch64}}
-}
-
-int crc_w_d_w(long int a, int b) {
-  return __builtin_loongarch_crc_w_d_w(a, b); // expected-error {{this builtin requires target: loongarch64}}
-}
-int crcc_w_b_w(char a, int b) {
-  return __builtin_loongarch_crcc_w_b_w(a, b); // expected-error {{this builtin requires target: loongarch64}}
-}
-
-int crcc_w_h_w(short a, int b) {
-  return __builtin_loongarch_crcc_w_h_w(a, b); // expected-error {{this builtin requires target: loongarch64}}
-}
-
-int crcc_w_w_w(int a, int b) {
-  return __builtin_loongarch_crcc_w_w_w(a, b); // expected-error {{this builtin requires target: loongarch64}}
-}
-
-int crcc_w_d_w(long int a, int b) {
-  return __builtin_loongarch_crcc_w_d_w(a, b); // expected-error {{this builtin requires target: loongarch64}}
-}
-
-unsigned long int csrrd_d() {
-  return __builtin_loongarch_csrrd_d(1); // expected-error {{this builtin requires target: loongarch64}}
-}
-
-unsigned long int csrwr_d(unsigned long int a) {
-  return __builtin_loongarch_csrwr_d(a, 1); // expected-error {{this builtin requires target: loongarch64}}
-}
-
-unsigned long int csrxchg_d(unsigned long int a, unsigned long int b) {
-  return __builtin_loongarch_csrxchg_d(a, b, 1); // expected-error {{this builtin requires target: loongarch64}}
-}
-
 void csrrd_w(int a) {
   __builtin_loongarch_csrrd_w(16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}}
   __builtin_loongarch_csrrd_w(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}}
@@ -108,30 +114,6 @@ void csrxchg_w(unsigned int a, unsigned int b) {
   __builtin_loongarch_csrxchg_w(a, b, b); // expected-error {{argument to '__builtin_loongarch_csrxchg_w' must be a constant integer}}
 }
 
-unsigned long int iocsrrd_d(unsigned int a) {
-  return __builtin_loongarch_iocsrrd_d(a); // expected-error {{this builtin requires target: loongarch64}}
-}
-
-void iocsrwr_d(unsigned long int a, unsigned int b) {
-  __builtin_loongarch_iocsrwr_d(a, b); // expected-error {{this builtin requires target: loongarch64}}
-}
-
-void asrtle_d(long int a, long int b) {
-  __builtin_loongarch_asrtle_d(a, b); // expected-error {{this builtin requires target: loongarch64}}
-}
-
-void asrtgt_d(long int a, long int b) {
-  __builtin_loongarch_asrtgt_d(a, b); // expected-error {{this builtin requires target: loongarch64}}
-}
-
-void lddir_d(long int a, int b) {
-  __builtin_loongarch_lddir_d(a, 1); // expected-error {{this builtin requires target: loongarch64}}
-}
-
-void ldpte_d(long int a, int b) {
-  __builtin_loongarch_ldpte_d(a, 1); // expected-error {{this builtin requires target: loongarch64}}
-}
-
 void rdtime_d() {
   __rdtime_d(); // expected-error {{call to undeclared function '__rdtime_d'}}
 }
diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c
new file mode 100644
index 0000000000000000000000000000000000000000..2a3862bbe3c18cc27d4395c96cae08749d1b0ec3
--- /dev/null
+++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c
@@ -0,0 +1,1373 @@
+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -verify %s
+
+#include <lasxintrin.h>
+
+v32i8 xvslli_b(v32i8 _1, int var) {
+  v32i8 res = __lasx_xvslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __lasx_xvslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __lasx_xvslli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_b' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvslli_h(v16i16 _1, int var) {
+  v16i16 res = __lasx_xvslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvslli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvslli_w(v8i32 _1, int var) {
+  v8i32 res = __lasx_xvslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvslli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvslli_d(v4i64 _1, int var) {
+  v4i64 res = __lasx_xvslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvslli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_d' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvsrai_b(v32i8 _1, int var) {
+  v32i8 res = __lasx_xvsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __lasx_xvsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __lasx_xvsrai_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_b' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvsrai_h(v16i16 _1, int var) {
+  v16i16 res = __lasx_xvsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsrai_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvsrai_w(v8i32 _1, int var) {
+  v8i32 res = __lasx_xvsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsrai_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvsrai_d(v4i64 _1, int var) {
+  v4i64 res = __lasx_xvsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsrai_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_d' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvsrari_b(v32i8 _1, int var) {
+  v32i8 res = __lasx_xvsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __lasx_xvsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __lasx_xvsrari_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_b' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvsrari_h(v16i16 _1, int var) {
+  v16i16 res = __lasx_xvsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsrari_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvsrari_w(v8i32 _1, int var) {
+  v8i32 res = __lasx_xvsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsrari_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvsrari_d(v4i64 _1, int var) {
+  v4i64 res = __lasx_xvsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsrari_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_d' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvsrli_b(v32i8 _1, int var) {
+  v32i8 res = __lasx_xvsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __lasx_xvsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __lasx_xvsrli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_b' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvsrli_h(v16i16 _1, int var) {
+  v16i16 res = __lasx_xvsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsrli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvsrli_w(v8i32 _1, int var) {
+  v8i32 res = __lasx_xvsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsrli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvsrli_d(v4i64 _1, int var) {
+  v4i64 res = __lasx_xvsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsrli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_d' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvsrlri_b(v32i8 _1, int var) {
+  v32i8 res = __lasx_xvsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __lasx_xvsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __lasx_xvsrlri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_b' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvsrlri_h(v16i16 _1, int var) {
+  v16i16 res = __lasx_xvsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsrlri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvsrlri_w(v8i32 _1, int var) {
+  v8i32 res = __lasx_xvsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsrlri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvsrlri_d(v4i64 _1, int var) {
+  v4i64 res = __lasx_xvsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsrlri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_d' must be a constant integer}}
+  return res;
+}
+
+v32u8 xvbitclri_b(v32u8 _1, int var) {
+  v32u8 res = __lasx_xvbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __lasx_xvbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __lasx_xvbitclri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_b' must be a constant integer}}
+  return res;
+}
+
+v16u16 xvbitclri_h(v16u16 _1, int var) {
+  v16u16 res = __lasx_xvbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvbitclri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_h' must be a constant integer}}
+  return res;
+}
+
+v8u32 xvbitclri_w(v8u32 _1, int var) {
+  v8u32 res = __lasx_xvbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvbitclri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_w' must be a constant integer}}
+  return res;
+}
+
+v4u64 xvbitclri_d(v4u64 _1, int var) {
+  v4u64 res = __lasx_xvbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvbitclri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_d' must be a constant integer}}
+  return res;
+}
+
+v32u8 xvbitseti_b(v32u8 _1, int var) {
+  v32u8 res = __lasx_xvbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __lasx_xvbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __lasx_xvbitseti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_b' must be a constant integer}}
+  return res;
+}
+
+v16u16 xvbitseti_h(v16u16 _1, int var) {
+  v16u16 res = __lasx_xvbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvbitseti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_h' must be a constant integer}}
+  return res;
+}
+
+v8u32 xvbitseti_w(v8u32 _1, int var) {
+  v8u32 res = __lasx_xvbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvbitseti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_w' must be a constant integer}}
+  return res;
+}
+
+v4u64 xvbitseti_d(v4u64 _1, int var) {
+  v4u64 res = __lasx_xvbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvbitseti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_d' must be a constant integer}}
+  return res;
+}
+
+v32u8 xvbitrevi_b(v32u8 _1, int var) {
+  v32u8 res = __lasx_xvbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __lasx_xvbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __lasx_xvbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_b' must be a constant integer}}
+  return res;
+}
+
+v16u16 xvbitrevi_h(v16u16 _1, int var) {
+  v16u16 res = __lasx_xvbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_h' must be a constant integer}}
+  return res;
+}
+
+v8u32 xvbitrevi_w(v8u32 _1, int var) {
+  v8u32 res = __lasx_xvbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_w' must be a constant integer}}
+  return res;
+}
+
+v4u64 xvbitrevi_d(v4u64 _1, int var) {
+  v4u64 res = __lasx_xvbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_d' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvaddi_bu(v32i8 _1, int var) {
+  v32i8 res = __lasx_xvaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvaddi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_bu' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvaddi_hu(v16i16 _1, int var) {
+  v16i16 res = __lasx_xvaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvaddi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_hu' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvaddi_wu(v8i32 _1, int var) {
+  v8i32 res = __lasx_xvaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvaddi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_wu' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvaddi_du(v4i64 _1, int var) {
+  v4i64 res = __lasx_xvaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvaddi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_du' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvsubi_bu(v32i8 _1, int var) {
+  v32i8 res = __lasx_xvsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsubi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_bu' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvsubi_hu(v16i16 _1, int var) {
+  v16i16 res = __lasx_xvsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsubi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_hu' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvsubi_wu(v8i32 _1, int var) {
+  v8i32 res = __lasx_xvsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsubi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_wu' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvsubi_du(v4i64 _1, int var) {
+  v4i64 res = __lasx_xvsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsubi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_du' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvmaxi_b(v32i8 _1, int var) {
+  v32i8 res = __lasx_xvmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvmaxi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_b' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvmaxi_h(v16i16 _1, int var) {
+  v16i16 res = __lasx_xvmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvmaxi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvmaxi_w(v8i32 _1, int var) {
+  v8i32 res = __lasx_xvmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvmaxi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvmaxi_d(v4i64 _1, int var) {
+  v4i64 res = __lasx_xvmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvmaxi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_d' must be a constant integer}}
+  return res;
+}
+
+v32u8 xvmaxi_bu(v32u8 _1, int var) {
+  v32u8 res = __lasx_xvmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_bu' must be a constant integer}}
+  return res;
+}
+
+v16u16 xvmaxi_hu(v16u16 _1, int var) {
+  v16u16 res = __lasx_xvmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_hu' must be a constant integer}}
+  return res;
+}
+
+v8u32 xvmaxi_wu(v8u32 _1, int var) {
+  v8u32 res = __lasx_xvmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_wu' must be a constant integer}}
+  return res;
+}
+
+v4u64 xvmaxi_du(v4u64 _1, int var) {
+  v4u64 res = __lasx_xvmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvmaxi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_du' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvmini_b(v32i8 _1, int var) {
+  v32i8 res = __lasx_xvmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvmini_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_b' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvmini_h(v16i16 _1, int var) {
+  v16i16 res = __lasx_xvmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvmini_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvmini_w(v8i32 _1, int var) {
+  v8i32 res = __lasx_xvmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvmini_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvmini_d(v4i64 _1, int var) {
+  v4i64 res = __lasx_xvmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvmini_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_d' must be a constant integer}}
+  return res;
+}
+
+v32u8 xvmini_bu(v32u8 _1, int var) {
+  v32u8 res = __lasx_xvmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvmini_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_bu' must be a constant integer}}
+  return res;
+}
+
+v16u16 xvmini_hu(v16u16 _1, int var) {
+  v16u16 res = __lasx_xvmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvmini_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_hu' must be a constant integer}}
+  return res;
+}
+
+v8u32 xvmini_wu(v8u32 _1, int var) {
+  v8u32 res = __lasx_xvmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvmini_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_wu' must be a constant integer}}
+  return res;
+}
+
+v4u64 xvmini_du(v4u64 _1, int var) {
+  v4u64 res = __lasx_xvmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvmini_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_du' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvseqi_b(v32i8 _1, int var) {
+  v32i8 res = __lasx_xvseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvseqi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_b' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvseqi_h(v16i16 _1, int var) {
+  v16i16 res = __lasx_xvseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvseqi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvseqi_w(v8i32 _1, int var) {
+  v8i32 res = __lasx_xvseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvseqi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvseqi_d(v4i64 _1, int var) {
+  v4i64 res = __lasx_xvseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvseqi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_d' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvslti_b(v32i8 _1, int var) {
+  v32i8 res = __lasx_xvslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvslti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_b' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvslti_h(v16i16 _1, int var) {
+  v16i16 res = __lasx_xvslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvslti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvslti_w(v8i32 _1, int var) {
+  v8i32 res = __lasx_xvslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvslti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvslti_d(v4i64 _1, int var) {
+  v4i64 res = __lasx_xvslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvslti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_d' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvslti_bu(v32u8 _1, int var) {
+  v32i8 res = __lasx_xvslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvslti_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_bu' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvslti_hu(v16u16 _1, int var) {
+  v16i16 res = __lasx_xvslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvslti_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_hu' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvslti_wu(v8u32 _1, int var) {
+  v8i32 res = __lasx_xvslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvslti_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_wu' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvslti_du(v4u64 _1, int var) {
+  v4i64 res = __lasx_xvslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvslti_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_du' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvslei_b(v32i8 _1, int var) {
+  v32i8 res = __lasx_xvslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvslei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_b' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvslei_h(v16i16 _1, int var) {
+  v16i16 res = __lasx_xvslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvslei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvslei_w(v8i32 _1, int var) {
+  v8i32 res = __lasx_xvslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvslei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvslei_d(v4i64 _1, int var) {
+  v4i64 res = __lasx_xvslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
+  res |= __lasx_xvslei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_d' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvslei_bu(v32u8 _1, int var) {
+  v32i8 res = __lasx_xvslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvslei_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_bu' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvslei_hu(v16u16 _1, int var) {
+  v16i16 res = __lasx_xvslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvslei_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_hu' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvslei_wu(v8u32 _1, int var) {
+  v8i32 res = __lasx_xvslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvslei_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_wu' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvslei_du(v4u64 _1, int var) {
+  v4i64 res = __lasx_xvslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvslei_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_du' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvsat_b(v32i8 _1, int var) {
+  v32i8 res = __lasx_xvsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __lasx_xvsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __lasx_xvsat_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_b' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvsat_h(v16i16 _1, int var) {
+  v16i16 res = __lasx_xvsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsat_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvsat_w(v8i32 _1, int var) {
+  v8i32 res = __lasx_xvsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsat_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvsat_d(v4i64 _1, int var) {
+  v4i64 res = __lasx_xvsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsat_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_d' must be a constant integer}}
+  return res;
+}
+
+v32u8 xvsat_bu(v32u8 _1, int var) {
+  v32u8 res = __lasx_xvsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __lasx_xvsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __lasx_xvsat_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_bu' must be a constant integer}}
+  return res;
+}
+
+v16u16 xvsat_hu(v16u16 _1, int var) {
+  v16u16 res = __lasx_xvsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvsat_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_hu' must be a constant integer}}
+  return res;
+}
+
+v8u32 xvsat_wu(v8u32 _1, int var) {
+  v8u32 res = __lasx_xvsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lasx_xvsat_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_wu' must be a constant integer}}
+  return res;
+}
+
+v4u64 xvsat_du(v4u64 _1, int var) {
+  v4u64 res = __lasx_xvsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lasx_xvsat_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_du' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvrepl128vei_b(v32i8 _1, int var) {
+  v32i8 res = __lasx_xvrepl128vei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lasx_xvrepl128vei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lasx_xvrepl128vei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_b' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvrepl128vei_h(v16i16 _1, int var) {
+  v16i16 res = __lasx_xvrepl128vei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __lasx_xvrepl128vei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __lasx_xvrepl128vei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvrepl128vei_w(v8i32 _1, int var) {
+  v8i32 res = __lasx_xvrepl128vei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
+  res |= __lasx_xvrepl128vei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  res |= __lasx_xvrepl128vei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvrepl128vei_d(v4i64 _1, int var) {
+  v4i64 res = __lasx_xvrepl128vei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}}
+  res |= __lasx_xvrepl128vei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+  res |= __lasx_xvrepl128vei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_d' must be a constant integer}}
+  return res;
+}
+
+v32u8 xvandi_b(v32u8 _1, int var) {
+  v32u8 res = __lasx_xvandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __lasx_xvandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __lasx_xvandi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvandi_b' must be a constant integer}}
+  return res;
+}
+
+v32u8 xvori_b(v32u8 _1, int var) {
+  v32u8 res = __lasx_xvori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __lasx_xvori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __lasx_xvori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvori_b' must be a constant integer}}
+  return res;
+}
+
+v32u8 xvnori_b(v32u8 _1, int var) {
+  v32u8 res = __lasx_xvnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __lasx_xvnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __lasx_xvnori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvnori_b' must be a constant integer}}
+  return res;
+}
+
+v32u8 xvxori_b(v32u8 _1, int var) {
+  v32u8 res = __lasx_xvxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __lasx_xvxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __lasx_xvxori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvxori_b' must be a constant integer}}
+  return res;
+}
+
+v32u8 xvbitseli_b(v32u8 _1, v32u8 _2, int var) {
+  v32u8 res = __lasx_xvbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __lasx_xvbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __lasx_xvbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvbitseli_b' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvshuf4i_b(v32i8 _1, int var) {
+  v32i8 res = __lasx_xvshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __lasx_xvshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __lasx_xvshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_b' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvshuf4i_h(v16i16 _1, int var) {
+  v16i16 res = __lasx_xvshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __lasx_xvshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __lasx_xvshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvshuf4i_w(v8i32 _1, int var) {
+  v8i32 res = __lasx_xvshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __lasx_xvshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __lasx_xvshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2, int var) {
+  v4i64 res = __lasx_xvshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __lasx_xvshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __lasx_xvshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_d' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvpermi_w(v8i32 _1, v8i32 _2, int var) {
+  v8i32 res = __lasx_xvpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __lasx_xvpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __lasx_xvpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvpermi_d(v4i64 _1, int var) {
+  v4i64 res = __lasx_xvpermi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __lasx_xvpermi_d(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __lasx_xvpermi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpermi_d' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvpermi_q(v32i8 _1, v32i8 _2, int var) {
+  v32i8 res = __lasx_xvpermi_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __lasx_xvpermi_q(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __lasx_xvpermi_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_q' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvsllwil_h_b(v32i8 _1, int var) {
+  v16i16 res = __lasx_xvsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __lasx_xvsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __lasx_xvsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_h_b' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvsllwil_w_h(v16i16 _1, int var) {
{{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_w_h' must be a constant integer}} + return res; +} + +v4i64 xvsllwil_d_w(v8i32 _1, int var) { + v4i64 res = __lasx_xvsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_d_w' must be a constant integer}} + return res; +} + +v16u16 xvsllwil_hu_bu(v32u8 _1, int var) { + v16u16 res = __lasx_xvsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_hu_bu' must be a constant integer}} + return res; +} + +v8u32 xvsllwil_wu_hu(v16u16 _1, int var) { + v8u32 res = __lasx_xvsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_wu_hu' must be a constant integer}} + return res; +} + +v4u64 xvsllwil_du_wu(v8u32 _1, int var) { + v4u64 res = __lasx_xvsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_du_wu' must be a constant integer}} + return res; +} + +v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_b' must be a constant integer}} + return res; +} + +v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_h' must be a constant integer}} + return res; +} + +v32i8 xvbsrl_v(v32i8 _1, int var) { + v32i8 res = __lasx_xvbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvbsrl_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsrl_v' must be a constant integer}} + return res; +} + +v32i8 xvbsll_v(v32i8 _1, int var) { + v32i8 res = __lasx_xvbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvbsll_v(_1, 
32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvbsll_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsll_v' must be a constant integer}} + return res; +} + +v32i8 xvextrins_b(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_b' must be a constant integer}} + return res; +} + +v16i16 xvextrins_h(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_h' must be a constant integer}} + return res; +} + +v8i32 xvextrins_w(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_w' must be a constant integer}} + return res; +} + +v4i64 xvextrins_d(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_d' must be a constant integer}} + return res; +} + +v32i8 xvld(void *_1, int var) { + v32i8 res = __lasx_xvld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + res |= __lasx_xvld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + res |= __lasx_xvld(_1, var); // expected-error {{argument to '__builtin_lasx_xvld' must be a constant integer}} + return res; +} + +void xvst(v32i8 _1, void *_2, int var) { + __lasx_xvst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + __lasx_xvst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + __lasx_xvst(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvst' must be a constant integer}} +} + +void xvstelm_b(v32i8 _1, void * _2, int var) { + __lasx_xvstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} + __lasx_xvstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} + __lasx_xvstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} +} + +void xvstelm_h(v16i16 _1, void * _2, int var) { + __lasx_xvstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} + __lasx_xvstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} + __lasx_xvstelm_h(_1, _2, var, 1); // 
expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} +} + +void xvstelm_w(v8i32 _1, void * _2, int var) { + __lasx_xvstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} + __lasx_xvstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} + __lasx_xvstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} +} + +void xvstelm_d(v4i64 _1, void * _2, int var) { + __lasx_xvstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} + __lasx_xvstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} + __lasx_xvstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} +} + +void xvstelm_b_idx(v32i8 _1, void * _2, int var) { + __lasx_xvstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + __lasx_xvstelm_b(_1, _2, 1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + __lasx_xvstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} +} + +void xvstelm_h_idx(v16i16 _1, void * _2, int var) { + __lasx_xvstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + __lasx_xvstelm_h(_1, _2, 2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + __lasx_xvstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} +} + +void xvstelm_w_idx(v8i32 _1, void * _2, int var) { + __lasx_xvstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + __lasx_xvstelm_w(_1, _2, 4, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + __lasx_xvstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} +} + +void xvstelm_d_idx(v4i64 _1, void * _2, int var) { + __lasx_xvstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + __lasx_xvstelm_d(_1, _2, 8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + __lasx_xvstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} +} + +v8i32 xvinsve0_w(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvinsve0_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvinsve0_w(_1, _2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvinsve0_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_w' must be a constant integer}} + return res; +} + +v4i64 xvinsve0_d(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvinsve0_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lasx_xvinsve0_d(_1, _2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lasx_xvinsve0_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_d' must be a constant integer}} + return res; +} + +v8i32 xvpickve_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvpickve_w(_1, -1); // expected-error {{argument value 4294967295 is 
outside the valid range [0, 7]}} + res |= __lasx_xvpickve_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvpickve_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w' must be a constant integer}} + return res; +} + +v4i64 xvpickve_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvpickve_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lasx_xvpickve_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lasx_xvpickve_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d' must be a constant integer}} + return res; +} + +v4i64 xvldi(int var) { + v4i64 res = __lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} + res |= __lasx_xvldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} + res |= __lasx_xvldi(var); // expected-error {{argument to '__builtin_lasx_xvldi' must be a constant integer}} + return res; +} + +v8i32 xvinsgr2vr_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvinsgr2vr_w(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_w' must be a constant integer}} + return res; +} + +v4i64 xvinsgr2vr_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lasx_xvinsgr2vr_d(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lasx_xvinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_d' must be a constant integer}} + return res; +} + +v32i8 xvldrepl_b(void *_1, int var) { + v32i8 res = __lasx_xvldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + res |= __lasx_xvldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + res |= __lasx_xvldrepl_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_b' must be a constant integer}} + return res; +} + +v16i16 xvldrepl_h(void *_1, int var) { + v16i16 res = __lasx_xvldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} + res |= __lasx_xvldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} + res |= __lasx_xvldrepl_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_h' must be a constant integer}} + return res; +} + +v8i32 xvldrepl_w(void *_1, int var) { + v8i32 res = __lasx_xvldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} + res |= __lasx_xvldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} + res |= __lasx_xvldrepl_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_w' must be a constant integer}} + return res; +} + +v4i64 xvldrepl_d(void *_1, int var) { + v4i64 res = __lasx_xvldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} + res |= __lasx_xvldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} + res |= 
__lasx_xvldrepl_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_d' must be a constant integer}} + return res; +} + +int xvpickve2gr_w(v8i32 _1, int var) { + int res = __lasx_xvpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvpickve2gr_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_w' must be a constant integer}} + return res; +} + +unsigned int xvpickve2gr_wu(v8i32 _1, int var) { + unsigned int res = __lasx_xvpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvpickve2gr_wu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_wu' must be a constant integer}} + return res; +} + +long xvpickve2gr_d(v4i64 _1, int var) { + long res = __lasx_xvpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lasx_xvpickve2gr_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lasx_xvpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_d' must be a constant integer}} + return res; +} + +unsigned long int xvpickve2gr_du(v4i64 _1, int var) { + unsigned long int res = __lasx_xvpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lasx_xvpickve2gr_du(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lasx_xvpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_du' must be a constant integer}} + return res; +} + +v32i8 xvrotri_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvrotri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_b' must be a constant integer}} + return res; +} + +v16i16 xvrotri_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvrotri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_h' must be a constant integer}} + return res; +} + +v8i32 xvrotri_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvrotri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_w' must be a constant integer}} + return res; +} + +v4i64 xvrotri_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvrotri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_d' must be a constant integer}} + return res; +} + 
+v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_d_q' must be a constant integer}} + return res; +} + +v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvsrlrni_d_q(_1, _2, var); // expected-error {{argument to 
'__builtin_lasx_xvsrlrni_d_q' must be a constant integer}} + return res; +} + +v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_d_q' must be a constant integer}} + return res; +} + +v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __lasx_xvssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __lasx_xvssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_hu_w' must be a constant integer}} + return res; +} + +v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __lasx_xvssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_wu_d' must be a constant integer}} + return res; +} + +v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __lasx_xvssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid 
range [0, 127]}} + res |= __lasx_xvssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_du_q' must be a constant integer}} + return res; +} + +v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_d_q' must be a constant integer}} + return res; +} + +v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __lasx_xvssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __lasx_xvssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_hu_w' must be a constant integer}} + return res; +} + +v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __lasx_xvssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_wu_d' must be a constant integer}} + return res; +} + +v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __lasx_xvssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the 
valid range [0, 127]}} + res |= __lasx_xvssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_du_q' must be a constant integer}} + return res; +} + +v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_d_q' must be a constant integer}} + return res; +} + +v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvsrarni_d_q(_1, _2, -1); // 
expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_d_q' must be a constant integer}} + return res; +} + +v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_w_d' must be a constant integer}} + return res; +} + +v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_d_q' must be a constant integer}} + return res; +} + +v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __lasx_xvssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __lasx_xvssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_hu_w' must be a constant integer}} + return res; +} + +v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __lasx_xvssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_wu_d' must be a constant integer}} + return res; +} + +v4u64 
xvssrani_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __lasx_xvssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_du_q' must be a constant integer}} + return res; +} + +v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_d_q' must be a constant integer}} + return res; +} + +v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __lasx_xvssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __lasx_xvssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_hu_w' must be a constant integer}} + return res; +} + +v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __lasx_xvssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvssrarni_wu_d(_1, _2, 
var); // expected-error {{argument to '__builtin_lasx_xvssrarni_wu_d' must be a constant integer}} + return res; +} + +v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __lasx_xvssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_du_q' must be a constant integer}} + return res; +} + +v4f64 xvpickve_d_f(v4f64 _1, int var) { + v4f64 res = __lasx_xvpickve_d_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res += __lasx_xvpickve_d_f(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res += __lasx_xvpickve_d_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d_f' must be a constant integer}} + return res; +} + +v8f32 xvpickve_w_f(v8f32 _1, int var) { + v8f32 res = __lasx_xvpickve_w_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res += __lasx_xvpickve_w_f(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res += __lasx_xvpickve_w_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w_f' must be a constant integer}} + return res; +} + +v32i8 xvrepli_b(int var) { + v32i8 res = __lasx_xvrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __lasx_xvrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __lasx_xvrepli_b(var); // expected-error {{argument to '__builtin_lasx_xvrepli_b' must be a constant integer}} + return res; +} + +v4i64 xvrepli_d(int var) { + v4i64 res = __lasx_xvrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __lasx_xvrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __lasx_xvrepli_d(var); // expected-error {{argument to '__builtin_lasx_xvrepli_d' must be a constant integer}} + return res; +} + +v16i16 xvrepli_h(int var) { + v16i16 res = __lasx_xvrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __lasx_xvrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __lasx_xvrepli_h(var); // expected-error {{argument to '__builtin_lasx_xvrepli_h' must be a constant integer}} + return res; +} + +v8i32 xvrepli_w(int var) { + v8i32 res = __lasx_xvrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __lasx_xvrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __lasx_xvrepli_w(var); // expected-error {{argument to '__builtin_lasx_xvrepli_w' must be a constant integer}} + return res; +} diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c new file mode 100644 index 0000000000000000000000000000000000000000..9a8ce224bcfd0910914c54519f8c263d4f6fa379 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c @@ -0,0 +1,6386 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s + +#include <lasxintrin.h> + +// CHECK-LABEL: 
@xvsll_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __lasx_xvsll_b(_1, _2); } +// CHECK-LABEL: @xvsll_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __lasx_xvsll_h(_1, _2); } +// CHECK-LABEL: @xvsll_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __lasx_xvsll_w(_1, _2); } +// CHECK-LABEL: @xvsll_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __lasx_xvsll_d(_1, _2); } +// CHECK-LABEL: @xvslli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvslli_b(v32i8 _1) { return __lasx_xvslli_b(_1, 1); } +// CHECK-LABEL: @xvslli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvslli_h(v16i16 _1) { return __lasx_xvslli_h(_1, 1); } +// CHECK-LABEL: @xvslli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvslli_w(v8i32 _1) { return __lasx_xvslli_w(_1, 1); } +// CHECK-LABEL: @xvslli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvslli_d(v4i64 _1) { return __lasx_xvslli_d(_1, 1); } +// CHECK-LABEL: @xvsra_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __lasx_xvsra_b(_1, _2); } +// CHECK-LABEL: @xvsra_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __lasx_xvsra_h(_1, _2); } +// CHECK-LABEL: @xvsra_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __lasx_xvsra_w(_1, _2); } +// CHECK-LABEL: @xvsra_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __lasx_xvsra_d(_1, _2); } +// CHECK-LABEL: @xvsrai_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrai_b(v32i8 _1) { return __lasx_xvsrai_b(_1, 1); } +// CHECK-LABEL: @xvsrai_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrai_h(v16i16 _1) { return __lasx_xvsrai_h(_1, 1); } +// CHECK-LABEL: @xvsrai_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrai_w(v8i32 _1) { return __lasx_xvsrai_w(_1, 1); } +// CHECK-LABEL: @xvsrai_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrai_d(v4i64 _1) { return __lasx_xvsrai_d(_1, 1); } +// CHECK-LABEL: @xvsrar_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrar_b(_1, _2); } +// CHECK-LABEL: @xvsrar_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrar_h(_1, _2); } +// CHECK-LABEL: @xvsrar_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrar_w(_1, _2); } +// CHECK-LABEL: @xvsrar_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrar_d(_1, _2); } +// CHECK-LABEL: @xvsrari_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrari_b(v32i8 _1) { return __lasx_xvsrari_b(_1, 1); } +// CHECK-LABEL: @xvsrari_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: 
store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrari_h(v16i16 _1) { return __lasx_xvsrari_h(_1, 1); } +// CHECK-LABEL: @xvsrari_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrari_w(v8i32 _1) { return __lasx_xvsrari_w(_1, 1); } +// CHECK-LABEL: @xvsrari_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrari_d(v4i64 _1) { return __lasx_xvsrari_d(_1, 1); } +// CHECK-LABEL: @xvsrl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrl_b(_1, _2); } +// CHECK-LABEL: @xvsrl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrl_h(_1, _2); } +// CHECK-LABEL: @xvsrl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrl_w(_1, _2); } +// CHECK-LABEL: @xvsrl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrl_d(_1, _2); } +// CHECK-LABEL: @xvsrli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: ret void +// +v32i8 xvsrli_b(v32i8 _1) { return __lasx_xvsrli_b(_1, 1); } +// CHECK-LABEL: @xvsrli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrli_h(v16i16 _1) { return __lasx_xvsrli_h(_1, 1); } +// CHECK-LABEL: @xvsrli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrli_w(v8i32 _1) { return __lasx_xvsrli_w(_1, 1); } +// CHECK-LABEL: @xvsrli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrli_d(v4i64 _1) { return __lasx_xvsrli_d(_1, 1); } +// CHECK-LABEL: @xvsrlr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrlr_b(_1, _2); } +// CHECK-LABEL: @xvsrlr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlr_h(_1, _2); } +// CHECK-LABEL: @xvsrlr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlr_w(_1, _2); } +// CHECK-LABEL: @xvsrlr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return 
+v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlr_d(_1, _2); }
+// CHECK-LABEL: @xvsrlri_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsrlri_b(v32i8 _1) { return __lasx_xvsrlri_b(_1, 1); }
+// CHECK-LABEL: @xvsrlri_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsrlri_h(v16i16 _1) { return __lasx_xvsrlri_h(_1, 1); }
+// CHECK-LABEL: @xvsrlri_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsrlri_w(v8i32 _1) { return __lasx_xvsrlri_w(_1, 1); }
+// CHECK-LABEL: @xvsrlri_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsrlri_d(v4i64 _1) { return __lasx_xvsrlri_d(_1, 1); }
+// CHECK-LABEL: @xvbitclr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitclr_b(_1, _2); }
+// CHECK-LABEL: @xvbitclr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitclr_h(_1, _2); }
+// CHECK-LABEL: @xvbitclr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitclr_w(_1, _2); }
+// CHECK-LABEL: @xvbitclr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitclr_d(_1, _2); }
+// CHECK-LABEL: @xvbitclri_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvbitclri_b(v32u8 _1) { return __lasx_xvbitclri_b(_1, 1); }
+// CHECK-LABEL: @xvbitclri_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvbitclri_h(v16u16 _1) { return __lasx_xvbitclri_h(_1, 1); }
+// CHECK-LABEL: @xvbitclri_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvbitclri_w(v8u32 _1) { return __lasx_xvbitclri_w(_1, 1); }
+// CHECK-LABEL: @xvbitclri_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvbitclri_d(v4u64 _1) { return __lasx_xvbitclri_d(_1, 1); }
+// CHECK-LABEL: @xvbitset_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitset_b(_1, _2); }
+// CHECK-LABEL: @xvbitset_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitset_h(_1, _2); }
+// CHECK-LABEL: @xvbitset_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitset_w(_1, _2); }
+// CHECK-LABEL: @xvbitset_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitset_d(_1, _2); }
+// CHECK-LABEL: @xvbitseti_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvbitseti_b(v32u8 _1) { return __lasx_xvbitseti_b(_1, 1); }
+// CHECK-LABEL: @xvbitseti_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvbitseti_h(v16u16 _1) { return __lasx_xvbitseti_h(_1, 1); }
+// CHECK-LABEL: @xvbitseti_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvbitseti_w(v8u32 _1) { return __lasx_xvbitseti_w(_1, 1); }
+// CHECK-LABEL: @xvbitseti_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvbitseti_d(v4u64 _1) { return __lasx_xvbitseti_d(_1, 1); }
+// CHECK-LABEL: @xvbitrev_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitrev_b(_1, _2); }
+// CHECK-LABEL: @xvbitrev_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitrev_h(_1, _2); }
+// CHECK-LABEL: @xvbitrev_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitrev_w(_1, _2); }
+// CHECK-LABEL: @xvbitrev_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitrev_d(_1, _2); }
+// CHECK-LABEL: @xvbitrevi_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvbitrevi_b(v32u8 _1) { return __lasx_xvbitrevi_b(_1, 1); }
+// CHECK-LABEL: @xvbitrevi_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvbitrevi_h(v16u16 _1) { return __lasx_xvbitrevi_h(_1, 1); }
+// CHECK-LABEL: @xvbitrevi_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvbitrevi_w(v8u32 _1) { return __lasx_xvbitrevi_w(_1, 1); }
+// CHECK-LABEL: @xvbitrevi_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvbitrevi_d(v4u64 _1) { return __lasx_xvbitrevi_d(_1, 1); }
+// CHECK-LABEL: @xvadd_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvadd_b(_1, _2); }
+// CHECK-LABEL: @xvadd_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvadd_h(_1, _2); }
+// CHECK-LABEL: @xvadd_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvadd_w(_1, _2); }
+// CHECK-LABEL: @xvadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvadd_d(_1, _2); }
+// CHECK-LABEL: @xvaddi_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvaddi_bu(v32i8 _1) { return __lasx_xvaddi_bu(_1, 1); }
+// CHECK-LABEL: @xvaddi_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvaddi_hu(v16i16 _1) { return __lasx_xvaddi_hu(_1, 1); }
+// CHECK-LABEL: @xvaddi_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvaddi_wu(v8i32 _1) { return __lasx_xvaddi_wu(_1, 1); }
+// CHECK-LABEL: @xvaddi_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddi_du(v4i64 _1) { return __lasx_xvaddi_du(_1, 1); }
+// CHECK-LABEL: @xvsub_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __lasx_xvsub_b(_1, _2); }
+// CHECK-LABEL: @xvsub_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __lasx_xvsub_h(_1, _2); }
+// CHECK-LABEL: @xvsub_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __lasx_xvsub_w(_1, _2); }
+// CHECK-LABEL: @xvsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __lasx_xvsub_d(_1, _2); }
+// CHECK-LABEL: @xvsubi_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsubi_bu(v32i8 _1) { return __lasx_xvsubi_bu(_1, 1); }
+// CHECK-LABEL: @xvsubi_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsubi_hu(v16i16 _1) { return __lasx_xvsubi_hu(_1, 1); }
+// CHECK-LABEL: @xvsubi_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsubi_wu(v8i32 _1) { return __lasx_xvsubi_wu(_1, 1); }
+// CHECK-LABEL: @xvsubi_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsubi_du(v4i64 _1) { return __lasx_xvsubi_du(_1, 1); }
+// CHECK-LABEL: @xvmax_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __lasx_xvmax_b(_1, _2); }
+// CHECK-LABEL: @xvmax_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __lasx_xvmax_h(_1, _2); }
+// CHECK-LABEL: @xvmax_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __lasx_xvmax_w(_1, _2); }
+// CHECK-LABEL: @xvmax_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __lasx_xvmax_d(_1, _2); }
+// CHECK-LABEL: @xvmaxi_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmaxi_b(v32i8 _1) { return __lasx_xvmaxi_b(_1, 1); }
+// CHECK-LABEL: @xvmaxi_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmaxi_h(v16i16 _1) { return __lasx_xvmaxi_h(_1, 1); }
+// CHECK-LABEL: @xvmaxi_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmaxi_w(v8i32 _1) { return __lasx_xvmaxi_w(_1, 1); }
+// CHECK-LABEL: @xvmaxi_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaxi_d(v4i64 _1) { return __lasx_xvmaxi_d(_1, 1); }
+// CHECK-LABEL: @xvmax_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmax_bu(_1, _2); }
+// CHECK-LABEL: @xvmax_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmax_hu(_1, _2); }
+// CHECK-LABEL: @xvmax_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmax_wu(_1, _2); }
+// CHECK-LABEL: @xvmax_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __lasx_xvmax_du(_1, _2); }
+// CHECK-LABEL: @xvmaxi_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvmaxi_bu(v32u8 _1) { return __lasx_xvmaxi_bu(_1, 1); }
+// CHECK-LABEL: @xvmaxi_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvmaxi_hu(v16u16 _1) { return __lasx_xvmaxi_hu(_1, 1); }
+// CHECK-LABEL: @xvmaxi_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvmaxi_wu(v8u32 _1) { return __lasx_xvmaxi_wu(_1, 1); }
+// CHECK-LABEL: @xvmaxi_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmaxi_du(v4u64 _1) { return __lasx_xvmaxi_du(_1, 1); }
+// CHECK-LABEL: @xvmin_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __lasx_xvmin_b(_1, _2); }
+// CHECK-LABEL: @xvmin_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __lasx_xvmin_h(_1, _2); }
+// CHECK-LABEL: @xvmin_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __lasx_xvmin_w(_1, _2); }
+// CHECK-LABEL: @xvmin_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __lasx_xvmin_d(_1, _2); }
+// CHECK-LABEL: @xvmini_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmini_b(v32i8 _1) { return __lasx_xvmini_b(_1, 1); }
+// CHECK-LABEL: @xvmini_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmini_h(v16i16 _1) { return __lasx_xvmini_h(_1, 1); }
+// CHECK-LABEL: @xvmini_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmini_w(v8i32 _1) { return __lasx_xvmini_w(_1, 1); }
+// CHECK-LABEL: @xvmini_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmini_d(v4i64 _1) { return __lasx_xvmini_d(_1, 1); }
+// CHECK-LABEL: @xvmin_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmin_bu(_1, _2); }
+// CHECK-LABEL: @xvmin_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmin_hu(_1, _2); }
+// CHECK-LABEL: @xvmin_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmin_wu(_1, _2); }
+// CHECK-LABEL: @xvmin_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __lasx_xvmin_du(_1, _2); }
+// CHECK-LABEL: @xvmini_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvmini_bu(v32u8 _1) { return __lasx_xvmini_bu(_1, 1); }
+// CHECK-LABEL: @xvmini_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvmini_hu(v16u16 _1) { return __lasx_xvmini_hu(_1, 1); }
+// CHECK-LABEL: @xvmini_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvmini_wu(v8u32 _1) { return __lasx_xvmini_wu(_1, 1); }
+// CHECK-LABEL: @xvmini_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmini_du(v4u64 _1) { return __lasx_xvmini_du(_1, 1); }
+// CHECK-LABEL: @xvseq_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __lasx_xvseq_b(_1, _2); }
+// CHECK-LABEL: @xvseq_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __lasx_xvseq_h(_1, _2); }
+// CHECK-LABEL: @xvseq_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __lasx_xvseq_w(_1, _2); }
+// CHECK-LABEL: @xvseq_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __lasx_xvseq_d(_1, _2); }
+// CHECK-LABEL: @xvseqi_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvseqi_b(v32i8 _1) { return __lasx_xvseqi_b(_1, 1); }
+// CHECK-LABEL: @xvseqi_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvseqi_h(v16i16 _1) { return __lasx_xvseqi_h(_1, 1); }
+// CHECK-LABEL: @xvseqi_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvseqi_w(v8i32 _1) { return __lasx_xvseqi_w(_1, 1); }
+// CHECK-LABEL: @xvseqi_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvseqi_d(v4i64 _1) { return __lasx_xvseqi_d(_1, 1); }
+// CHECK-LABEL: @xvslt_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __lasx_xvslt_b(_1, _2); }
+// CHECK-LABEL: @xvslt_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __lasx_xvslt_h(_1, _2); }
+// CHECK-LABEL: @xvslt_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __lasx_xvslt_w(_1, _2); }
+// CHECK-LABEL: @xvslt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __lasx_xvslt_d(_1, _2); }
+// CHECK-LABEL: @xvslti_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvslti_b(v32i8 _1) { return __lasx_xvslti_b(_1, 1); }
+// CHECK-LABEL: @xvslti_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvslti_h(v16i16 _1) { return __lasx_xvslti_h(_1, 1); }
+// CHECK-LABEL: @xvslti_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvslti_w(v8i32 _1) { return __lasx_xvslti_w(_1, 1); }
+// CHECK-LABEL: @xvslti_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvslti_d(v4i64 _1) { return __lasx_xvslti_d(_1, 1); }
+// CHECK-LABEL: @xvslt_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __lasx_xvslt_bu(_1, _2); }
+// CHECK-LABEL: @xvslt_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __lasx_xvslt_hu(_1, _2); }
+// CHECK-LABEL: @xvslt_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __lasx_xvslt_wu(_1, _2); }
+// CHECK-LABEL: @xvslt_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __lasx_xvslt_du(_1, _2); }
+// CHECK-LABEL: @xvslti_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvslti_bu(v32u8 _1) { return __lasx_xvslti_bu(_1, 1); }
+// CHECK-LABEL: @xvslti_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvslti_hu(v16u16 _1) { return __lasx_xvslti_hu(_1, 1); }
+// CHECK-LABEL: @xvslti_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvslti_wu(v8u32 _1) { return __lasx_xvslti_wu(_1, 1); }
+// CHECK-LABEL: @xvslti_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvslti_du(v4u64 _1) { return __lasx_xvslti_du(_1, 1); }
+// CHECK-LABEL: @xvsle_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __lasx_xvsle_b(_1, _2); }
+// CHECK-LABEL: @xvsle_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __lasx_xvsle_h(_1, _2); }
+// CHECK-LABEL: @xvsle_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __lasx_xvsle_w(_1, _2); }
+// CHECK-LABEL: @xvsle_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __lasx_xvsle_d(_1, _2); }
+// CHECK-LABEL: @xvslei_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvslei_b(v32i8 _1) { return __lasx_xvslei_b(_1, 1); }
+// CHECK-LABEL: @xvslei_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvslei_h(v16i16 _1) { return __lasx_xvslei_h(_1, 1); }
+// CHECK-LABEL: @xvslei_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvslei_w(v8i32 _1) { return __lasx_xvslei_w(_1, 1); }
+// CHECK-LABEL: @xvslei_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvslei_d(v4i64 _1) { return __lasx_xvslei_d(_1, 1); }
+// CHECK-LABEL: @xvsle_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsle_bu(_1, _2); }
+// CHECK-LABEL: @xvsle_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsle_hu(_1, _2); }
+// CHECK-LABEL: @xvsle_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsle_wu(_1, _2); }
+// CHECK-LABEL: @xvsle_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __lasx_xvsle_du(_1, _2); }
+// CHECK-LABEL: @xvslei_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvslei_bu(v32u8 _1) { return __lasx_xvslei_bu(_1, 1); }
+// CHECK-LABEL: @xvslei_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvslei_hu(v16u16 _1) { return __lasx_xvslei_hu(_1, 1); }
+// CHECK-LABEL: @xvslei_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvslei_wu(v8u32 _1) { return __lasx_xvslei_wu(_1, 1); }
+// CHECK-LABEL: @xvslei_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvslei_du(v4u64 _1) { return __lasx_xvslei_du(_1, 1); }
+// CHECK-LABEL: @xvsat_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsat_b(v32i8 _1) { return __lasx_xvsat_b(_1, 1); }
+// CHECK-LABEL: @xvsat_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsat_h(v16i16 _1) { return __lasx_xvsat_h(_1, 1); }
+// CHECK-LABEL: @xvsat_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsat_w(v8i32 _1) { return __lasx_xvsat_w(_1, 1); }
+// CHECK-LABEL: @xvsat_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsat_d(v4i64 _1) { return __lasx_xvsat_d(_1, 1); }
+// CHECK-LABEL: @xvsat_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvsat_bu(v32u8 _1) { return __lasx_xvsat_bu(_1, 1); }
+// CHECK-LABEL: @xvsat_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvsat_hu(v16u16 _1) { return __lasx_xvsat_hu(_1, 1); }
+// CHECK-LABEL: @xvsat_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvsat_wu(v8u32 _1) { return __lasx_xvsat_wu(_1, 1); }
+// CHECK-LABEL: @xvsat_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvsat_du(v4u64 _1) { return __lasx_xvsat_du(_1, 1); }
+// CHECK-LABEL: @xvadda_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __lasx_xvadda_b(_1, _2); } +// CHECK-LABEL: @xvadda_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __lasx_xvadda_h(_1, _2); } +// CHECK-LABEL: @xvadda_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __lasx_xvadda_w(_1, _2); } +// CHECK-LABEL: @xvadda_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __lasx_xvadda_d(_1, _2); } +// CHECK-LABEL: @xvsadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvsadd_b(_1, _2); } +// CHECK-LABEL: @xvsadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvsadd_h(_1, _2); } +// CHECK-LABEL: @xvsadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsadd_w(v8i32 
_1, v8i32 _2) { return __lasx_xvsadd_w(_1, _2); } +// CHECK-LABEL: @xvsadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvsadd_d(_1, _2); } +// CHECK-LABEL: @xvsadd_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsadd_bu(_1, _2); } +// CHECK-LABEL: @xvsadd_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsadd_hu(_1, _2); } +// CHECK-LABEL: @xvsadd_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsadd_wu(_1, _2); } +// CHECK-LABEL: @xvsadd_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __lasx_xvsadd_du(_1, _2); } +// CHECK-LABEL: @xvavg_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __lasx_xvavg_b(_1, _2); } +// CHECK-LABEL: @xvavg_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __lasx_xvavg_h(_1, _2); } +// CHECK-LABEL: @xvavg_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __lasx_xvavg_w(_1, _2); } +// CHECK-LABEL: @xvavg_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __lasx_xvavg_d(_1, _2); } +// CHECK-LABEL: @xvavg_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavg_bu(_1, _2); } +// CHECK-LABEL: @xvavg_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavg_hu(_1, _2); } +// CHECK-LABEL: @xvavg_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavg_wu(_1, _2); } +// CHECK-LABEL: @xvavg_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvavg_du(v4u64 _1, 
v4u64 _2) { return __lasx_xvavg_du(_1, _2); } +// CHECK-LABEL: @xvavgr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __lasx_xvavgr_b(_1, _2); } +// CHECK-LABEL: @xvavgr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __lasx_xvavgr_h(_1, _2); } +// CHECK-LABEL: @xvavgr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __lasx_xvavgr_w(_1, _2); } +// CHECK-LABEL: @xvavgr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __lasx_xvavgr_d(_1, _2); } +// CHECK-LABEL: @xvavgr_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavgr_bu(_1, _2); } +// CHECK-LABEL: @xvavgr_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavgr_hu(_1, _2); } +// CHECK-LABEL: @xvavgr_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], 
align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavgr_wu(_1, _2); } +// CHECK-LABEL: @xvavgr_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __lasx_xvavgr_du(_1, _2); } +// CHECK-LABEL: @xvssub_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __lasx_xvssub_b(_1, _2); } +// CHECK-LABEL: @xvssub_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __lasx_xvssub_h(_1, _2); } +// CHECK-LABEL: @xvssub_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __lasx_xvssub_w(_1, _2); } +// CHECK-LABEL: @xvssub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __lasx_xvssub_d(_1, _2); } +// CHECK-LABEL: @xvssub_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// 
+v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __lasx_xvssub_bu(_1, _2); } +// CHECK-LABEL: @xvssub_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __lasx_xvssub_hu(_1, _2); } +// CHECK-LABEL: @xvssub_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __lasx_xvssub_wu(_1, _2); } +// CHECK-LABEL: @xvssub_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __lasx_xvssub_du(_1, _2); } +// CHECK-LABEL: @xvabsd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __lasx_xvabsd_b(_1, _2); } +// CHECK-LABEL: @xvabsd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __lasx_xvabsd_h(_1, _2); } +// CHECK-LABEL: @xvabsd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __lasx_xvabsd_w(_1, _2); } +// CHECK-LABEL: @xvabsd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 
x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __lasx_xvabsd_d(_1, _2); } +// CHECK-LABEL: @xvabsd_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvabsd_bu(_1, _2); } +// CHECK-LABEL: @xvabsd_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvabsd_hu(_1, _2); } +// CHECK-LABEL: @xvabsd_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvabsd_wu(_1, _2); } +// CHECK-LABEL: @xvabsd_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __lasx_xvabsd_du(_1, _2); } +// CHECK-LABEL: @xvmul_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __lasx_xvmul_b(_1, _2); } +// CHECK-LABEL: @xvmul_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __lasx_xvmul_h(_1, _2); } +// CHECK-LABEL: @xvmul_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __lasx_xvmul_w(_1, _2); } +// CHECK-LABEL: @xvmul_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __lasx_xvmul_d(_1, _2); } +// CHECK-LABEL: @xvmadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmadd_b(_1, _2, _3); } +// CHECK-LABEL: @xvmadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmadd_h(_1, _2, _3); } +// CHECK-LABEL: @xvmadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]]) +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmadd_w(_1, _2, _3); } +// CHECK-LABEL: @xvmadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: 
[[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmadd_d(_1, _2, _3); } +// CHECK-LABEL: @xvmsub_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmsub_b(_1, _2, _3); } +// CHECK-LABEL: @xvmsub_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmsub_h(_1, _2, _3); } +// CHECK-LABEL: @xvmsub_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]]) +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmsub_w(_1, _2, _3); } +// CHECK-LABEL: @xvmsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmsub_d(_1, _2, _3); } +// CHECK-LABEL: @xvdiv_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __lasx_xvdiv_b(_1, _2); 
} +// CHECK-LABEL: @xvdiv_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __lasx_xvdiv_h(_1, _2); } +// CHECK-LABEL: @xvdiv_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __lasx_xvdiv_w(_1, _2); } +// CHECK-LABEL: @xvdiv_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __lasx_xvdiv_d(_1, _2); } +// CHECK-LABEL: @xvdiv_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __lasx_xvdiv_bu(_1, _2); } +// CHECK-LABEL: @xvdiv_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __lasx_xvdiv_hu(_1, _2); } +// CHECK-LABEL: @xvdiv_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __lasx_xvdiv_wu(_1, _2); } +// CHECK-LABEL: @xvdiv_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 
x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __lasx_xvdiv_du(_1, _2); } +// CHECK-LABEL: @xvhaddw_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhaddw_h_b(_1, _2); } +// CHECK-LABEL: @xvhaddw_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhaddw_w_h(_1, _2); } +// CHECK-LABEL: @xvhaddw_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhaddw_d_w(_1, _2); } +// CHECK-LABEL: @xvhaddw_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhaddw_hu_bu(_1, _2); } +// CHECK-LABEL: @xvhaddw_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhaddw_wu_hu(_1, _2); } +// CHECK-LABEL: @xvhaddw_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: ret void +// +v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhaddw_du_wu(_1, _2); } +// CHECK-LABEL: @xvhsubw_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhsubw_h_b(_1, _2); } +// CHECK-LABEL: @xvhsubw_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhsubw_w_h(_1, _2); } +// CHECK-LABEL: @xvhsubw_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhsubw_d_w(_1, _2); } +// CHECK-LABEL: @xvhsubw_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhsubw_hu_bu(_1, _2); } +// CHECK-LABEL: @xvhsubw_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhsubw_wu_hu(_1, _2); } +// CHECK-LABEL: @xvhsubw_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhsubw_du_wu(_1, _2); } +// CHECK-LABEL: @xvmod_b( +// CHECK-NEXT: entry: 
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __lasx_xvmod_b(_1, _2); } +// CHECK-LABEL: @xvmod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __lasx_xvmod_h(_1, _2); } +// CHECK-LABEL: @xvmod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __lasx_xvmod_w(_1, _2); } +// CHECK-LABEL: @xvmod_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __lasx_xvmod_d(_1, _2); } +// CHECK-LABEL: @xvmod_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmod_bu(_1, _2); } +// CHECK-LABEL: @xvmod_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmod_hu(_1, _2); } +// CHECK-LABEL: @xvmod_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> 
[[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmod_wu(_1, _2); } +// CHECK-LABEL: @xvmod_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __lasx_xvmod_du(_1, _2); } +// CHECK-LABEL: @xvrepl128vei_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvrepl128vei_b(v32i8 _1) { return __lasx_xvrepl128vei_b(_1, 1); } +// CHECK-LABEL: @xvrepl128vei_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvrepl128vei_h(v16i16 _1) { return __lasx_xvrepl128vei_h(_1, 1); } +// CHECK-LABEL: @xvrepl128vei_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvrepl128vei_w(v8i32 _1) { return __lasx_xvrepl128vei_w(_1, 1); } +// CHECK-LABEL: @xvrepl128vei_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvrepl128vei_d(v4i64 _1) { return __lasx_xvrepl128vei_d(_1, 1); } +// CHECK-LABEL: @xvpickev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickev_b(_1, _2); } +// CHECK-LABEL: @xvpickev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickev_h(_1, _2); } +// CHECK-LABEL: @xvpickev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickev_w(_1, _2); } +// CHECK-LABEL: @xvpickev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickev_d(_1, _2); } +// CHECK-LABEL: @xvpickod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickod_b(_1, _2); } +// CHECK-LABEL: @xvpickod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickod_h(_1, _2); } +// CHECK-LABEL: @xvpickod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickod_w(_1, _2); } +// CHECK-LABEL: @xvpickod_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickod_d(_1, _2); } +// CHECK-LABEL: @xvilvh_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvh_b(_1, _2); } +// CHECK-LABEL: @xvilvh_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvh_h(_1, _2); } +// CHECK-LABEL: @xvilvh_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvh_w(_1, _2); } +// CHECK-LABEL: @xvilvh_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvh_d(_1, _2); } +// CHECK-LABEL: @xvilvl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvl_b(_1, _2); } +// CHECK-LABEL: @xvilvl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvl_h(_1, _2); } +// CHECK-LABEL: @xvilvl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> 
[[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvl_w(_1, _2); }
+// CHECK-LABEL: @xvilvl_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvl_d(_1, _2); }
+// CHECK-LABEL: @xvpackev_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackev_b(_1, _2); }
+// CHECK-LABEL: @xvpackev_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackev_h(_1, _2); }
+// CHECK-LABEL: @xvpackev_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackev_w(_1, _2); }
+// CHECK-LABEL: @xvpackev_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackev_d(_1, _2); }
+// CHECK-LABEL: @xvpackod_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackod_b(_1, _2); }
+// CHECK-LABEL: @xvpackod_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackod_h(_1, _2); }
+// CHECK-LABEL: @xvpackod_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackod_w(_1, _2); }
+// CHECK-LABEL: @xvpackod_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackod_d(_1, _2); }
+// CHECK-LABEL: @xvshuf_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvshuf_b(_1, _2, _3); }
+// CHECK-LABEL: @xvshuf_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvshuf_h(_1, _2, _3); }
+// CHECK-LABEL: @xvshuf_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvshuf_w(_1, _2, _3); }
+// CHECK-LABEL: @xvshuf_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvshuf_d(_1, _2, _3); }
+// CHECK-LABEL: @xvand_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __lasx_xvand_v(_1, _2); }
+// CHECK-LABEL: @xvandi_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvandi_b(v32u8 _1) { return __lasx_xvandi_b(_1, 1); }
+// CHECK-LABEL: @xvor_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __lasx_xvor_v(_1, _2); }
+// CHECK-LABEL: @xvori_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvori_b(v32u8 _1) { return __lasx_xvori_b(_1, 1); }
+// CHECK-LABEL: @xvnor_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __lasx_xvnor_v(_1, _2); }
+// CHECK-LABEL: @xvnori_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvnori_b(v32u8 _1) { return __lasx_xvnori_b(_1, 1); }
+// CHECK-LABEL: @xvxor_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __lasx_xvxor_v(_1, _2); }
+// CHECK-LABEL: @xvxori_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvxori_b(v32u8 _1) { return __lasx_xvxori_b(_1, 1); }
+// CHECK-LABEL: @xvbitsel_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __lasx_xvbitsel_v(_1, _2, _3); }
+// CHECK-LABEL: @xvbitseli_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitseli_b(_1, _2, 1); }
+// CHECK-LABEL: @xvshuf4i_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvshuf4i_b(v32i8 _1) { return __lasx_xvshuf4i_b(_1, 1); }
+// CHECK-LABEL: @xvshuf4i_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvshuf4i_h(v16i16 _1) { return __lasx_xvshuf4i_h(_1, 1); }
+// CHECK-LABEL: @xvshuf4i_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvshuf4i_w(v8i32 _1) { return __lasx_xvshuf4i_w(_1, 1); }
+// CHECK-LABEL: @xvreplgr2vr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]])
+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvreplgr2vr_b(int _1) { return __lasx_xvreplgr2vr_b(_1); }
+// CHECK-LABEL: @xvreplgr2vr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]])
+// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvreplgr2vr_h(int _1) { return __lasx_xvreplgr2vr_h(_1); }
+// CHECK-LABEL: @xvreplgr2vr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]])
+// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvreplgr2vr_w(int _1) { return __lasx_xvreplgr2vr_w(_1); }
+// CHECK-LABEL: @xvreplgr2vr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]])
+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvreplgr2vr_d(int _1) { return __lasx_xvreplgr2vr_d(_1); }
+// CHECK-LABEL: @xvpcnt_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_112]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvpcnt_b(v32i8 _1) { return __lasx_xvpcnt_b(_1); }
+// CHECK-LABEL: @xvpcnt_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_112]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvpcnt_h(v16i16 _1) { return __lasx_xvpcnt_h(_1); }
+// CHECK-LABEL: @xvpcnt_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_112]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvpcnt_w(v8i32 _1) { return __lasx_xvpcnt_w(_1); }
+// CHECK-LABEL: @xvpcnt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvpcnt_d(v4i64 _1) { return __lasx_xvpcnt_d(_1); }
+// CHECK-LABEL: @xvclo_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_112]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvclo_b(v32i8 _1) { return __lasx_xvclo_b(_1); }
+// CHECK-LABEL: @xvclo_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_112]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvclo_h(v16i16 _1) { return __lasx_xvclo_h(_1); }
+// CHECK-LABEL: @xvclo_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_112]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvclo_w(v8i32 _1) { return __lasx_xvclo_w(_1); }
+// CHECK-LABEL: @xvclo_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvclo_d(v4i64 _1) { return __lasx_xvclo_d(_1); }
+// CHECK-LABEL: @xvclz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_112]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvclz_b(v32i8 _1) { return __lasx_xvclz_b(_1); }
+// CHECK-LABEL: @xvclz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_112]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvclz_h(v16i16 _1) { return __lasx_xvclz_h(_1); }
+// CHECK-LABEL: @xvclz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_112]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvclz_w(v8i32 _1) { return __lasx_xvclz_w(_1); }
+// CHECK-LABEL: @xvclz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvclz_d(v4i64 _1) { return __lasx_xvclz_d(_1); }
+// CHECK-LABEL: @xvfadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __lasx_xvfadd_s(_1, _2); }
+// CHECK-LABEL: @xvfadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __lasx_xvfadd_d(_1, _2); }
+// CHECK-LABEL: @xvfsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __lasx_xvfsub_s(_1, _2); }
+// CHECK-LABEL: @xvfsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __lasx_xvfsub_d(_1, _2); }
+// CHECK-LABEL: @xvfmul_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmul_s(_1, _2); }
+// CHECK-LABEL: @xvfmul_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmul_d(_1, _2); }
+// CHECK-LABEL: @xvfdiv_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __lasx_xvfdiv_s(_1, _2); }
+// CHECK-LABEL: @xvfdiv_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __lasx_xvfdiv_d(_1, _2); }
+// CHECK-LABEL: @xvfcvt_h_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcvt_h_s(_1, _2); }
+// CHECK-LABEL: @xvfcvt_s_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcvt_s_d(_1, _2); }
+// CHECK-LABEL: @xvfmin_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmin_s(_1, _2); }
+// CHECK-LABEL: @xvfmin_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmin_d(_1, _2); }
+// CHECK-LABEL: @xvfmina_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmina_s(_1, _2); }
+// CHECK-LABEL: @xvfmina_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmina_d(_1, _2); }
+// CHECK-LABEL: @xvfmax_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmax_s(_1, _2); }
+// CHECK-LABEL: @xvfmax_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmax_d(_1, _2); }
+// CHECK-LABEL: @xvfmaxa_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmaxa_s(_1, _2); }
+// CHECK-LABEL: @xvfmaxa_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmaxa_d(_1, _2); }
+// CHECK-LABEL: @xvfclass_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvfclass_s(v8f32 _1) { return __lasx_xvfclass_s(_1); }
+// CHECK-LABEL: @xvfclass_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvfclass_d(v4f64 _1) { return __lasx_xvfclass_d(_1); }
+// CHECK-LABEL: @xvfsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfsqrt_s(v8f32 _1) { return __lasx_xvfsqrt_s(_1); }
+// CHECK-LABEL: @xvfsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfsqrt_d(v4f64 _1) { return __lasx_xvfsqrt_d(_1); }
+// CHECK-LABEL: @xvfrecip_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfrecip_s(v8f32 _1) { return __lasx_xvfrecip_s(_1); }
+// CHECK-LABEL: @xvfrecip_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfrecip_d(v4f64 _1) { return __lasx_xvfrecip_d(_1); }
+// CHECK-LABEL: @xvfrint_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfrint_s(v8f32 _1) { return __lasx_xvfrint_s(_1); }
+// CHECK-LABEL: @xvfrint_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfrint_d(v4f64 _1) { return __lasx_xvfrint_d(_1); }
+// CHECK-LABEL: @xvfrsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfrsqrt_s(v8f32 _1) { return __lasx_xvfrsqrt_s(_1); }
+// CHECK-LABEL: @xvfrsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfrsqrt_d(v4f64 _1) { return __lasx_xvfrsqrt_d(_1); }
+// CHECK-LABEL: @xvflogb_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvflogb_s(v8f32 _1) { return __lasx_xvflogb_s(_1); }
+// CHECK-LABEL: @xvflogb_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvflogb_d(v4f64 _1) { return __lasx_xvflogb_d(_1); }
+// CHECK-LABEL: @xvfcvth_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_112]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfcvth_s_h(v16i16 _1) { return __lasx_xvfcvth_s_h(_1); }
+// CHECK-LABEL: @xvfcvth_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfcvth_d_s(v8f32 _1) { return __lasx_xvfcvth_d_s(_1); }
+// CHECK-LABEL: @xvfcvtl_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_112]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfcvtl_s_h(v16i16 _1) { return __lasx_xvfcvtl_s_h(_1); }
+// CHECK-LABEL: @xvfcvtl_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfcvtl_d_s(v8f32 _1) { return __lasx_xvfcvtl_d_s(_1); }
+// CHECK-LABEL: @xvftint_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvftint_w_s(v8f32 _1) { return __lasx_xvftint_w_s(_1); }
+// CHECK-LABEL: @xvftint_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftint_l_d(v4f64 _1) { return __lasx_xvftint_l_d(_1); }
+// CHECK-LABEL: @xvftint_wu_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvftint_wu_s(v8f32 _1) { return __lasx_xvftint_wu_s(_1); }
+// CHECK-LABEL: @xvftint_lu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvftint_lu_d(v4f64 _1) { return __lasx_xvftint_lu_d(_1); }
+// CHECK-LABEL: @xvftintrz_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvftintrz_w_s(v8f32 _1) { return __lasx_xvftintrz_w_s(_1); }
+// CHECK-LABEL: @xvftintrz_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftintrz_l_d(v4f64 _1) { return __lasx_xvftintrz_l_d(_1); }
+// CHECK-LABEL: @xvftintrz_wu_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvftintrz_wu_s(v8f32 _1) { return __lasx_xvftintrz_wu_s(_1); }
+// CHECK-LABEL: @xvftintrz_lu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvftintrz_lu_d(v4f64 _1) { return __lasx_xvftintrz_lu_d(_1); }
+// CHECK-LABEL: @xvffint_s_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_112]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvffint_s_w(v8i32 _1) { return __lasx_xvffint_s_w(_1); }
+// CHECK-LABEL: @xvffint_d_l(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvffint_d_l(v4i64 _1) { return __lasx_xvffint_d_l(_1); }
+// CHECK-LABEL: @xvffint_s_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_112]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvffint_s_wu(v8u32 _1) { return __lasx_xvffint_s_wu(_1); }
+// CHECK-LABEL: @xvffint_d_lu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvffint_d_lu(v4u64 _1) { return __lasx_xvffint_d_lu(_1); }
+// CHECK-LABEL: @xvreplve_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_112]], i32 [[_2:%.*]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvreplve_b(v32i8 _1, int _2) { return __lasx_xvreplve_b(_1, _2); }
+// CHECK-LABEL: @xvreplve_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_112]], i32 [[_2:%.*]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvreplve_h(v16i16 _1, int _2) { return __lasx_xvreplve_h(_1, _2); }
+// CHECK-LABEL: @xvreplve_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_112]], i32 [[_2:%.*]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvreplve_w(v8i32 _1, int _2) { return __lasx_xvreplve_w(_1, _2); }
+// CHECK-LABEL: @xvreplve_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2:%.*]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvreplve_d(v4i64 _1, int _2) { return __lasx_xvreplve_d(_1, _2); }
+// CHECK-LABEL: @xvpermi_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __lasx_xvpermi_w(_1, _2, 1); }
+// CHECK-LABEL: @xvandn_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __lasx_xvandn_v(_1, _2); }
+// CHECK-LABEL: @xvneg_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_112]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvneg_b(v32i8 _1) { return __lasx_xvneg_b(_1); }
+// CHECK-LABEL: @xvneg_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_112]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvneg_h(v16i16 _1) { return __lasx_xvneg_h(_1); }
+// CHECK-LABEL: @xvneg_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_112]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvneg_w(v8i32 _1) { return __lasx_xvneg_w(_1); }
+// CHECK-LABEL: @xvneg_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvneg_d(v4i64 _1) { return __lasx_xvneg_d(_1); }
+// CHECK-LABEL: @xvmuh_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __lasx_xvmuh_b(_1, _2); }
+// CHECK-LABEL: @xvmuh_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __lasx_xvmuh_h(_1, _2); }
+// CHECK-LABEL: @xvmuh_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __lasx_xvmuh_w(_1, _2); }
+// CHECK-LABEL: @xvmuh_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __lasx_xvmuh_d(_1, _2); }
+// CHECK-LABEL: @xvmuh_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmuh_bu(_1, _2); }
+// CHECK-LABEL: @xvmuh_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmuh_hu(_1, _2); }
+// CHECK-LABEL: @xvmuh_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmuh_wu(_1, _2); }
+// CHECK-LABEL: @xvmuh_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __lasx_xvmuh_du(_1, _2); }
+// CHECK-LABEL: @xvsllwil_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsllwil_h_b(v32i8 _1) { return __lasx_xvsllwil_h_b(_1, 1); }
+// CHECK-LABEL: @xvsllwil_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsllwil_w_h(v16i16 _1) { return __lasx_xvsllwil_w_h(_1, 1); }
+// CHECK-LABEL: @xvsllwil_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsllwil_d_w(v8i32 _1) { return __lasx_xvsllwil_d_w(_1, 1); }
+// CHECK-LABEL: @xvsllwil_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvsllwil_hu_bu(v32u8 _1) { return __lasx_xvsllwil_hu_bu(_1, 1); }
+// CHECK-LABEL: @xvsllwil_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvsllwil_wu_hu(v16u16 _1) { return __lasx_xvsllwil_wu_hu(_1, 1); }
+// CHECK-LABEL: @xvsllwil_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvsllwil_du_wu(v8u32 _1) { return __lasx_xvsllwil_du_wu(_1, 1); }
+// CHECK-LABEL: @xvsran_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsran_b_h(_1, _2); }
+// CHECK-LABEL: @xvsran_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsran_h_w(_1, _2); }
+// CHECK-LABEL: @xvsran_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsran_w_d(_1, _2); }
+// CHECK-LABEL: @xvssran_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssran_b_h(_1, _2); }
+// CHECK-LABEL: @xvssran_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssran_h_w(_1, _2); }
+// CHECK-LABEL: @xvssran_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssran_w_d(_1, _2); }
+// CHECK-LABEL: @xvssran_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssran_bu_h(_1, _2); }
+// CHECK-LABEL: @xvssran_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssran_hu_w(_1, _2); }
+// CHECK-LABEL: @xvssran_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssran_wu_d(_1, _2); }
+// CHECK-LABEL: @xvsrarn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrarn_b_h(_1, _2); }
+// CHECK-LABEL: @xvsrarn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrarn_h_w(_1, _2); }
+// CHECK-LABEL: @xvsrarn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrarn_w_d(_1, _2); }
+// CHECK-LABEL: @xvssrarn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrarn_b_h(_1, _2); }
+// CHECK-LABEL: @xvssrarn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrarn_h_w(_1, _2); }
+// CHECK-LABEL: @xvssrarn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrarn_w_d(_1, _2); }
+// CHECK-LABEL: @xvssrarn_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrarn_bu_h(_1, _2); }
+// CHECK-LABEL: @xvssrarn_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrarn_hu_w(_1, _2); }
+// CHECK-LABEL: @xvssrarn_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrarn_wu_d(_1, _2); }
+// CHECK-LABEL: @xvsrln_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrln_b_h(_1, _2); }
+// CHECK-LABEL: @xvsrln_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrln_h_w(_1, _2); }
+// CHECK-LABEL: @xvsrln_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrln_w_d(_1, _2); }
+// CHECK-LABEL: @xvssrln_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrln_bu_h(_1, _2); }
+// CHECK-LABEL: @xvssrln_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrln_hu_w(_1, _2); }
+// CHECK-LABEL: @xvssrln_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrln_wu_d(_1, _2); }
+// CHECK-LABEL: @xvsrlrn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrn_b_h(_1, _2); }
+// CHECK-LABEL: @xvsrlrn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrn_h_w(_1, _2); }
+// CHECK-LABEL: @xvsrlrn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrn_w_d(_1, _2); }
+// CHECK-LABEL: @xvssrlrn_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrlrn_bu_h(_1, _2); }
+// CHECK-LABEL: @xvssrlrn_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrlrn_hu_w(_1, _2); }
+// CHECK-LABEL:
@xvssrlrn_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrlrn_wu_d(_1, _2); } +// CHECK-LABEL: @xvfrstpi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __lasx_xvfrstpi_b(_1, _2, 1); } +// CHECK-LABEL: @xvfrstpi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __lasx_xvfrstpi_h(_1, _2, 1); } +// CHECK-LABEL: @xvfrstp_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvfrstp_b(_1, _2, _3); } +// CHECK-LABEL: @xvfrstp_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvfrstp_h(_1, _2, _3); } +// CHECK-LABEL: @xvshuf4i_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return 
__lasx_xvshuf4i_d(_1, _2, 1); } +// CHECK-LABEL: @xvbsrl_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvbsrl_v(v32i8 _1) { return __lasx_xvbsrl_v(_1, 1); } +// CHECK-LABEL: @xvbsll_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvbsll_v(v32i8 _1) { return __lasx_xvbsll_v(_1, 1); } +// CHECK-LABEL: @xvextrins_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __lasx_xvextrins_b(_1, _2, 1); } +// CHECK-LABEL: @xvextrins_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __lasx_xvextrins_h(_1, _2, 1); } +// CHECK-LABEL: @xvextrins_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __lasx_xvextrins_w(_1, _2, 1); } +// CHECK-LABEL: @xvextrins_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __lasx_xvextrins_d(_1, _2, 1); } +// CHECK-LABEL: @xvmskltz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_112]]) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmskltz_b(v32i8 _1) { return 
__lasx_xvmskltz_b(_1); } +// CHECK-LABEL: @xvmskltz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_112]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmskltz_h(v16i16 _1) { return __lasx_xvmskltz_h(_1); } +// CHECK-LABEL: @xvmskltz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_112]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmskltz_w(v8i32 _1) { return __lasx_xvmskltz_w(_1); } +// CHECK-LABEL: @xvmskltz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmskltz_d(v4i64 _1) { return __lasx_xvmskltz_d(_1); } +// CHECK-LABEL: @xvsigncov_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __lasx_xvsigncov_b(_1, _2); } +// CHECK-LABEL: @xvsigncov_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __lasx_xvsigncov_h(_1, _2); } +// CHECK-LABEL: @xvsigncov_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __lasx_xvsigncov_w(_1, _2); } +// CHECK-LABEL: @xvsigncov_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __lasx_xvsigncov_d(_1, _2); } 
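+// An illustrative sketch, not generator output: it assumes only this file's
+// v8f32 typedef and the __lasx_xvfmadd_s builtin exercised in the checks that
+// follow. The xvfmadd/xvfmsub family performs a fused per-lane multiply-add,
+// so a caller can combine three vectors with a single rounding step instead
+// of a separate multiply and add:
+static v8f32 fmadd_s_example(v8f32 _1, v8f32 _2, v8f32 _3) {
+  // Each of the 8 float lanes yields _1[i] * _2[i] + _3[i], rounded once.
+  return __lasx_xvfmadd_s(_1, _2, _3);
+}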
+// CHECK-LABEL: @xvfmadd_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmadd_s(_1, _2, _3); } +// CHECK-LABEL: @xvfmadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmadd_d(_1, _2, _3); } +// CHECK-LABEL: @xvfmsub_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmsub_s(_1, _2, _3); } +// CHECK-LABEL: @xvfmsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmsub_d(_1, _2, _3); } +// CHECK-LABEL: @xvfnmadd_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmadd_s(_1, _2, _3); } +// CHECK-LABEL: @xvfnmadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmadd_d(_1, _2, _3); } +// CHECK-LABEL: @xvfnmsub_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmsub_s(_1, _2, _3); } +// CHECK-LABEL: @xvfnmsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmsub_d(_1, _2, _3); } +// CHECK-LABEL: @xvftintrne_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrne_w_s(v8f32 _1) { return __lasx_xvftintrne_w_s(_1); } +// CHECK-LABEL: @xvftintrne_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrne_l_d(v4f64 _1) { return __lasx_xvftintrne_l_d(_1); } +// CHECK-LABEL: @xvftintrp_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrp_w_s(v8f32 _1) { return __lasx_xvftintrp_w_s(_1); } +// CHECK-LABEL: @xvftintrp_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x 
i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrp_l_d(v4f64 _1) { return __lasx_xvftintrp_l_d(_1); } +// CHECK-LABEL: @xvftintrm_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrm_w_s(v8f32 _1) { return __lasx_xvftintrm_w_s(_1); } +// CHECK-LABEL: @xvftintrm_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrm_l_d(v4f64 _1) { return __lasx_xvftintrm_l_d(_1); } +// CHECK-LABEL: @xvftint_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftint_w_d(_1, _2); } +// CHECK-LABEL: @xvffint_s_l( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __lasx_xvffint_s_l(_1, _2); } +// CHECK-LABEL: @xvftintrz_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrz_w_d(_1, _2); } +// CHECK-LABEL: @xvftintrp_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrp_w_d(_1, _2); } +// CHECK-LABEL: @xvftintrm_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrm_w_d(_1, _2); } +// CHECK-LABEL: @xvftintrne_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrne_w_d(_1, _2); } +// CHECK-LABEL: @xvftinth_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftinth_l_s(v8f32 _1) { return __lasx_xvftinth_l_s(_1); } +// CHECK-LABEL: @xvftintl_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintl_l_s(v8f32 _1) { return __lasx_xvftintl_l_s(_1); } +// CHECK-LABEL: @xvffinth_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_112]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvffinth_d_w(v8i32 _1) { return __lasx_xvffinth_d_w(_1); } +// CHECK-LABEL: @xvffintl_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_112]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvffintl_d_w(v8i32 _1) { return __lasx_xvffintl_d_w(_1); } +// CHECK-LABEL: @xvftintrzh_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrzh_l_s(v8f32 _1) { return __lasx_xvftintrzh_l_s(_1); } +// CHECK-LABEL: @xvftintrzl_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrzl_l_s(v8f32 _1) { return 
__lasx_xvftintrzl_l_s(_1); } +// CHECK-LABEL: @xvftintrph_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrph_l_s(v8f32 _1) { return __lasx_xvftintrph_l_s(_1); } +// CHECK-LABEL: @xvftintrpl_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrpl_l_s(v8f32 _1) { return __lasx_xvftintrpl_l_s(_1); } +// CHECK-LABEL: @xvftintrmh_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrmh_l_s(v8f32 _1) { return __lasx_xvftintrmh_l_s(_1); } +// CHECK-LABEL: @xvftintrml_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrml_l_s(v8f32 _1) { return __lasx_xvftintrml_l_s(_1); } +// CHECK-LABEL: @xvftintrneh_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrneh_l_s(v8f32 _1) { return __lasx_xvftintrneh_l_s(_1); } +// CHECK-LABEL: @xvftintrnel_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrnel_l_s(v8f32 _1) { return __lasx_xvftintrnel_l_s(_1); } +// CHECK-LABEL: @xvfrintrne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfrintrne_s(v8f32 _1) { return __lasx_xvfrintrne_s(_1); } +// CHECK-LABEL: @xvfrintrne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 
xvfrintrne_d(v4f64 _1) { return __lasx_xvfrintrne_d(_1); } +// CHECK-LABEL: @xvfrintrz_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfrintrz_s(v8f32 _1) { return __lasx_xvfrintrz_s(_1); } +// CHECK-LABEL: @xvfrintrz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfrintrz_d(v4f64 _1) { return __lasx_xvfrintrz_d(_1); } +// CHECK-LABEL: @xvfrintrp_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfrintrp_s(v8f32 _1) { return __lasx_xvfrintrp_s(_1); } +// CHECK-LABEL: @xvfrintrp_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfrintrp_d(v4f64 _1) { return __lasx_xvfrintrp_d(_1); } +// CHECK-LABEL: @xvfrintrm_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfrintrm_s(v8f32 _1) { return __lasx_xvfrintrm_s(_1); } +// CHECK-LABEL: @xvfrintrm_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfrintrm_d(v4f64 _1) { return __lasx_xvfrintrm_d(_1); } +// CHECK-LABEL: @xvld( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvld(void * _1) { return __lasx_xvld(_1, 1); } +// CHECK-LABEL: @xvst( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1) +// CHECK-NEXT: ret void +// +void xvst(v32i8 _1, void * _2) { return __lasx_xvst(_1, _2, 1); } +// CHECK-LABEL: @xvstelm_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: tail call void 
@llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1, i32 1) +// CHECK-NEXT: ret void +// +void xvstelm_b(v32i8 _1, void * _2) { return __lasx_xvstelm_b(_1, _2, 1, 1); } +// CHECK-LABEL: @xvstelm_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2:%.*]], i32 2, i32 1) +// CHECK-NEXT: ret void +// +void xvstelm_h(v16i16 _1, void * _2) { return __lasx_xvstelm_h(_1, _2, 2, 1); } +// CHECK-LABEL: @xvstelm_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2:%.*]], i32 4, i32 1) +// CHECK-NEXT: ret void +// +void xvstelm_w(v8i32 _1, void * _2) { return __lasx_xvstelm_w(_1, _2, 4, 1); } +// CHECK-LABEL: @xvstelm_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2:%.*]], i32 8, i32 1) +// CHECK-NEXT: ret void +// +void xvstelm_d(v4i64 _1, void * _2) { return __lasx_xvstelm_d(_1, _2, 8, 1); } +// CHECK-LABEL: @xvinsve0_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __lasx_xvinsve0_w(_1, _2, 1); } +// CHECK-LABEL: @xvinsve0_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __lasx_xvinsve0_d(_1, _2, 1); } +// CHECK-LABEL: @xvpickve_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvpickve_w(v8i32 _1) { return __lasx_xvpickve_w(_1, 1); } +// CHECK-LABEL: @xvpickve_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvpickve_d(v4i64 _1) { return __lasx_xvpickve_d(_1, 1); } +// CHECK-LABEL: @xvssrlrn_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrn_b_h(_1, _2); } +// CHECK-LABEL: @xvssrlrn_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrn_h_w(_1, _2); } +// CHECK-LABEL: @xvssrlrn_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrn_w_d(_1, _2); } +// CHECK-LABEL: @xvssrln_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrln_b_h(_1, _2); } +// CHECK-LABEL: @xvssrln_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrln_h_w(_1, _2); } +// CHECK-LABEL: @xvssrln_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrln_w_d(_1, _2); } +// CHECK-LABEL: @xvorn_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// 
+v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __lasx_xvorn_v(_1, _2); } +// CHECK-LABEL: @xvldi( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvldi() { return __lasx_xvldi(1); } +// CHECK-LABEL: @xvldx( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1), !noalias [[META5:![0-9]+]] +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvldx(void * _1) { return __lasx_xvldx(_1, 1); } +// CHECK-LABEL: @xvstx( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_112]], ptr [[_2:%.*]], i64 1) +// CHECK-NEXT: ret void +// +void xvstx(v32i8 _1, void * _2) { return __lasx_xvstx(_1, _2, 1); } +// CHECK-LABEL: @xvextl_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvextl_qu_du(v4u64 _1) { return __lasx_xvextl_qu_du(_1); } +// CHECK-LABEL: @xvinsgr2vr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvinsgr2vr_w(v8i32 _1) { return __lasx_xvinsgr2vr_w(_1, 1, 1); } +// CHECK-LABEL: @xvinsgr2vr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvinsgr2vr_d(v4i64 _1) { return __lasx_xvinsgr2vr_d(_1, 1, 1); } +// CHECK-LABEL: @xvreplve0_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_112]]) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvreplve0_b(v32i8 _1) { return __lasx_xvreplve0_b(_1); } +// CHECK-LABEL: @xvreplve0_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_112]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvreplve0_h(v16i16 _1) { return __lasx_xvreplve0_h(_1); } +// CHECK-LABEL: @xvreplve0_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_112]]) +// CHECK-NEXT: store <8 x 
i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvreplve0_w(v8i32 _1) { return __lasx_xvreplve0_w(_1); } +// CHECK-LABEL: @xvreplve0_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvreplve0_d(v4i64 _1) { return __lasx_xvreplve0_d(_1); } +// CHECK-LABEL: @xvreplve0_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_112]]) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvreplve0_q(v32i8 _1) { return __lasx_xvreplve0_q(_1); } +// CHECK-LABEL: @vext2xv_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_112]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 vext2xv_h_b(v32i8 _1) { return __lasx_vext2xv_h_b(_1); } +// CHECK-LABEL: @vext2xv_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_112]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 vext2xv_w_h(v16i16 _1) { return __lasx_vext2xv_w_h(_1); } +// CHECK-LABEL: @vext2xv_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_112]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 vext2xv_d_w(v8i32 _1) { return __lasx_vext2xv_d_w(_1); } +// CHECK-LABEL: @vext2xv_w_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_112]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 vext2xv_w_b(v32i8 _1) { return __lasx_vext2xv_w_b(_1); } +// CHECK-LABEL: @vext2xv_d_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_112]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 vext2xv_d_h(v16i16 _1) { return __lasx_vext2xv_d_h(_1); } +// CHECK-LABEL: @vext2xv_d_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_112]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: ret void
+//
+v4i64 vext2xv_d_b(v32i8 _1) { return __lasx_vext2xv_d_b(_1); }
+// CHECK-LABEL: @vext2xv_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_112]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 vext2xv_hu_bu(v32i8 _1) { return __lasx_vext2xv_hu_bu(_1); }
+// CHECK-LABEL: @vext2xv_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_112]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 vext2xv_wu_hu(v16i16 _1) { return __lasx_vext2xv_wu_hu(_1); }
+// CHECK-LABEL: @vext2xv_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_112]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 vext2xv_du_wu(v8i32 _1) { return __lasx_vext2xv_du_wu(_1); }
+// CHECK-LABEL: @vext2xv_wu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_112]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 vext2xv_wu_bu(v32i8 _1) { return __lasx_vext2xv_wu_bu(_1); }
+// CHECK-LABEL: @vext2xv_du_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_112]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 vext2xv_du_hu(v16i16 _1) { return __lasx_vext2xv_du_hu(_1); }
+// CHECK-LABEL: @vext2xv_du_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_112]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 vext2xv_du_bu(v32i8 _1) { return __lasx_vext2xv_du_bu(_1); }
+// CHECK-LABEL: @xvpermi_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __lasx_xvpermi_q(_1, _2, 1); }
+// CHECK-LABEL: @xvpermi_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvpermi_d(v4i64 _1) { return __lasx_xvpermi_d(_1, 1); }
+// CHECK-LABEL: @xvperm_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __lasx_xvperm_w(_1, _2); }
+// CHECK-LABEL: @xvldrepl_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvldrepl_b(void * _1) { return __lasx_xvldrepl_b(_1, 1); }
+// CHECK-LABEL: @xvldrepl_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2)
+// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvldrepl_h(void * _1) { return __lasx_xvldrepl_h(_1, 2); }
+// CHECK-LABEL: @xvldrepl_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4)
+// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvldrepl_w(void * _1) { return __lasx_xvldrepl_w(_1, 4); }
+// CHECK-LABEL: @xvldrepl_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8)
+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvldrepl_d(void * _1) { return __lasx_xvldrepl_d(_1, 8); }
+// CHECK-LABEL: @xvpickve2gr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xvpickve2gr_w(v8i32 _1) { return __lasx_xvpickve2gr_w(_1, 1); }
+// CHECK-LABEL: @xvpickve2gr_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+unsigned int xvpickve2gr_wu(v8i32 _1) { return __lasx_xvpickve2gr_wu(_1, 1); }
+// CHECK-LABEL: @xvpickve2gr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: ret i64 [[TMP1]]
+//
+long xvpickve2gr_d(v4i64 _1) { return __lasx_xvpickve2gr_d(_1, 1); }
+// CHECK-LABEL: @xvpickve2gr_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: ret i64 [[TMP1]]
+//
+unsigned long int xvpickve2gr_du(v4i64 _1) { return __lasx_xvpickve2gr_du(_1, 1); }
+// CHECK-LABEL: @xvaddwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwev_q_d(_1, _2); }
+// CHECK-LABEL: @xvaddwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwev_d_w(_1, _2); }
+// CHECK-LABEL: @xvaddwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwev_w_h(_1, _2); }
+// CHECK-LABEL: @xvaddwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwev_h_b(_1, _2); }
+// CHECK-LABEL: @xvaddwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwev_q_du(_1, _2); }
+// CHECK-LABEL: @xvaddwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwev_d_wu(_1, _2); }
+// CHECK-LABEL: @xvaddwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwev_w_hu(_1, _2); }
+// CHECK-LABEL: @xvaddwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwev_h_bu(_1, _2); }
+// CHECK-LABEL: @xvsubwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwev_q_d(_1, _2); }
+// CHECK-LABEL: @xvsubwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwev_d_w(_1, _2); }
+// CHECK-LABEL: @xvsubwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwev_w_h(_1, _2); }
+// CHECK-LABEL: @xvsubwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwev_h_b(_1, _2); }
+// CHECK-LABEL: @xvsubwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwev_q_du(_1, _2); }
+// CHECK-LABEL: @xvsubwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwev_d_wu(_1, _2); }
+// CHECK-LABEL: @xvsubwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwev_w_hu(_1, _2); }
+// CHECK-LABEL: @xvsubwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwev_h_bu(_1, _2); }
+// CHECK-LABEL: @xvmulwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwev_q_d(_1, _2); }
+// CHECK-LABEL: @xvmulwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwev_d_w(_1, _2); }
+// CHECK-LABEL: @xvmulwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwev_w_h(_1, _2); }
+// CHECK-LABEL: @xvmulwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwev_h_b(_1, _2); }
+// CHECK-LABEL: @xvmulwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwev_q_du(_1, _2); }
+// CHECK-LABEL: @xvmulwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwev_d_wu(_1, _2); }
+// CHECK-LABEL: @xvmulwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwev_w_hu(_1, _2); }
+// CHECK-LABEL: @xvmulwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwev_h_bu(_1, _2); }
+// CHECK-LABEL: @xvaddwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwod_q_d(_1, _2); }
+// CHECK-LABEL: @xvaddwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwod_d_w(_1, _2); }
+// CHECK-LABEL: @xvaddwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwod_w_h(_1, _2); }
+// CHECK-LABEL: @xvaddwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwod_h_b(_1, _2); }
+// CHECK-LABEL: @xvaddwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwod_q_du(_1, _2); }
+// CHECK-LABEL: @xvaddwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwod_d_wu(_1, _2); }
+// CHECK-LABEL: @xvaddwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwod_w_hu(_1, _2); }
+// CHECK-LABEL: @xvaddwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwod_h_bu(_1, _2); }
+// CHECK-LABEL: @xvsubwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwod_q_d(_1, _2); }
+// CHECK-LABEL: @xvsubwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwod_d_w(_1, _2); }
+// CHECK-LABEL: @xvsubwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwod_w_h(_1, _2); }
+// CHECK-LABEL: @xvsubwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwod_h_b(_1, _2); }
+// CHECK-LABEL: @xvsubwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwod_q_du(_1, _2); }
+// CHECK-LABEL: @xvsubwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwod_d_wu(_1, _2); }
+// CHECK-LABEL: @xvsubwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwod_w_hu(_1, _2); }
+// CHECK-LABEL: @xvsubwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwod_h_bu(_1, _2); }
+// CHECK-LABEL: @xvmulwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwod_q_d(_1, _2); }
+// CHECK-LABEL: @xvmulwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwod_d_w(_1, _2); }
+// CHECK-LABEL: @xvmulwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwod_w_h(_1, _2); }
+// CHECK-LABEL: @xvmulwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwod_h_b(_1, _2); }
+// CHECK-LABEL: @xvmulwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwod_q_du(_1, _2); }
+// CHECK-LABEL: @xvmulwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwod_d_wu(_1, _2); }
+// CHECK-LABEL: @xvmulwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwod_w_hu(_1, _2); }
+// CHECK-LABEL: @xvmulwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwod_h_bu(_1, _2); }
+// CHECK-LABEL: @xvaddwev_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwev_d_wu_w(_1, _2); }
+// CHECK-LABEL: @xvaddwev_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwev_w_hu_h(_1, _2); }
+// CHECK-LABEL: @xvaddwev_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwev_h_bu_b(_1, _2); }
+// CHECK-LABEL: @xvmulwev_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwev_d_wu_w(_1, _2); }
+// CHECK-LABEL: @xvmulwev_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwev_w_hu_h(_1, _2); }
+// CHECK-LABEL: @xvmulwev_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwev_h_bu_b(_1, _2); }
+// CHECK-LABEL: @xvaddwod_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwod_d_wu_w(_1, _2); }
+// CHECK-LABEL: @xvaddwod_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwod_w_hu_h(_1, _2); }
+// CHECK-LABEL: @xvaddwod_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwod_h_bu_b(_1, _2); }
+// CHECK-LABEL: @xvmulwod_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwod_d_wu_w(_1, _2); }
+// CHECK-LABEL: @xvmulwod_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwod_w_hu_h(_1, _2); }
+// CHECK-LABEL: @xvmulwod_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwod_h_bu_b(_1, _2); }
+// CHECK-LABEL: @xvhaddw_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhaddw_q_d(_1, _2); }
+// CHECK-LABEL: @xvhaddw_qu_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhaddw_qu_du(_1, _2); }
+// CHECK-LABEL: @xvhsubw_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhsubw_q_d(_1, _2); }
+// CHECK-LABEL: @xvhsubw_qu_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhsubw_qu_du(_1, _2); }
+// CHECK-LABEL: @xvmaddwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_d(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_w(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_h(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_b(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwev_q_du(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwev_d_wu(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwev_w_hu(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwev_h_bu(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_d(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_w(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_h(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_b(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwod_q_du(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwod_d_wu(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwod_w_hu(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwod_h_bu(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_du_d(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_wu_w(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_hu_h(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_bu_b(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_du_d(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_wu_w(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_hu_h(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_bu_b(_1, _2, _3); }
+// CHECK-LABEL: @xvrotr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __lasx_xvrotr_b(_1, _2); }
+// CHECK-LABEL: @xvrotr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __lasx_xvrotr_h(_1, _2); }
+// CHECK-LABEL: @xvrotr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __lasx_xvrotr_w(_1, _2); }
+// CHECK-LABEL: @xvrotr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __lasx_xvrotr_d(_1, _2); }
+// CHECK-LABEL: @xvadd_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __lasx_xvadd_q(_1, _2); }
+// CHECK-LABEL: @xvsub_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __lasx_xvsub_q(_1, _2); }
+// CHECK-LABEL: @xvaddwev_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwev_q_du_d(_1, _2); }
+// CHECK-LABEL: @xvaddwod_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwod_q_du_d(_1, _2); }
+// CHECK-LABEL: @xvmulwev_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwev_q_du_d(_1, _2); }
+// CHECK-LABEL: @xvmulwod_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwod_q_du_d(_1, _2); }
+// CHECK-LABEL: @xvmskgez_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_112]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmskgez_b(v32i8 _1) { return __lasx_xvmskgez_b(_1); }
+// CHECK-LABEL: @xvmsknz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_112]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmsknz_b(v32i8 _1) { return __lasx_xvmsknz_b(_1); }
+// CHECK-LABEL: @xvexth_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_112]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvexth_h_b(v32i8 _1) { return __lasx_xvexth_h_b(_1); }
+// CHECK-LABEL: @xvexth_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_112]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvexth_w_h(v16i16 _1) { return __lasx_xvexth_w_h(_1); }
+// CHECK-LABEL: @xvexth_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_112]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvexth_d_w(v8i32 _1) { return __lasx_xvexth_d_w(_1); }
+// CHECK-LABEL: @xvexth_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvexth_q_d(v4i64 _1) { return __lasx_xvexth_q_d(_1); }
+// CHECK-LABEL: @xvexth_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_112]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvexth_hu_bu(v32u8 _1) { return __lasx_xvexth_hu_bu(_1); }
+// CHECK-LABEL: @xvexth_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_112]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvexth_wu_hu(v16u16 _1) { return __lasx_xvexth_wu_hu(_1); }
+// CHECK-LABEL: @xvexth_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_112]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvexth_du_wu(v8u32 _1) { return __lasx_xvexth_du_wu(_1); }
+// CHECK-LABEL: @xvexth_qu_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvexth_qu_du(v4u64 _1) { return __lasx_xvexth_qu_du(_1); }
+// CHECK-LABEL: @xvrotri_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvrotri_b(v32i8 _1) { return __lasx_xvrotri_b(_1, 1); }
+// CHECK-LABEL: @xvrotri_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvrotri_h(v16i16 _1) { return __lasx_xvrotri_h(_1, 1); }
+// CHECK-LABEL: @xvrotri_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvrotri_w(v8i32 _1) { return __lasx_xvrotri_w(_1, 1); }
+// CHECK-LABEL: @xvrotri_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvrotri_d(v4i64 _1) { return __lasx_xvrotri_d(_1, 1); }
+// CHECK-LABEL: @xvextl_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvextl_q_d(v4i64 _1) { return __lasx_xvextl_q_d(_1); }
+// CHECK-LABEL: @xvsrlni_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlni_b_h(_1, _2, 1); }
+// CHECK-LABEL: @xvsrlni_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlni_h_w(_1, _2, 1); }
+// CHECK-LABEL: @xvsrlni_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlni_w_d(_1, _2, 1); }
+// CHECK-LABEL: @xvsrlni_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlni_d_q(_1, _2, 1); }
+// CHECK-LABEL: @xvsrlrni_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+//
CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlrni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvsrlrni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvsrlrni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvsrlrni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlni_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlni_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlni_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlni_du_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlrni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlrni_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlrni_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 
_2) { return __lasx_xvssrlrni_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlrni_du_q(_1, _2, 1); } +// CHECK-LABEL: @xvsrani_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrani_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvsrani_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrani_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvsrani_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrani_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvsrani_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrani_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrarni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrarni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrarni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrarni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrani_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrani_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrani_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrani_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrani_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrani_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrani_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrani_du_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrarni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: 
store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrarni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrarni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrarni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrarni_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrarni_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrarni_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 
xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrarni_du_q(_1, _2, 1); }
+// CHECK-LABEL: @xbnz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbnz_b(v32u8 _1) { return __lasx_xbnz_b(_1); }
+// CHECK-LABEL: @xbnz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbnz_d(v4u64 _1) { return __lasx_xbnz_d(_1); }
+// CHECK-LABEL: @xbnz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbnz_h(v16u16 _1) { return __lasx_xbnz_h(_1); }
+// CHECK-LABEL: @xbnz_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbnz_v(v32u8 _1) { return __lasx_xbnz_v(_1); }
+// CHECK-LABEL: @xbnz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbnz_w(v8u32 _1) { return __lasx_xbnz_w(_1); }
+// CHECK-LABEL: @xbz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbz_b(v32u8 _1) { return __lasx_xbz_b(_1); }
+// CHECK-LABEL: @xbz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbz_d(v4u64 _1) { return __lasx_xbz_d(_1); }
+// CHECK-LABEL: @xbz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbz_h(v16u16 _1) { return __lasx_xbz_h(_1); }
+// CHECK-LABEL: @xbz_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbz_v(v32u8 _1) { return __lasx_xbz_v(_1); }
+// CHECK-LABEL: @xbz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbz_w(v8u32 _1) { return __lasx_xbz_w(_1); }
+// CHECK-LABEL: @xvfcmp_caf_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_caf_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_caf_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_caf_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_ceq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_ceq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_ceq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_ceq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cle_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cle_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cle_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_clt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_clt_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_clt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_clt_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cne_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cne_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cor_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cor_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cueq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cueq_s( +// 
CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cueq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cule_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cule_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cule_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cult_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cult_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cun_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cun_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load 
<4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cune_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cune_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cun_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_saf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_saf_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_saf_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_saf_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_seq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_seq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_seq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> 
[[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_seq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sle_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sle_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sle_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_slt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_slt_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_slt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_slt_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sne_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 
xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sne_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sor_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sor_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sueq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sueq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sueq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sule_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sule_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sule_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sult_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sult_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sun_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sun_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sune_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sune_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sun_s(_1, _2); } +// CHECK-LABEL: @xvpickve_d_f( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> 
@llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvpickve_d_f(v4f64 _1) { return __lasx_xvpickve_d_f(_1, 1); } +// CHECK-LABEL: @xvpickve_w_f( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvpickve_w_f(v8f32 _1) { return __lasx_xvpickve_w_f(_1, 1); } +// CHECK-LABEL: @xvrepli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvrepli_b() { return __lasx_xvrepli_b(1); } +// CHECK-LABEL: @xvrepli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvrepli_d() { return __lasx_xvrepli_d(1); } +// CHECK-LABEL: @xvrepli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); } +// CHECK-LABEL: @xvrepli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); } diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-error.c b/clang/test/CodeGen/LoongArch/lasx/builtin-error.c new file mode 100644 index 0000000000000000000000000000000000000000..724484465769e0e2da46fc738a1e003c4f7d1f7f --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lasx/builtin-error.c @@ -0,0 +1,1392 @@ +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -verify %s + +typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); +typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); +typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); +typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); +typedef short v16i16 __attribute__((vector_size(32), aligned(32))); +typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); +typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); +typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); +typedef int v8i32 __attribute__((vector_size(32), aligned(32))); +typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); +typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); +typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); +typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); +typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); +typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); +typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); +typedef 
float v8f32 __attribute__((vector_size(32), aligned(32))); +typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); +typedef double v4f64 __attribute__((vector_size(32), aligned(32))); +typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); + +v32i8 xvslli_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvslli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_b' must be a constant integer}} + return res; +} + +v16i16 xvslli_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvslli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_h' must be a constant integer}} + return res; +} + +v8i32 xvslli_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_w' must be a constant integer}} + return res; +} + +v4i64 xvslli_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvslli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_d' must be a constant integer}} + return res; +} + +v32i8 xvsrai_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrai_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_b' must be a constant integer}} + return res; +} + +v16i16 xvsrai_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrai_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_h' must be a constant integer}} + return res; +} + +v8i32 xvsrai_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrai_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_w' must be a constant integer}} + return res; +} + +v4i64 xvsrai_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrai_d(_1, 64); // expected-error {{argument value 64 is 
outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrai_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_d' must be a constant integer}} + return res; +} + +v32i8 xvsrari_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrari_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_b' must be a constant integer}} + return res; +} + +v16i16 xvsrari_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrari_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_h' must be a constant integer}} + return res; +} + +v8i32 xvsrari_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrari_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_w' must be a constant integer}} + return res; +} + +v4i64 xvsrari_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrari_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_d' must be a constant integer}} + return res; +} + +v32i8 xvsrli_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_b' must be a constant integer}} + return res; +} + +v16i16 xvsrli_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_h' must be a constant integer}} + return res; +} + +v8i32 xvsrli_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_w' must be a constant integer}} + return res; +} + +v4i64 xvsrli_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrli_d(_1, 
var); // expected-error {{argument to '__builtin_lasx_xvsrli_d' must be a constant integer}} + return res; +} + +v32i8 xvsrlri_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrlri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_b' must be a constant integer}} + return res; +} + +v16i16 xvsrlri_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrlri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_h' must be a constant integer}} + return res; +} + +v8i32 xvsrlri_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrlri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_w' must be a constant integer}} + return res; +} + +v4i64 xvsrlri_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrlri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_d' must be a constant integer}} + return res; +} + +v32u8 xvbitclri_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvbitclri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_b' must be a constant integer}} + return res; +} + +v16u16 xvbitclri_h(v16u16 _1, int var) { + v16u16 res = __builtin_lasx_xvbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvbitclri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_h' must be a constant integer}} + return res; +} + +v8u32 xvbitclri_w(v8u32 _1, int var) { + v8u32 res = __builtin_lasx_xvbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbitclri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_w' must be a constant integer}} + return res; +} + +v4u64 xvbitclri_d(v4u64 _1, int var) { + v4u64 res = __builtin_lasx_xvbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvbitclri_d(_1, var); // 
expected-error {{argument to '__builtin_lasx_xvbitclri_d' must be a constant integer}} + return res; +} + +v32u8 xvbitseti_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvbitseti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_b' must be a constant integer}} + return res; +} + +v16u16 xvbitseti_h(v16u16 _1, int var) { + v16u16 res = __builtin_lasx_xvbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvbitseti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_h' must be a constant integer}} + return res; +} + +v8u32 xvbitseti_w(v8u32 _1, int var) { + v8u32 res = __builtin_lasx_xvbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbitseti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_w' must be a constant integer}} + return res; +} + +v4u64 xvbitseti_d(v4u64 _1, int var) { + v4u64 res = __builtin_lasx_xvbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvbitseti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_d' must be a constant integer}} + return res; +} + +v32u8 xvbitrevi_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_b' must be a constant integer}} + return res; +} + +v16u16 xvbitrevi_h(v16u16 _1, int var) { + v16u16 res = __builtin_lasx_xvbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_h' must be a constant integer}} + return res; +} + +v8u32 xvbitrevi_w(v8u32 _1, int var) { + v8u32 res = __builtin_lasx_xvbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_w' must be a constant integer}} + return res; +} + +v4u64 xvbitrevi_d(v4u64 _1, int var) { + v4u64 res = __builtin_lasx_xvbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= 
__builtin_lasx_xvbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_d' must be a constant integer}} + return res; +} + +v32i8 xvaddi_bu(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_bu' must be a constant integer}} + return res; +} + +v16i16 xvaddi_hu(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_hu' must be a constant integer}} + return res; +} + +v8i32 xvaddi_wu(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_wu' must be a constant integer}} + return res; +} + +v4i64 xvaddi_du(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_du' must be a constant integer}} + return res; +} + +v32i8 xvsubi_bu(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_bu' must be a constant integer}} + return res; +} + +v16i16 xvsubi_hu(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_hu' must be a constant integer}} + return res; +} + +v8i32 xvsubi_wu(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_wu' must be a constant integer}} + return res; +} + +v4i64 xvsubi_du(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_du(_1, var); // 
expected-error {{argument to '__builtin_lasx_xvsubi_du' must be a constant integer}} + return res; +} + +v32i8 xvmaxi_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_b' must be a constant integer}} + return res; +} + +v16i16 xvmaxi_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_h' must be a constant integer}} + return res; +} + +v8i32 xvmaxi_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_w' must be a constant integer}} + return res; +} + +v4i64 xvmaxi_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_d' must be a constant integer}} + return res; +} + +v32u8 xvmaxi_bu(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_bu' must be a constant integer}} + return res; +} + +v16u16 xvmaxi_hu(v16u16 _1, int var) { + v16u16 res = __builtin_lasx_xvmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_hu' must be a constant integer}} + return res; +} + +v8u32 xvmaxi_wu(v8u32 _1, int var) { + v8u32 res = __builtin_lasx_xvmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_wu' must be a constant integer}} + return res; +} + +v4u64 xvmaxi_du(v4u64 _1, int var) { + v4u64 res = __builtin_lasx_xvmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_du' must be a 
constant integer}} + return res; +} + +v32i8 xvmini_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_b' must be a constant integer}} + return res; +} + +v16i16 xvmini_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_h' must be a constant integer}} + return res; +} + +v8i32 xvmini_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_w' must be a constant integer}} + return res; +} + +v4i64 xvmini_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_d' must be a constant integer}} + return res; +} + +v32u8 xvmini_bu(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_bu' must be a constant integer}} + return res; +} + +v16u16 xvmini_hu(v16u16 _1, int var) { + v16u16 res = __builtin_lasx_xvmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_hu' must be a constant integer}} + return res; +} + +v8u32 xvmini_wu(v8u32 _1, int var) { + v8u32 res = __builtin_lasx_xvmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_wu' must be a constant integer}} + return res; +} + +v4u64 xvmini_du(v4u64 _1, int var) { + v4u64 res = __builtin_lasx_xvmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_du' must be a constant integer}} + return res; +} + +v32i8 xvseqi_b(v32i8 _1, int 
var) { + v32i8 res = __builtin_lasx_xvseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_b' must be a constant integer}} + return res; +} + +v16i16 xvseqi_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_h' must be a constant integer}} + return res; +} + +v8i32 xvseqi_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_w' must be a constant integer}} + return res; +} + +v4i64 xvseqi_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_d' must be a constant integer}} + return res; +} + +v32i8 xvslti_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_b' must be a constant integer}} + return res; +} + +v16i16 xvslti_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_h' must be a constant integer}} + return res; +} + +v8i32 xvslti_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_w' must be a constant integer}} + return res; +} + +v4i64 xvslti_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_d' must be a constant integer}} + return res; +} + +v32i8 xvslti_bu(v32u8 _1, int var) { + v32i8 res = __builtin_lasx_xvslti_bu(_1, -1); // expected-error {{argument value 
4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_bu' must be a constant integer}} + return res; +} + +v16i16 xvslti_hu(v16u16 _1, int var) { + v16i16 res = __builtin_lasx_xvslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_hu' must be a constant integer}} + return res; +} + +v8i32 xvslti_wu(v8u32 _1, int var) { + v8i32 res = __builtin_lasx_xvslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_wu' must be a constant integer}} + return res; +} + +v4i64 xvslti_du(v4u64 _1, int var) { + v4i64 res = __builtin_lasx_xvslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_du' must be a constant integer}} + return res; +} + +v32i8 xvslei_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_b' must be a constant integer}} + return res; +} + +v16i16 xvslei_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_h' must be a constant integer}} + return res; +} + +v8i32 xvslei_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_w' must be a constant integer}} + return res; +} + +v4i64 xvslei_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_d' must be a constant integer}} + return res; +} + +v32i8 xvslei_bu(v32u8 _1, int var) { + v32i8 res = __builtin_lasx_xvslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= 
__builtin_lasx_xvslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_bu' must be a constant integer}} + return res; +} + +v16i16 xvslei_hu(v16u16 _1, int var) { + v16i16 res = __builtin_lasx_xvslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_hu' must be a constant integer}} + return res; +} + +v8i32 xvslei_wu(v8u32 _1, int var) { + v8i32 res = __builtin_lasx_xvslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_wu' must be a constant integer}} + return res; +} + +v4i64 xvslei_du(v4u64 _1, int var) { + v4i64 res = __builtin_lasx_xvslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_du' must be a constant integer}} + return res; +} + +v32i8 xvsat_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsat_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_b' must be a constant integer}} + return res; +} + +v16i16 xvsat_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsat_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_h' must be a constant integer}} + return res; +} + +v8i32 xvsat_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsat_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_w' must be a constant integer}} + return res; +} + +v4i64 xvsat_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsat_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_d' must be a constant integer}} + return res; +} + +v32u8 xvsat_bu(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid 
range [0, 7]}} + res |= __builtin_lasx_xvsat_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_bu' must be a constant integer}} + return res; +} + +v16u16 xvsat_hu(v16u16 _1, int var) { + v16u16 res = __builtin_lasx_xvsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsat_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_hu' must be a constant integer}} + return res; +} + +v8u32 xvsat_wu(v8u32 _1, int var) { + v8u32 res = __builtin_lasx_xvsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsat_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_wu' must be a constant integer}} + return res; +} + +v4u64 xvsat_du(v4u64 _1, int var) { + v4u64 res = __builtin_lasx_xvsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsat_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_du' must be a constant integer}} + return res; +} + +v32i8 xvrepl128vei_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvrepl128vei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvrepl128vei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvrepl128vei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_b' must be a constant integer}} + return res; +} + +v16i16 xvrepl128vei_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvrepl128vei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvrepl128vei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvrepl128vei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_h' must be a constant integer}} + return res; +} + +v8i32 xvrepl128vei_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvrepl128vei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvrepl128vei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvrepl128vei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_w' must be a constant integer}} + return res; +} + +v4i64 xvrepl128vei_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvrepl128vei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __builtin_lasx_xvrepl128vei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __builtin_lasx_xvrepl128vei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_d' must be a constant integer}} + return res; +} + +v32u8 xvandi_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvandi_b(_1, 256); // expected-error {{argument value 
256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvandi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvandi_b' must be a constant integer}} + return res; +} + +v32u8 xvori_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvori_b' must be a constant integer}} + return res; +} + +v32u8 xvnori_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvnori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvnori_b' must be a constant integer}} + return res; +} + +v32u8 xvxori_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvxori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvxori_b' must be a constant integer}} + return res; +} + +v32u8 xvbitseli_b(v32u8 _1, v32u8 _2, int var) { + v32u8 res = __builtin_lasx_xvbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvbitseli_b' must be a constant integer}} + return res; +} + +v32i8 xvshuf4i_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_b' must be a constant integer}} + return res; +} + +v16i16 xvshuf4i_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_h' must be a constant integer}} + return res; +} + +v8i32 xvshuf4i_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_w' must be a constant integer}} + return res; +} + +v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_d(_1, _2, 256); // 
expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_d' must be a constant integer}} + return res; +} + +v8i32 xvpermi_w(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_w' must be a constant integer}} + return res; +} + +v4i64 xvpermi_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvpermi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvpermi_d(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvpermi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpermi_d' must be a constant integer}} + return res; +} + +v32i8 xvpermi_q(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvpermi_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvpermi_q(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvpermi_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_q' must be a constant integer}} + return res; +} + +v16i16 xvsllwil_h_b(v32i8 _1, int var) { + v16i16 res = __builtin_lasx_xvsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_h_b' must be a constant integer}} + return res; +} + +v8i32 xvsllwil_w_h(v16i16 _1, int var) { + v8i32 res = __builtin_lasx_xvsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_w_h' must be a constant integer}} + return res; +} + +v4i64 xvsllwil_d_w(v8i32 _1, int var) { + v4i64 res = __builtin_lasx_xvsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_d_w' must be a constant integer}} + return res; +} + +v16u16 xvsllwil_hu_bu(v32u8 _1, int var) { + v16u16 res = __builtin_lasx_xvsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_hu_bu' must be a constant integer}} + return res; +} + +v8u32 xvsllwil_wu_hu(v16u16 _1, int var) { + v8u32 res = __builtin_lasx_xvsllwil_wu_hu(_1, -1); // expected-error {{argument value 
4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_wu_hu' must be a constant integer}} + return res; +} + +v4u64 xvsllwil_du_wu(v8u32 _1, int var) { + v4u64 res = __builtin_lasx_xvsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_du_wu' must be a constant integer}} + return res; +} + +v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_b' must be a constant integer}} + return res; +} + +v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_h' must be a constant integer}} + return res; +} + +v32i8 xvbsrl_v(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbsrl_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsrl_v' must be a constant integer}} + return res; +} + +v32i8 xvbsll_v(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbsll_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsll_v' must be a constant integer}} + return res; +} + +v32i8 xvextrins_b(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_b' must be a constant integer}} + return res; +} + +v16i16 xvextrins_h(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_h' must be a constant integer}} + return res; +} + +v8i32 
xvextrins_w(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_w' must be a constant integer}} + return res; +} + +v4i64 xvextrins_d(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_d' must be a constant integer}} + return res; +} + +v32i8 xvld(void *_1, int var) { + v32i8 res = __builtin_lasx_xvld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + res |= __builtin_lasx_xvld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + res |= __builtin_lasx_xvld(_1, var); // expected-error {{argument to '__builtin_lasx_xvld' must be a constant integer}} + return res; +} + +void xvst(v32i8 _1, void *_2, int var) { + __builtin_lasx_xvst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + __builtin_lasx_xvst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + __builtin_lasx_xvst(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvst' must be a constant integer}} +} + +void xvstelm_b(v32i8 _1, void * _2, int var) { + __builtin_lasx_xvstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} + __builtin_lasx_xvstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} + __builtin_lasx_xvstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} +} + +void xvstelm_h(v16i16 _1, void * _2, int var) { + __builtin_lasx_xvstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} + __builtin_lasx_xvstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} + __builtin_lasx_xvstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} +} + +void xvstelm_w(v8i32 _1, void * _2, int var) { + __builtin_lasx_xvstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} + __builtin_lasx_xvstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} + __builtin_lasx_xvstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} +} + +void xvstelm_d(v4i64 _1, void * _2, int var) { + __builtin_lasx_xvstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} + __builtin_lasx_xvstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} + __builtin_lasx_xvstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} +} + +void xvstelm_b_idx(v32i8 _1, 
void * _2, int var) { + __builtin_lasx_xvstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + __builtin_lasx_xvstelm_b(_1, _2, 1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + __builtin_lasx_xvstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} +} + +void xvstelm_h_idx(v16i16 _1, void * _2, int var) { + __builtin_lasx_xvstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + __builtin_lasx_xvstelm_h(_1, _2, 2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + __builtin_lasx_xvstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} +} + +void xvstelm_w_idx(v8i32 _1, void * _2, int var) { + __builtin_lasx_xvstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + __builtin_lasx_xvstelm_w(_1, _2, 4, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + __builtin_lasx_xvstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} +} + +void xvstelm_d_idx(v4i64 _1, void * _2, int var) { + __builtin_lasx_xvstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + __builtin_lasx_xvstelm_d(_1, _2, 8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + __builtin_lasx_xvstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} +} + +v8i32 xvinsve0_w(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvinsve0_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvinsve0_w(_1, _2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvinsve0_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_w' must be a constant integer}} + return res; +} + +v4i64 xvinsve0_d(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvinsve0_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvinsve0_d(_1, _2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvinsve0_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_d' must be a constant integer}} + return res; +} + +v8i32 xvpickve_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvpickve_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvpickve_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvpickve_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w' must be a constant integer}} + return res; +} + +v4i64 xvpickve_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvpickve_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvpickve_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvpickve_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d' must be a constant integer}} + return res; +} + +v4i64 xvldi(int var) { + v4i64 res = 
__builtin_lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} + res |= __builtin_lasx_xvldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} + res |= __builtin_lasx_xvldi(var); // expected-error {{argument to '__builtin_lasx_xvldi' must be a constant integer}} + return res; +} + +v8i32 xvinsgr2vr_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvinsgr2vr_w(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_w' must be a constant integer}} + return res; +} + +v4i64 xvinsgr2vr_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvinsgr2vr_d(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_d' must be a constant integer}} + return res; +} + +v32i8 xvldrepl_b(void *_1, int var) { + v32i8 res = __builtin_lasx_xvldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + res |= __builtin_lasx_xvldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + res |= __builtin_lasx_xvldrepl_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_b' must be a constant integer}} + return res; +} + +v16i16 xvldrepl_h(void *_1, int var) { + v16i16 res = __builtin_lasx_xvldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} + res |= __builtin_lasx_xvldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} + res |= __builtin_lasx_xvldrepl_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_h' must be a constant integer}} + return res; +} + +v8i32 xvldrepl_w(void *_1, int var) { + v8i32 res = __builtin_lasx_xvldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} + res |= __builtin_lasx_xvldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} + res |= __builtin_lasx_xvldrepl_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_w' must be a constant integer}} + return res; +} + +v4i64 xvldrepl_d(void *_1, int var) { + v4i64 res = __builtin_lasx_xvldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} + res |= __builtin_lasx_xvldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} + res |= __builtin_lasx_xvldrepl_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_d' must be a constant integer}} + return res; +} + +int xvpickve2gr_w(v8i32 _1, int var) { + int res = __builtin_lasx_xvpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvpickve2gr_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_w' must be a constant integer}} + 
return res; +} + +unsigned int xvpickve2gr_wu(v8i32 _1, int var) { + unsigned int res = __builtin_lasx_xvpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvpickve2gr_wu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_wu' must be a constant integer}} + return res; +} + +long xvpickve2gr_d(v4i64 _1, int var) { + long res = __builtin_lasx_xvpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvpickve2gr_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_d' must be a constant integer}} + return res; +} + +unsigned long int xvpickve2gr_du(v4i64 _1, int var) { + unsigned long int res = __builtin_lasx_xvpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvpickve2gr_du(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_du' must be a constant integer}} + return res; +} + +v32i8 xvrotri_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvrotri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_b' must be a constant integer}} + return res; +} + +v16i16 xvrotri_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvrotri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_h' must be a constant integer}} + return res; +} + +v8i32 xvrotri_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvrotri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_w' must be a constant integer}} + return res; +} + +v4i64 xvrotri_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvrotri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_d' must be a constant integer}} + return res; +} + +v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrlni_b_h(_1, _2, var); // expected-error 
{{argument to '__builtin_lasx_xvsrlni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_d_q' must be a constant integer}} + return res; +} + +v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_d_q' must be a constant integer}} + return res; +} + +v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvssrlni_b_h(_1, _2, -1); // expected-error {{argument 
value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_d_q' must be a constant integer}} + return res; +} + +v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __builtin_lasx_xvssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __builtin_lasx_xvssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_hu_w' must be a constant integer}} + return res; +} + +v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __builtin_lasx_xvssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_wu_d' must be a constant integer}} + return res; +} + +v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __builtin_lasx_xvssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= 
__builtin_lasx_xvssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_du_q' must be a constant integer}} + return res; +} + +v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_d_q' must be a constant integer}} + return res; +} + +v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __builtin_lasx_xvssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __builtin_lasx_xvssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_hu_w' must be a constant integer}} + return res; +} + +v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __builtin_lasx_xvssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_wu_d' must be a constant integer}} + return res; +} + +v4u64 
xvssrlrni_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __builtin_lasx_xvssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_du_q' must be a constant integer}} + return res; +} + +v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_d_q' must be a constant integer}} + return res; +} + +v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrarni_w_d(_1, 
_2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_d_q' must be a constant integer}} + return res; +} + +v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_w_d' must be a constant integer}} + return res; +} + +v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_d_q' must be a constant integer}} + return res; +} + +v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __builtin_lasx_xvssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __builtin_lasx_xvssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_hu_w' must 
be a constant integer}} + return res; +} + +v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __builtin_lasx_xvssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_wu_d' must be a constant integer}} + return res; +} + +v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __builtin_lasx_xvssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_du_q' must be a constant integer}} + return res; +} + +v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_d_q' must be a constant integer}} + return res; +} + +v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __builtin_lasx_xvssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __builtin_lasx_xvssrarni_hu_w(_1, _2, -1); // expected-error 
{{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_hu_w' must be a constant integer}} + return res; +} + +v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __builtin_lasx_xvssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_wu_d' must be a constant integer}} + return res; +} + +v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __builtin_lasx_xvssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_du_q' must be a constant integer}} + return res; +} + +v4f64 xvpickve_d_f(v4f64 _1, int var) { + v4f64 res = __builtin_lasx_xvpickve_d_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res += __builtin_lasx_xvpickve_d_f(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res += __builtin_lasx_xvpickve_d_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d_f' must be a constant integer}} + return res; +} + +v8f32 xvpickve_w_f(v8f32 _1, int var) { + v8f32 res = __builtin_lasx_xvpickve_w_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res += __builtin_lasx_xvpickve_w_f(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res += __builtin_lasx_xvpickve_w_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w_f' must be a constant integer}} + return res; +} + +v32i8 xvrepli_b(int var) { + v32i8 res = __builtin_lasx_xvrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __builtin_lasx_xvrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __builtin_lasx_xvrepli_b(var); // expected-error {{argument to '__builtin_lasx_xvrepli_b' must be a constant integer}} + return res; +} + +v4i64 xvrepli_d(int var) { + v4i64 res = __builtin_lasx_xvrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __builtin_lasx_xvrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __builtin_lasx_xvrepli_d(var); // expected-error {{argument to '__builtin_lasx_xvrepli_d' must be a constant integer}} + return res; +} + +v16i16 xvrepli_h(int var) { + v16i16 res = __builtin_lasx_xvrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __builtin_lasx_xvrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __builtin_lasx_xvrepli_h(var); // expected-error {{argument to '__builtin_lasx_xvrepli_h' must be a constant integer}} + return res; +} + +v8i32 xvrepli_w(int var) { + v8i32 res = 
__builtin_lasx_xvrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __builtin_lasx_xvrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __builtin_lasx_xvrepli_w(var); // expected-error {{argument to '__builtin_lasx_xvrepli_w' must be a constant integer}} + return res; +} diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin.c b/clang/test/CodeGen/LoongArch/lasx/builtin.c new file mode 100644 index 0000000000000000000000000000000000000000..f52a23a5faea7b2b0e07adf48db3c178d1190c68 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lasx/builtin.c @@ -0,0 +1,6405 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s + +typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); +typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); +typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); +typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); +typedef short v16i16 __attribute__((vector_size(32), aligned(32))); +typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); +typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); +typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); +typedef int v8i32 __attribute__((vector_size(32), aligned(32))); +typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); +typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); +typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); +typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); +typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); +typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); +typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); +typedef float v8f32 __attribute__((vector_size(32), aligned(32))); +typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); +typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); +typedef double v4f64 __attribute__((vector_size(32), aligned(32))); +typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); + +// CHECK-LABEL: @xvsll_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsll_b(_1, _2); } +// CHECK-LABEL: @xvsll_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return
__builtin_lasx_xvsll_h(_1, _2); } +// CHECK-LABEL: @xvsll_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsll_w(_1, _2); } +// CHECK-LABEL: @xvsll_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsll_d(_1, _2); } +// CHECK-LABEL: @xvslli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvslli_b(v32i8 _1) { return __builtin_lasx_xvslli_b(_1, 1); } +// CHECK-LABEL: @xvslli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvslli_h(v16i16 _1) { return __builtin_lasx_xvslli_h(_1, 1); } +// CHECK-LABEL: @xvslli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvslli_w(v8i32 _1) { return __builtin_lasx_xvslli_w(_1, 1); } +// CHECK-LABEL: @xvslli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvslli_d(v4i64 _1) { return __builtin_lasx_xvslli_d(_1, 1); } +// CHECK-LABEL: @xvsra_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsra_b(_1, _2); } +// CHECK-LABEL: @xvsra_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] 
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsra_h(_1, _2); } +// CHECK-LABEL: @xvsra_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsra_w(_1, _2); } +// CHECK-LABEL: @xvsra_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsra_d(_1, _2); } +// CHECK-LABEL: @xvsrai_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrai_b(v32i8 _1) { return __builtin_lasx_xvsrai_b(_1, 1); } +// CHECK-LABEL: @xvsrai_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrai_h(v16i16 _1) { return __builtin_lasx_xvsrai_h(_1, 1); } +// CHECK-LABEL: @xvsrai_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrai_w(v8i32 _1) { return __builtin_lasx_xvsrai_w(_1, 1); } +// CHECK-LABEL: @xvsrai_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrai_d(v4i64 _1) { return __builtin_lasx_xvsrai_d(_1, 1); } +// CHECK-LABEL: @xvsrar_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrar_b(_1, _2); } +// CHECK-LABEL: @xvsrar_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrar_h(_1, _2); } +// CHECK-LABEL: @xvsrar_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrar_w(_1, _2); } +// CHECK-LABEL: @xvsrar_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrar_d(_1, _2); } +// CHECK-LABEL: @xvsrari_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrari_b(v32i8 _1) { return __builtin_lasx_xvsrari_b(_1, 1); } +// CHECK-LABEL: @xvsrari_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrari_h(v16i16 _1) { return __builtin_lasx_xvsrari_h(_1, 1); } +// CHECK-LABEL: @xvsrari_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrari_w(v8i32 _1) { return __builtin_lasx_xvsrari_w(_1, 1); } +// CHECK-LABEL: @xvsrari_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrari_d(v4i64 _1) { return __builtin_lasx_xvsrari_d(_1, 1); } +// CHECK-LABEL: @xvsrl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrl_b(_1, _2); } +// CHECK-LABEL: @xvsrl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrl_h(_1, _2); } +// CHECK-LABEL: @xvsrl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrl_w(_1, _2); } +// CHECK-LABEL: @xvsrl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrl_d(_1, _2); } +// CHECK-LABEL: @xvsrli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrli_b(v32i8 _1) { return __builtin_lasx_xvsrli_b(_1, 1); } +// CHECK-LABEL: @xvsrli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrli_h(v16i16 _1) { return __builtin_lasx_xvsrli_h(_1, 1); } +// CHECK-LABEL: @xvsrli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: ret void +// +v8i32 xvsrli_w(v8i32 _1) { return __builtin_lasx_xvsrli_w(_1, 1); } +// CHECK-LABEL: @xvsrli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrli_d(v4i64 _1) { return __builtin_lasx_xvsrli_d(_1, 1); } +// CHECK-LABEL: @xvsrlr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlr_b(_1, _2); } +// CHECK-LABEL: @xvsrlr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlr_h(_1, _2); } +// CHECK-LABEL: @xvsrlr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlr_w(_1, _2); } +// CHECK-LABEL: @xvsrlr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlr_d(_1, _2); } +// CHECK-LABEL: @xvsrlri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrlri_b(v32i8 _1) { return __builtin_lasx_xvsrlri_b(_1, 1); } +// CHECK-LABEL: @xvsrlri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 
xvsrlri_h(v16i16 _1) { return __builtin_lasx_xvsrlri_h(_1, 1); } +// CHECK-LABEL: @xvsrlri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrlri_w(v8i32 _1) { return __builtin_lasx_xvsrlri_w(_1, 1); } +// CHECK-LABEL: @xvsrlri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrlri_d(v4i64 _1) { return __builtin_lasx_xvsrlri_d(_1, 1); } +// CHECK-LABEL: @xvbitclr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitclr_b(_1, _2); } +// CHECK-LABEL: @xvbitclr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitclr_h(_1, _2); } +// CHECK-LABEL: @xvbitclr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitclr_w(_1, _2); } +// CHECK-LABEL: @xvbitclr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitclr_d(_1, _2); } +// CHECK-LABEL: @xvbitclri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 
xvbitclri_b(v32u8 _1) { return __builtin_lasx_xvbitclri_b(_1, 1); }
+// CHECK-LABEL: @xvbitclri_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvbitclri_h(v16u16 _1) { return __builtin_lasx_xvbitclri_h(_1, 1); }
+// CHECK-LABEL: @xvbitclri_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvbitclri_w(v8u32 _1) { return __builtin_lasx_xvbitclri_w(_1, 1); }
+// CHECK-LABEL: @xvbitclri_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvbitclri_d(v4u64 _1) { return __builtin_lasx_xvbitclri_d(_1, 1); }
+// CHECK-LABEL: @xvbitset_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitset_b(_1, _2); }
+// CHECK-LABEL: @xvbitset_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitset_h(_1, _2); }
+// CHECK-LABEL: @xvbitset_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitset_w(_1, _2); }
+// CHECK-LABEL: @xvbitset_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitset_d(_1, _2); }
+// CHECK-LABEL: @xvbitseti_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvbitseti_b(v32u8 _1) { return __builtin_lasx_xvbitseti_b(_1, 1); }
+// CHECK-LABEL: @xvbitseti_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvbitseti_h(v16u16 _1) { return __builtin_lasx_xvbitseti_h(_1, 1); }
+// CHECK-LABEL: @xvbitseti_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvbitseti_w(v8u32 _1) { return __builtin_lasx_xvbitseti_w(_1, 1); }
+// CHECK-LABEL: @xvbitseti_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvbitseti_d(v4u64 _1) { return __builtin_lasx_xvbitseti_d(_1, 1); }
+// CHECK-LABEL: @xvbitrev_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitrev_b(_1, _2); }
+// CHECK-LABEL: @xvbitrev_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitrev_h(_1, _2); }
+// CHECK-LABEL: @xvbitrev_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitrev_w(_1, _2); }
+// CHECK-LABEL: @xvbitrev_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitrev_d(_1, _2); }
+// CHECK-LABEL: @xvbitrevi_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvbitrevi_b(v32u8 _1) { return __builtin_lasx_xvbitrevi_b(_1, 1); }
+// CHECK-LABEL: @xvbitrevi_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvbitrevi_h(v16u16 _1) { return __builtin_lasx_xvbitrevi_h(_1, 1); }
+// CHECK-LABEL: @xvbitrevi_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvbitrevi_w(v8u32 _1) { return __builtin_lasx_xvbitrevi_w(_1, 1); }
+// CHECK-LABEL: @xvbitrevi_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvbitrevi_d(v4u64 _1) { return __builtin_lasx_xvbitrevi_d(_1, 1); }
+// CHECK-LABEL: @xvadd_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadd_b(_1, _2); }
+// CHECK-LABEL: @xvadd_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadd_h(_1, _2); }
+// CHECK-LABEL: @xvadd_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadd_w(_1, _2); }
+// CHECK-LABEL: @xvadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_d(_1, _2); }
+// CHECK-LABEL: @xvaddi_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvaddi_bu(v32i8 _1) { return __builtin_lasx_xvaddi_bu(_1, 1); }
+// CHECK-LABEL: @xvaddi_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvaddi_hu(v16i16 _1) { return __builtin_lasx_xvaddi_hu(_1, 1); }
+// CHECK-LABEL: @xvaddi_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvaddi_wu(v8i32 _1) { return __builtin_lasx_xvaddi_wu(_1, 1); }
+// CHECK-LABEL: @xvaddi_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddi_du(v4i64 _1) { return __builtin_lasx_xvaddi_du(_1, 1); }
+// CHECK-LABEL: @xvsub_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsub_b(_1, _2); }
+// CHECK-LABEL: @xvsub_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsub_h(_1, _2); }
+// CHECK-LABEL: @xvsub_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsub_w(_1, _2); }
+// CHECK-LABEL: @xvsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_d(_1, _2); }
+// CHECK-LABEL: @xvsubi_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsubi_bu(v32i8 _1) { return __builtin_lasx_xvsubi_bu(_1, 1); }
+// CHECK-LABEL: @xvsubi_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsubi_hu(v16i16 _1) { return __builtin_lasx_xvsubi_hu(_1, 1); }
+// CHECK-LABEL: @xvsubi_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsubi_wu(v8i32 _1) { return __builtin_lasx_xvsubi_wu(_1, 1); }
+// CHECK-LABEL: @xvsubi_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsubi_du(v4i64 _1) { return __builtin_lasx_xvsubi_du(_1, 1); }
+// CHECK-LABEL: @xvmax_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32
x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmax_b(_1, _2); } +// CHECK-LABEL: @xvmax_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmax_h(_1, _2); } +// CHECK-LABEL: @xvmax_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmax_w(_1, _2); } +// CHECK-LABEL: @xvmax_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmax_d(_1, _2); } +// CHECK-LABEL: @xvmaxi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmaxi_b(v32i8 _1) { return __builtin_lasx_xvmaxi_b(_1, 1); } +// CHECK-LABEL: @xvmaxi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmaxi_h(v16i16 _1) { return __builtin_lasx_xvmaxi_h(_1, 1); } +// CHECK-LABEL: @xvmaxi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmaxi_w(v8i32 _1) { return __builtin_lasx_xvmaxi_w(_1, 1); } +// CHECK-LABEL: @xvmaxi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmaxi_d(v4i64 _1) { return __builtin_lasx_xvmaxi_d(_1, 
1); } +// CHECK-LABEL: @xvmax_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmax_bu(_1, _2); } +// CHECK-LABEL: @xvmax_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmax_hu(_1, _2); } +// CHECK-LABEL: @xvmax_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmax_wu(_1, _2); } +// CHECK-LABEL: @xvmax_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmax_du(_1, _2); } +// CHECK-LABEL: @xvmaxi_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvmaxi_bu(v32u8 _1) { return __builtin_lasx_xvmaxi_bu(_1, 1); } +// CHECK-LABEL: @xvmaxi_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvmaxi_hu(v16u16 _1) { return __builtin_lasx_xvmaxi_hu(_1, 1); } +// CHECK-LABEL: @xvmaxi_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvmaxi_wu(v8u32 _1) { return __builtin_lasx_xvmaxi_wu(_1, 1); } +// CHECK-LABEL: 
@xvmaxi_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvmaxi_du(v4u64 _1) { return __builtin_lasx_xvmaxi_du(_1, 1); } +// CHECK-LABEL: @xvmin_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmin_b(_1, _2); } +// CHECK-LABEL: @xvmin_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmin_h(_1, _2); } +// CHECK-LABEL: @xvmin_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmin_w(_1, _2); } +// CHECK-LABEL: @xvmin_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmin_d(_1, _2); } +// CHECK-LABEL: @xvmini_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmini_b(v32i8 _1) { return __builtin_lasx_xvmini_b(_1, 1); } +// CHECK-LABEL: @xvmini_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmini_h(v16i16 _1) { return __builtin_lasx_xvmini_h(_1, 1); } +// CHECK-LABEL: @xvmini_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmini_w(v8i32 _1) { return __builtin_lasx_xvmini_w(_1, 1); } +// CHECK-LABEL: @xvmini_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmini_d(v4i64 _1) { return __builtin_lasx_xvmini_d(_1, 1); } +// CHECK-LABEL: @xvmin_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmin_bu(_1, _2); } +// CHECK-LABEL: @xvmin_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmin_hu(_1, _2); } +// CHECK-LABEL: @xvmin_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmin_wu(_1, _2); } +// CHECK-LABEL: @xvmin_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmin_du(_1, _2); } +// CHECK-LABEL: @xvmini_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvmini_bu(v32u8 _1) { return __builtin_lasx_xvmini_bu(_1, 1); } +// CHECK-LABEL: @xvmini_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvmini_hu(v16u16 _1) { return __builtin_lasx_xvmini_hu(_1, 1); } +// CHECK-LABEL: @xvmini_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvmini_wu(v8u32 _1) { return __builtin_lasx_xvmini_wu(_1, 1); } +// CHECK-LABEL: @xvmini_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvmini_du(v4u64 _1) { return __builtin_lasx_xvmini_du(_1, 1); } +// CHECK-LABEL: @xvseq_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvseq_b(_1, _2); } +// CHECK-LABEL: @xvseq_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvseq_h(_1, _2); } +// CHECK-LABEL: @xvseq_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvseq_w(_1, _2); } +// CHECK-LABEL: @xvseq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvseq_d(_1, _2); } +// CHECK-LABEL: @xvseqi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvseqi_b(v32i8 _1) { return __builtin_lasx_xvseqi_b(_1, 1); } +// CHECK-LABEL: @xvseqi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvseqi_h(v16i16 _1) { return __builtin_lasx_xvseqi_h(_1, 1); } +// CHECK-LABEL: @xvseqi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvseqi_w(v8i32 _1) { return __builtin_lasx_xvseqi_w(_1, 1); } +// CHECK-LABEL: @xvseqi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvseqi_d(v4i64 _1) { return __builtin_lasx_xvseqi_d(_1, 1); } +// CHECK-LABEL: @xvslt_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvslt_b(_1, _2); } +// CHECK-LABEL: @xvslt_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvslt_h(_1, _2); } +// CHECK-LABEL: @xvslt_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvslt_w(_1, _2); } +// CHECK-LABEL: @xvslt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 
x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvslt_d(_1, _2); } +// CHECK-LABEL: @xvslti_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvslti_b(v32i8 _1) { return __builtin_lasx_xvslti_b(_1, 1); } +// CHECK-LABEL: @xvslti_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvslti_h(v16i16 _1) { return __builtin_lasx_xvslti_h(_1, 1); } +// CHECK-LABEL: @xvslti_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvslti_w(v8i32 _1) { return __builtin_lasx_xvslti_w(_1, 1); } +// CHECK-LABEL: @xvslti_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvslti_d(v4i64 _1) { return __builtin_lasx_xvslti_d(_1, 1); } +// CHECK-LABEL: @xvslt_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvslt_bu(_1, _2); } +// CHECK-LABEL: @xvslt_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvslt_hu(_1, _2); } +// CHECK-LABEL: @xvslt_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// 
+v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvslt_wu(_1, _2); } +// CHECK-LABEL: @xvslt_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvslt_du(_1, _2); } +// CHECK-LABEL: @xvslti_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvslti_bu(v32u8 _1) { return __builtin_lasx_xvslti_bu(_1, 1); } +// CHECK-LABEL: @xvslti_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvslti_hu(v16u16 _1) { return __builtin_lasx_xvslti_hu(_1, 1); } +// CHECK-LABEL: @xvslti_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvslti_wu(v8u32 _1) { return __builtin_lasx_xvslti_wu(_1, 1); } +// CHECK-LABEL: @xvslti_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvslti_du(v4u64 _1) { return __builtin_lasx_xvslti_du(_1, 1); } +// CHECK-LABEL: @xvsle_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsle_b(_1, _2); } +// CHECK-LABEL: @xvsle_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsle_h(_1, _2); } +// CHECK-LABEL: @xvsle_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsle_w(_1, _2); } +// CHECK-LABEL: @xvsle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsle_d(_1, _2); } +// CHECK-LABEL: @xvslei_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvslei_b(v32i8 _1) { return __builtin_lasx_xvslei_b(_1, 1); } +// CHECK-LABEL: @xvslei_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvslei_h(v16i16 _1) { return __builtin_lasx_xvslei_h(_1, 1); } +// CHECK-LABEL: @xvslei_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvslei_w(v8i32 _1) { return __builtin_lasx_xvslei_w(_1, 1); } +// CHECK-LABEL: @xvslei_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvslei_d(v4i64 _1) { return __builtin_lasx_xvslei_d(_1, 1); } +// CHECK-LABEL: @xvsle_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsle_bu(_1, _2); } +// CHECK-LABEL: @xvsle_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsle_hu(_1, _2); } +// CHECK-LABEL: @xvsle_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsle_wu(_1, _2); } +// CHECK-LABEL: @xvsle_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsle_du(_1, _2); } +// CHECK-LABEL: @xvslei_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvslei_bu(v32u8 _1) { return __builtin_lasx_xvslei_bu(_1, 1); } +// CHECK-LABEL: @xvslei_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvslei_hu(v16u16 _1) { return __builtin_lasx_xvslei_hu(_1, 1); } +// CHECK-LABEL: @xvslei_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvslei_wu(v8u32 _1) { return __builtin_lasx_xvslei_wu(_1, 1); } +// CHECK-LABEL: @xvslei_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvslei_du(v4u64 _1) { return __builtin_lasx_xvslei_du(_1, 1); } +// CHECK-LABEL: @xvsat_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 
xvsat_b(v32i8 _1) { return __builtin_lasx_xvsat_b(_1, 1); } +// CHECK-LABEL: @xvsat_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsat_h(v16i16 _1) { return __builtin_lasx_xvsat_h(_1, 1); } +// CHECK-LABEL: @xvsat_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsat_w(v8i32 _1) { return __builtin_lasx_xvsat_w(_1, 1); } +// CHECK-LABEL: @xvsat_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsat_d(v4i64 _1) { return __builtin_lasx_xvsat_d(_1, 1); } +// CHECK-LABEL: @xvsat_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvsat_bu(v32u8 _1) { return __builtin_lasx_xvsat_bu(_1, 1); } +// CHECK-LABEL: @xvsat_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvsat_hu(v16u16 _1) { return __builtin_lasx_xvsat_hu(_1, 1); } +// CHECK-LABEL: @xvsat_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvsat_wu(v8u32 _1) { return __builtin_lasx_xvsat_wu(_1, 1); } +// CHECK-LABEL: @xvsat_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvsat_du(v4u64 _1) { return __builtin_lasx_xvsat_du(_1, 1); } +// CHECK-LABEL: @xvadda_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: ret void +// +v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadda_b(_1, _2); } +// CHECK-LABEL: @xvadda_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadda_h(_1, _2); } +// CHECK-LABEL: @xvadda_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadda_w(_1, _2); } +// CHECK-LABEL: @xvadda_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadda_d(_1, _2); } +// CHECK-LABEL: @xvsadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsadd_b(_1, _2); } +// CHECK-LABEL: @xvsadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsadd_h(_1, _2); } +// CHECK-LABEL: @xvsadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsadd_w(_1, _2); } +// CHECK-LABEL: @xvsadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsadd_d(_1, _2); }
+// CHECK-LABEL: @xvsadd_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsadd_bu(_1, _2); }
+// CHECK-LABEL: @xvsadd_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsadd_hu(_1, _2); }
+// CHECK-LABEL: @xvsadd_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsadd_wu(_1, _2); }
+// CHECK-LABEL: @xvsadd_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsadd_du(_1, _2); }
+// CHECK-LABEL: @xvavg_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavg_b(_1, _2); }
+// CHECK-LABEL: @xvavg_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavg_h(_1, _2); }
+// CHECK-LABEL: @xvavg_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavg_w(_1, _2); }
+// CHECK-LABEL: @xvavg_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavg_d(_1, _2); }
+// CHECK-LABEL: @xvavg_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavg_bu(_1, _2); }
+// CHECK-LABEL: @xvavg_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavg_hu(_1, _2); }
+// CHECK-LABEL: @xvavg_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavg_wu(_1, _2); }
+// CHECK-LABEL: @xvavg_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavg_du(_1, _2); }
+// CHECK-LABEL: @xvavgr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavgr_b(_1, _2); }
+// CHECK-LABEL: @xvavgr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavgr_h(_1, _2); }
+// CHECK-LABEL: @xvavgr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavgr_w(_1, _2); }
+// CHECK-LABEL: @xvavgr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavgr_d(_1, _2); }
+// CHECK-LABEL: @xvavgr_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavgr_bu(_1, _2); }
+// CHECK-LABEL: @xvavgr_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavgr_hu(_1, _2); }
+// CHECK-LABEL: @xvavgr_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavgr_wu(_1, _2); }
+// CHECK-LABEL: @xvavgr_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavgr_du(_1, _2); }
+// CHECK-LABEL: @xvssub_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssub_b(_1, _2); }
+// CHECK-LABEL: @xvssub_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssub_h(_1, _2); }
+// CHECK-LABEL: @xvssub_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssub_w(_1, _2); }
+// CHECK-LABEL: @xvssub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssub_d(_1, _2); }
+// CHECK-LABEL: @xvssub_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvssub_bu(_1, _2); }
+// CHECK-LABEL: @xvssub_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssub_hu(_1, _2); }
+// CHECK-LABEL: @xvssub_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssub_wu(_1, _2); }
+// CHECK-LABEL: @xvssub_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssub_du(_1, _2); }
+// CHECK-LABEL: @xvabsd_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvabsd_b(_1, _2); }
+// CHECK-LABEL: @xvabsd_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvabsd_h(_1, _2); }
+// CHECK-LABEL: @xvabsd_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvabsd_w(_1, _2); }
+// CHECK-LABEL: @xvabsd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvabsd_d(_1, _2); }
+// CHECK-LABEL: @xvabsd_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvabsd_bu(_1, _2); }
+// CHECK-LABEL: @xvabsd_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvabsd_hu(_1, _2); }
+// CHECK-LABEL: @xvabsd_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvabsd_wu(_1, _2); }
+// CHECK-LABEL: @xvabsd_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvabsd_du(_1, _2); }
+// CHECK-LABEL: @xvmul_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmul_b(_1, _2); }
+// CHECK-LABEL: @xvmul_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmul_h(_1, _2); }
+// CHECK-LABEL: @xvmul_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmul_w(_1, _2); }
+// CHECK-LABEL: @xvmul_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmul_d(_1, _2); }
+// CHECK-LABEL: @xvmadd_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmadd_b(_1, _2, _3); }
+// CHECK-LABEL: @xvmadd_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmadd_h(_1, _2, _3); }
+// CHECK-LABEL: @xvmadd_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmadd_w(_1, _2, _3); }
+// CHECK-LABEL: @xvmadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmadd_d(_1, _2, _3); }
+// CHECK-LABEL: @xvmsub_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmsub_b(_1, _2, _3); }
+// CHECK-LABEL: @xvmsub_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmsub_h(_1, _2, _3); }
+// CHECK-LABEL: @xvmsub_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmsub_w(_1, _2, _3); }
+// CHECK-LABEL: @xvmsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmsub_d(_1, _2, _3); }
+// CHECK-LABEL: @xvdiv_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvdiv_b(_1, _2); }
+// CHECK-LABEL: @xvdiv_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvdiv_h(_1, _2); }
+// CHECK-LABEL: @xvdiv_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvdiv_w(_1, _2); }
+// CHECK-LABEL: @xvdiv_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvdiv_d(_1, _2); }
+// CHECK-LABEL: @xvdiv_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvdiv_bu(_1, _2); }
+// CHECK-LABEL: @xvdiv_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvdiv_hu(_1, _2); }
+// CHECK-LABEL: @xvdiv_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvdiv_wu(_1, _2); }
+// CHECK-LABEL: @xvdiv_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvdiv_du(_1, _2); }
+// CHECK-LABEL: @xvhaddw_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhaddw_h_b(_1, _2); }
+// CHECK-LABEL: @xvhaddw_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhaddw_w_h(_1, _2); }
+// CHECK-LABEL: @xvhaddw_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhaddw_d_w(_1, _2); }
+// CHECK-LABEL: @xvhaddw_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhaddw_hu_bu(_1, _2); }
+// CHECK-LABEL: @xvhaddw_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhaddw_wu_hu(_1, _2); }
+// CHECK-LABEL: @xvhaddw_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhaddw_du_wu(_1, _2); }
+// CHECK-LABEL: @xvhsubw_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhsubw_h_b(_1, _2); }
+// CHECK-LABEL: @xvhsubw_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhsubw_w_h(_1, _2); }
+// CHECK-LABEL: @xvhsubw_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhsubw_d_w(_1, _2); }
+// CHECK-LABEL: @xvhsubw_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhsubw_hu_bu(_1, _2); }
+// CHECK-LABEL: @xvhsubw_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhsubw_wu_hu(_1, _2); }
+// CHECK-LABEL: @xvhsubw_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhsubw_du_wu(_1, _2); }
+// CHECK-LABEL: @xvmod_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmod_b(_1, _2); }
+// CHECK-LABEL: @xvmod_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmod_h(_1, _2); }
+// CHECK-LABEL: @xvmod_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmod_w(_1, _2); }
+// CHECK-LABEL: @xvmod_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmod_d(_1, _2); }
+// CHECK-LABEL: @xvmod_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmod_bu(_1, _2); }
+// CHECK-LABEL: @xvmod_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmod_hu(_1, _2); }
+// CHECK-LABEL: @xvmod_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmod_wu(_1, _2); }
+// CHECK-LABEL: @xvmod_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmod_du(_1, _2); }
+// CHECK-LABEL: @xvrepl128vei_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvrepl128vei_b(v32i8 _1) { return __builtin_lasx_xvrepl128vei_b(_1, 1); }
+// CHECK-LABEL: @xvrepl128vei_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvrepl128vei_h(v16i16 _1) { return __builtin_lasx_xvrepl128vei_h(_1, 1); }
+// CHECK-LABEL: @xvrepl128vei_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvrepl128vei_w(v8i32 _1) { return __builtin_lasx_xvrepl128vei_w(_1, 1); }
+// CHECK-LABEL: @xvrepl128vei_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvrepl128vei_d(v4i64 _1) { return __builtin_lasx_xvrepl128vei_d(_1, 1); }
+// CHECK-LABEL: @xvpickev_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickev_b(_1, _2); }
+// CHECK-LABEL: @xvpickev_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickev_h(_1, _2); }
+// CHECK-LABEL: @xvpickev_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickev_w(_1, _2); }
+// CHECK-LABEL: @xvpickev_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickev_d(_1, _2); }
+// CHECK-LABEL: @xvpickod_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickod_b(_1, _2); }
+// CHECK-LABEL: @xvpickod_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickod_h(_1, _2); }
+// CHECK-LABEL: @xvpickod_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickod_w(_1, _2); }
+// CHECK-LABEL: @xvpickod_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickod_d(_1, _2); }
+// CHECK-LABEL: @xvilvh_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvh_b(_1, _2); }
+// CHECK-LABEL: @xvilvh_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvh_h(_1, _2); }
+// CHECK-LABEL: @xvilvh_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvh_w(_1, _2); }
+// CHECK-LABEL: @xvilvh_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvh_d(_1, _2); }
+// CHECK-LABEL: @xvilvl_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvl_b(_1, _2); }
+// CHECK-LABEL: @xvilvl_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvl_h(_1, _2); }
+// CHECK-LABEL: @xvilvl_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvl_w(_1, _2); }
+// CHECK-LABEL: @xvilvl_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvl_d(_1, _2); }
+// CHECK-LABEL: @xvpackev_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackev_b(_1, _2); }
+// CHECK-LABEL: @xvpackev_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackev_h(_1, _2); }
+// CHECK-LABEL: @xvpackev_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackev_w(_1, _2); }
+// CHECK-LABEL: @xvpackev_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackev_d(_1, _2); }
+// CHECK-LABEL: @xvpackod_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackod_b(_1, _2); }
+// CHECK-LABEL: @xvpackod_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackod_h(_1, _2); }
+// CHECK-LABEL: @xvpackod_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackod_w(_1, _2); }
+// CHECK-LABEL: @xvpackod_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackod_d(_1, _2); }
+// CHECK-LABEL: @xvshuf_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvshuf_b(_1, _2, _3); }
+// CHECK-LABEL: @xvshuf_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvshuf_h(_1, _2, _3); }
+// CHECK-LABEL: @xvshuf_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvshuf_w(_1, _2, _3); }
+// CHECK-LABEL: @xvshuf_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvshuf_d(_1, _2, _3); }
+// CHECK-LABEL: @xvand_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvand_v(_1, _2); }
+// CHECK-LABEL: @xvandi_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvandi_b(v32u8 _1) { return __builtin_lasx_xvandi_b(_1, 1); }
+// CHECK-LABEL: @xvor_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvor_v(_1, _2); }
+// CHECK-LABEL: @xvori_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvori_b(v32u8 _1) { return __builtin_lasx_xvori_b(_1, 1); }
+// CHECK-LABEL: @xvnor_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvnor_v(_1, _2); }
+// CHECK-LABEL: @xvnori_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvnori_b(v32u8 _1) { return __builtin_lasx_xvnori_b(_1, 1); }
+// CHECK-LABEL: @xvxor_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvxor_v(_1, _2); }
+// CHECK-LABEL: @xvxori_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvxori_b(v32u8 _1) { return __builtin_lasx_xvxori_b(_1, 1); }
+// CHECK-LABEL: @xvbitsel_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvbitsel_v(_1, _2, _3); }
+// CHECK-LABEL: @xvbitseli_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitseli_b(_1, _2, 1); }
+// CHECK-LABEL: @xvshuf4i_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvshuf4i_b(v32i8 _1) { return __builtin_lasx_xvshuf4i_b(_1, 1); }
+// CHECK-LABEL: @xvshuf4i_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvshuf4i_h(v16i16 _1) { return __builtin_lasx_xvshuf4i_h(_1, 1); }
+// CHECK-LABEL: @xvshuf4i_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvshuf4i_w(v8i32 _1) { return __builtin_lasx_xvshuf4i_w(_1, 1); }
+// CHECK-LABEL: @xvreplgr2vr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]])
+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvreplgr2vr_b(int _1) { return __builtin_lasx_xvreplgr2vr_b(_1); }
+// CHECK-LABEL: @xvreplgr2vr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]])
+// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvreplgr2vr_h(int _1) { return __builtin_lasx_xvreplgr2vr_h(_1); }
+// CHECK-LABEL: @xvreplgr2vr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]])
+// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvreplgr2vr_w(int _1) { return __builtin_lasx_xvreplgr2vr_w(_1); }
+// CHECK-LABEL: @xvreplgr2vr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]])
+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvreplgr2vr_d(int _1) { return __builtin_lasx_xvreplgr2vr_d(_1); }
+// CHECK-LABEL: @xvpcnt_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvpcnt_b(v32i8 _1) { return __builtin_lasx_xvpcnt_b(_1); }
+// CHECK-LABEL: @xvpcnt_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvpcnt_h(v16i16 _1) { return __builtin_lasx_xvpcnt_h(_1); }
+// CHECK-LABEL: @xvpcnt_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvpcnt_w(v8i32 _1) { return __builtin_lasx_xvpcnt_w(_1); }
+// CHECK-LABEL: @xvpcnt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvpcnt_d(v4i64 _1) { return __builtin_lasx_xvpcnt_d(_1); }
+// CHECK-LABEL: @xvclo_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvclo_b(v32i8 _1) { return __builtin_lasx_xvclo_b(_1); }
+// CHECK-LABEL: @xvclo_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvclo_h(v16i16 _1) { return __builtin_lasx_xvclo_h(_1); }
+// CHECK-LABEL: @xvclo_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvclo_w(v8i32 _1) { return __builtin_lasx_xvclo_w(_1); }
+// CHECK-LABEL: @xvclo_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvclo_d(v4i64 _1) { return __builtin_lasx_xvclo_d(_1); }
+// CHECK-LABEL: @xvclz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvclz_b(v32i8 _1) { return __builtin_lasx_xvclz_b(_1); }
+// CHECK-LABEL: @xvclz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvclz_h(v16i16 _1) { return __builtin_lasx_xvclz_h(_1); }
+// CHECK-LABEL: @xvclz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvclz_w(v8i32 _1) { return __builtin_lasx_xvclz_w(_1); }
+// CHECK-LABEL: @xvclz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvclz_d(v4i64 _1) { return __builtin_lasx_xvclz_d(_1); }
+// CHECK-LABEL: @xvfadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfadd_s(_1, _2); }
+// CHECK-LABEL: @xvfadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfadd_d(_1, _2); }
+// CHECK-LABEL: @xvfsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfsub_s(_1, _2); }
+// CHECK-LABEL: @xvfsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfsub_d(_1, _2); }
+// CHECK-LABEL: @xvfmul_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmul_s(_1, _2); }
+// CHECK-LABEL: @xvfmul_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmul_d(_1, _2); }
+// CHECK-LABEL: @xvfdiv_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfdiv_s(_1, _2); }
+// CHECK-LABEL: @xvfdiv_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfdiv_d(_1, _2); }
+// CHECK-LABEL: @xvfcvt_h_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcvt_h_s(_1, _2); }
+// CHECK-LABEL: @xvfcvt_s_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcvt_s_d(_1, _2); }
+// CHECK-LABEL: @xvfmin_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmin_s(_1, _2); }
+// CHECK-LABEL: @xvfmin_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmin_d(_1, _2); }
+// CHECK-LABEL: @xvfmina_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmina_s(_1, _2); }
+// CHECK-LABEL: @xvfmina_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmina_d(_1, _2); }
+// CHECK-LABEL: @xvfmax_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmax_s(_1, _2); }
+// CHECK-LABEL: @xvfmax_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmax_d(_1, _2); }
+// CHECK-LABEL: @xvfmaxa_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmaxa_s(_1, _2); }
+// CHECK-LABEL: @xvfmaxa_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmaxa_d(_1, _2); }
+// CHECK-LABEL: @xvfclass_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvfclass_s(v8f32 _1) { return __builtin_lasx_xvfclass_s(_1); }
+// CHECK-LABEL: @xvfclass_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvfclass_d(v4f64 _1) { return __builtin_lasx_xvfclass_d(_1); }
+// CHECK-LABEL: @xvfsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfsqrt_s(v8f32 _1) { return __builtin_lasx_xvfsqrt_s(_1); }
+// CHECK-LABEL: @xvfsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfsqrt_d(v4f64 _1) { return __builtin_lasx_xvfsqrt_d(_1); }
+// CHECK-LABEL: @xvfrecip_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfrecip_s(v8f32 _1) { return __builtin_lasx_xvfrecip_s(_1); }
+// CHECK-LABEL: @xvfrecip_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfrecip_d(v4f64 _1) { return __builtin_lasx_xvfrecip_d(_1); }
+// CHECK-LABEL: @xvfrint_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfrint_s(v8f32 _1) { return __builtin_lasx_xvfrint_s(_1); }
+// CHECK-LABEL: @xvfrint_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfrint_d(v4f64 _1) { return __builtin_lasx_xvfrint_d(_1); }
+// CHECK-LABEL: @xvfrsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfrsqrt_s(v8f32 _1) { return __builtin_lasx_xvfrsqrt_s(_1); }
+// CHECK-LABEL: @xvfrsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfrsqrt_d(v4f64 _1) { return __builtin_lasx_xvfrsqrt_d(_1); }
+// CHECK-LABEL: @xvflogb_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvflogb_s(v8f32 _1) { return __builtin_lasx_xvflogb_s(_1); }
+// CHECK-LABEL: @xvflogb_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvflogb_d(v4f64 _1) { return __builtin_lasx_xvflogb_d(_1); }
+// CHECK-LABEL: @xvfcvth_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfcvth_s_h(v16i16 _1) { return __builtin_lasx_xvfcvth_s_h(_1); }
+// CHECK-LABEL: @xvfcvth_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfcvth_d_s(v8f32 _1) { return __builtin_lasx_xvfcvth_d_s(_1); }
+// CHECK-LABEL: @xvfcvtl_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfcvtl_s_h(v16i16 _1) { return __builtin_lasx_xvfcvtl_s_h(_1); }
+// CHECK-LABEL: @xvfcvtl_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfcvtl_d_s(v8f32 _1) { return __builtin_lasx_xvfcvtl_d_s(_1); }
+// CHECK-LABEL: @xvftint_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvftint_w_s(v8f32 _1) { return __builtin_lasx_xvftint_w_s(_1); }
+// CHECK-LABEL: @xvftint_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftint_l_d(v4f64 _1) { return __builtin_lasx_xvftint_l_d(_1); }
+// CHECK-LABEL: @xvftint_wu_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvftint_wu_s(v8f32 _1) { return __builtin_lasx_xvftint_wu_s(_1); }
+// CHECK-LABEL: @xvftint_lu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvftint_lu_d(v4f64 _1) { return __builtin_lasx_xvftint_lu_d(_1); }
+// CHECK-LABEL: @xvftintrz_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvftintrz_w_s(v8f32 _1) { return __builtin_lasx_xvftintrz_w_s(_1); }
+// CHECK-LABEL: @xvftintrz_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftintrz_l_d(v4f64 _1) { return __builtin_lasx_xvftintrz_l_d(_1); }
+// CHECK-LABEL: @xvftintrz_wu_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvftintrz_wu_s(v8f32 _1) { return __builtin_lasx_xvftintrz_wu_s(_1); }
+// CHECK-LABEL: @xvftintrz_lu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvftintrz_lu_d(v4f64 _1) { return __builtin_lasx_xvftintrz_lu_d(_1); }
+// CHECK-LABEL: @xvffint_s_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvffint_s_w(v8i32 _1) { return __builtin_lasx_xvffint_s_w(_1); }
+// CHECK-LABEL: @xvffint_d_l(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvffint_d_l(v4i64 _1) { return __builtin_lasx_xvffint_d_l(_1); }
+// CHECK-LABEL: @xvffint_s_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvffint_s_wu(v8u32 _1) { return __builtin_lasx_xvffint_s_wu(_1); }
+// CHECK-LABEL: @xvffint_d_lu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvffint_d_lu(v4u64 _1) { return __builtin_lasx_xvffint_d_lu(_1); }
+// CHECK-LABEL: @xvreplve_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1]], i32 [[_2:%.*]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvreplve_b(v32i8 _1, int _2) { return __builtin_lasx_xvreplve_b(_1, _2); }
+// CHECK-LABEL: @xvreplve_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1]], i32 [[_2:%.*]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvreplve_h(v16i16 _1, int _2) { return __builtin_lasx_xvreplve_h(_1, _2); }
+// CHECK-LABEL: @xvreplve_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1]], i32 [[_2:%.*]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvreplve_w(v8i32 _1, int _2) { return __builtin_lasx_xvreplve_w(_1, _2); }
+// CHECK-LABEL: @xvreplve_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2:%.*]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvreplve_d(v4i64 _1, int _2) { return __builtin_lasx_xvreplve_d(_1, _2); }
+// CHECK-LABEL: @xvpermi_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpermi_w(_1, _2, 1); }
+// CHECK-LABEL: @xvandn_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvandn_v(_1, _2); }
+// CHECK-LABEL: @xvneg_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvneg_b(v32i8 _1) { return __builtin_lasx_xvneg_b(_1); }
+// CHECK-LABEL: @xvneg_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvneg_h(v16i16 _1) { return __builtin_lasx_xvneg_h(_1); }
+// CHECK-LABEL: @xvneg_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvneg_w(v8i32 _1) { return __builtin_lasx_xvneg_w(_1); }
+// CHECK-LABEL: @xvneg_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvneg_d(v4i64 _1) { return __builtin_lasx_xvneg_d(_1); }
+// CHECK-LABEL: @xvmuh_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmuh_b(_1, _2); }
+// CHECK-LABEL: @xvmuh_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmuh_h(_1, _2); }
+// CHECK-LABEL: @xvmuh_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmuh_w(_1, _2); }
+// CHECK-LABEL: @xvmuh_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmuh_d(_1, _2); }
+// CHECK-LABEL: @xvmuh_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmuh_bu(_1, _2); }
+// CHECK-LABEL: @xvmuh_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmuh_hu(_1, _2); }
+// CHECK-LABEL: @xvmuh_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmuh_wu(_1, _2); }
+// CHECK-LABEL: @xvmuh_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmuh_du(_1, _2); }
+// CHECK-LABEL: @xvsllwil_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsllwil_h_b(v32i8 _1) { return __builtin_lasx_xvsllwil_h_b(_1, 1); }
+// CHECK-LABEL: @xvsllwil_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsllwil_w_h(v16i16 _1) { return __builtin_lasx_xvsllwil_w_h(_1, 1); }
+// CHECK-LABEL: @xvsllwil_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsllwil_d_w(v8i32 _1) { return __builtin_lasx_xvsllwil_d_w(_1, 1); }
+// CHECK-LABEL: @xvsllwil_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvsllwil_hu_bu(v32u8 _1) { return __builtin_lasx_xvsllwil_hu_bu(_1, 1); }
+// CHECK-LABEL: @xvsllwil_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvsllwil_wu_hu(v16u16 _1) { return __builtin_lasx_xvsllwil_wu_hu(_1, 1); }
+// CHECK-LABEL: @xvsllwil_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvsllwil_du_wu(v8u32 _1) { return __builtin_lasx_xvsllwil_du_wu(_1, 1); }
+// CHECK-LABEL: @xvsran_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsran_b_h(_1, _2); }
+// CHECK-LABEL: @xvsran_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsran_h_w(_1, _2); }
+// CHECK-LABEL: @xvsran_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsran_w_d(_1, _2); }
+// CHECK-LABEL: @xvssran_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssran_b_h(_1, _2); }
+// CHECK-LABEL: @xvssran_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssran_h_w(_1, _2); }
+// CHECK-LABEL: @xvssran_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssran_w_d(_1, _2); }
+// CHECK-LABEL: @xvssran_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssran_bu_h(_1, _2); }
+// CHECK-LABEL: @xvssran_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssran_hu_w(_1, _2); }
+// CHECK-LABEL: @xvssran_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssran_wu_d(_1, _2); }
+// CHECK-LABEL: @xvsrarn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarn_b_h(_1, _2); }
+// CHECK-LABEL: @xvsrarn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarn_h_w(_1, _2); }
+// CHECK-LABEL: @xvsrarn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarn_w_d(_1, _2); }
+// CHECK-LABEL: @xvssrarn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarn_b_h(_1, _2); }
+// CHECK-LABEL: @xvssrarn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarn_h_w(_1, _2); }
+// CHECK-LABEL: @xvssrarn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarn_w_d(_1, _2); }
+// CHECK-LABEL: @xvssrarn_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrarn_bu_h(_1, _2); }
+// CHECK-LABEL: @xvssrarn_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrarn_hu_w(_1, _2); }
+// CHECK-LABEL: @xvssrarn_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrarn_wu_d(_1, _2); }
+// CHECK-LABEL: @xvsrln_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrln_b_h(_1, _2); }
+// CHECK-LABEL: @xvsrln_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrln_h_w(_1, _2); }
+// CHECK-LABEL: @xvsrln_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrln_w_d(_1, _2); }
+// CHECK-LABEL: @xvssrln_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrln_bu_h(_1, _2); }
+// CHECK-LABEL: @xvssrln_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrln_hu_w(_1, _2); }
+// CHECK-LABEL: @xvssrln_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrln_wu_d(_1, _2); }
+// CHECK-LABEL: @xvsrlrn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrn_b_h(_1, _2); }
+// CHECK-LABEL: @xvsrlrn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrn_h_w(_1, _2); }
+// CHECK-LABEL: @xvsrlrn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrn_w_d(_1, _2); }
+// CHECK-LABEL: @xvssrlrn_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrlrn_bu_h(_1, _2); }
+// CHECK-LABEL: @xvssrlrn_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrlrn_hu_w(_1, _2); }
+// CHECK-LABEL: @xvssrlrn_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrlrn_wu_d(_1, _2); }
+// CHECK-LABEL: @xvfrstpi_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvfrstpi_b(_1, _2, 1); }
+// CHECK-LABEL: @xvfrstpi_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvfrstpi_h(_1, _2, 1); }
+// CHECK-LABEL: @xvfrstp_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvfrstp_b(_1, _2, _3); }
+// CHECK-LABEL: @xvfrstp_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
+// CHECK-NEXT: store <16 x
i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvfrstp_h(_1, _2, _3); } +// CHECK-LABEL: @xvshuf4i_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvshuf4i_d(_1, _2, 1); } +// CHECK-LABEL: @xvbsrl_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvbsrl_v(v32i8 _1) { return __builtin_lasx_xvbsrl_v(_1, 1); } +// CHECK-LABEL: @xvbsll_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvbsll_v(v32i8 _1) { return __builtin_lasx_xvbsll_v(_1, 1); } +// CHECK-LABEL: @xvextrins_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvextrins_b(_1, _2, 1); } +// CHECK-LABEL: @xvextrins_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvextrins_h(_1, _2, 1); } +// CHECK-LABEL: @xvextrins_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvextrins_w(_1, _2, 1); } +// CHECK-LABEL: @xvextrins_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], 
align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvextrins_d(_1, _2, 1); } +// CHECK-LABEL: @xvmskltz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1]]) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmskltz_b(v32i8 _1) { return __builtin_lasx_xvmskltz_b(_1); } +// CHECK-LABEL: @xvmskltz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmskltz_h(v16i16 _1) { return __builtin_lasx_xvmskltz_h(_1); } +// CHECK-LABEL: @xvmskltz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmskltz_w(v8i32 _1) { return __builtin_lasx_xvmskltz_w(_1); } +// CHECK-LABEL: @xvmskltz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmskltz_d(v4i64 _1) { return __builtin_lasx_xvmskltz_d(_1); } +// CHECK-LABEL: @xvsigncov_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsigncov_b(_1, _2); } +// CHECK-LABEL: @xvsigncov_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsigncov_h(_1, _2); } +// CHECK-LABEL: @xvsigncov_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x 
i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsigncov_w(_1, _2); } +// CHECK-LABEL: @xvsigncov_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsigncov_d(_1, _2); } +// CHECK-LABEL: @xvfmadd_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmadd_s(_1, _2, _3); } +// CHECK-LABEL: @xvfmadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmadd_d(_1, _2, _3); } +// CHECK-LABEL: @xvfmsub_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmsub_s(_1, _2, _3); } +// CHECK-LABEL: @xvfmsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmsub_d(_1, 
_2, _3); } +// CHECK-LABEL: @xvfnmadd_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmadd_s(_1, _2, _3); } +// CHECK-LABEL: @xvfnmadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmadd_d(_1, _2, _3); } +// CHECK-LABEL: @xvfnmsub_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmsub_s(_1, _2, _3); } +// CHECK-LABEL: @xvfnmsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmsub_d(_1, _2, _3); } +// CHECK-LABEL: @xvftintrne_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrne_w_s(v8f32 _1) { return __builtin_lasx_xvftintrne_w_s(_1); } +// CHECK-LABEL: @xvftintrne_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrne_l_d(v4f64 _1) { return __builtin_lasx_xvftintrne_l_d(_1); } +// CHECK-LABEL: @xvftintrp_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrp_w_s(v8f32 _1) { return __builtin_lasx_xvftintrp_w_s(_1); } +// CHECK-LABEL: @xvftintrp_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrp_l_d(v4f64 _1) { return __builtin_lasx_xvftintrp_l_d(_1); } +// CHECK-LABEL: @xvftintrm_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrm_w_s(v8f32 _1) { return __builtin_lasx_xvftintrm_w_s(_1); } +// CHECK-LABEL: @xvftintrm_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrm_l_d(v4f64 _1) { return __builtin_lasx_xvftintrm_l_d(_1); } +// CHECK-LABEL: @xvftint_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftint_w_d(_1, _2); } +// CHECK-LABEL: @xvffint_s_l( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvffint_s_l(_1, _2); } +// CHECK-LABEL: @xvftintrz_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: ret void +// +v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrz_w_d(_1, _2); } +// CHECK-LABEL: @xvftintrp_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrp_w_d(_1, _2); } +// CHECK-LABEL: @xvftintrm_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrm_w_d(_1, _2); } +// CHECK-LABEL: @xvftintrne_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrne_w_d(_1, _2); } +// CHECK-LABEL: @xvftinth_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftinth_l_s(v8f32 _1) { return __builtin_lasx_xvftinth_l_s(_1); } +// CHECK-LABEL: @xvftintl_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintl_l_s(v8f32 _1) { return __builtin_lasx_xvftintl_l_s(_1); } +// CHECK-LABEL: @xvffinth_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvffinth_d_w(v8i32 _1) { return __builtin_lasx_xvffinth_d_w(_1); } +// CHECK-LABEL: @xvffintl_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: ret void +// +v4f64 xvffintl_d_w(v8i32 _1) { return __builtin_lasx_xvffintl_d_w(_1); } +// CHECK-LABEL: @xvftintrzh_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrzh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzh_l_s(_1); } +// CHECK-LABEL: @xvftintrzl_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrzl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzl_l_s(_1); } +// CHECK-LABEL: @xvftintrph_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrph_l_s(v8f32 _1) { return __builtin_lasx_xvftintrph_l_s(_1); } +// CHECK-LABEL: @xvftintrpl_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrpl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrpl_l_s(_1); } +// CHECK-LABEL: @xvftintrmh_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrmh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrmh_l_s(_1); } +// CHECK-LABEL: @xvftintrml_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrml_l_s(v8f32 _1) { return __builtin_lasx_xvftintrml_l_s(_1); } +// CHECK-LABEL: @xvftintrneh_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrneh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrneh_l_s(_1); } +// CHECK-LABEL: @xvftintrnel_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: 
store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrnel_l_s(v8f32 _1) { return __builtin_lasx_xvftintrnel_l_s(_1); } +// CHECK-LABEL: @xvfrintrne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfrintrne_s(v8f32 _1) { return __builtin_lasx_xvfrintrne_s(_1); } +// CHECK-LABEL: @xvfrintrne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfrintrne_d(v4f64 _1) { return __builtin_lasx_xvfrintrne_d(_1); } +// CHECK-LABEL: @xvfrintrz_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfrintrz_s(v8f32 _1) { return __builtin_lasx_xvfrintrz_s(_1); } +// CHECK-LABEL: @xvfrintrz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfrintrz_d(v4f64 _1) { return __builtin_lasx_xvfrintrz_d(_1); } +// CHECK-LABEL: @xvfrintrp_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfrintrp_s(v8f32 _1) { return __builtin_lasx_xvfrintrp_s(_1); } +// CHECK-LABEL: @xvfrintrp_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfrintrp_d(v4f64 _1) { return __builtin_lasx_xvfrintrp_d(_1); } +// CHECK-LABEL: @xvfrintrm_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfrintrm_s(v8f32 _1) { return __builtin_lasx_xvfrintrm_s(_1); } +// CHECK-LABEL: @xvfrintrm_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> 
@llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfrintrm_d(v4f64 _1) { return __builtin_lasx_xvfrintrm_d(_1); } +// CHECK-LABEL: @xvld( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvld(void *_1) { return __builtin_lasx_xvld(_1, 1); } +// CHECK-LABEL: @xvst( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1) +// CHECK-NEXT: ret void +// +void xvst(v32i8 _1, void *_2) { return __builtin_lasx_xvst(_1, _2, 1); } +// CHECK-LABEL: @xvstelm_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1, i32 1) +// CHECK-NEXT: ret void +// +void xvstelm_b(v32i8 _1, void * _2) { return __builtin_lasx_xvstelm_b(_1, _2, 1, 1); } +// CHECK-LABEL: @xvstelm_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2:%.*]], i32 2, i32 1) +// CHECK-NEXT: ret void +// +void xvstelm_h(v16i16 _1, void * _2) { return __builtin_lasx_xvstelm_h(_1, _2, 2, 1); } +// CHECK-LABEL: @xvstelm_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2:%.*]], i32 4, i32 1) +// CHECK-NEXT: ret void +// +void xvstelm_w(v8i32 _1, void * _2) { return __builtin_lasx_xvstelm_w(_1, _2, 4, 1); } +// CHECK-LABEL: @xvstelm_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2:%.*]], i32 8, i32 1) +// CHECK-NEXT: ret void +// +void xvstelm_d(v4i64 _1, void * _2) { return __builtin_lasx_xvstelm_d(_1, _2, 8, 1); } +// CHECK-LABEL: @xvinsve0_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvinsve0_w(_1, _2, 1); } +// CHECK-LABEL: @xvinsve0_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvinsve0_d(_1, _2, 1); } +// CHECK-LABEL: @xvpickve_w( +// 
CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvpickve_w(v8i32 _1) { return __builtin_lasx_xvpickve_w(_1, 1); } +// CHECK-LABEL: @xvpickve_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvpickve_d(v4i64 _1) { return __builtin_lasx_xvpickve_d(_1, 1); } +// CHECK-LABEL: @xvssrlrn_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrn_b_h(_1, _2); } +// CHECK-LABEL: @xvssrlrn_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrn_h_w(_1, _2); } +// CHECK-LABEL: @xvssrlrn_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrn_w_d(_1, _2); } +// CHECK-LABEL: @xvssrln_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrln_b_h(_1, _2); } +// CHECK-LABEL: @xvssrln_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrln_h_w(_1, _2); } +// CHECK-LABEL: @xvssrln_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrln_w_d(_1, _2); } +// CHECK-LABEL: @xvorn_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvorn_v(_1, _2); } +// CHECK-LABEL: @xvldi( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvldi() { return __builtin_lasx_xvldi(1); } +// CHECK-LABEL: @xvldx( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1) +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvldx(void *_1) { return __builtin_lasx_xvldx(_1, 1); } +// CHECK-LABEL: @xvstx( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1]], ptr [[_2:%.*]], i64 1) +// CHECK-NEXT: ret void +// +void xvstx(v32i8 _1, void *_2) { return __builtin_lasx_xvstx(_1, _2, 1); } +// CHECK-LABEL: @xvextl_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvextl_qu_du(v4u64 _1) { return __builtin_lasx_xvextl_qu_du(_1); } +// CHECK-LABEL: @xvinsgr2vr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvinsgr2vr_w(v8i32 _1) { return __builtin_lasx_xvinsgr2vr_w(_1, 1, 1); } +// CHECK-LABEL: @xvinsgr2vr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvinsgr2vr_d(v4i64 _1) { return 
__builtin_lasx_xvinsgr2vr_d(_1, 1, 1); } +// CHECK-LABEL: @xvreplve0_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1]]) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvreplve0_b(v32i8 _1) { return __builtin_lasx_xvreplve0_b(_1); } +// CHECK-LABEL: @xvreplve0_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvreplve0_h(v16i16 _1) { return __builtin_lasx_xvreplve0_h(_1); } +// CHECK-LABEL: @xvreplve0_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvreplve0_w(v8i32 _1) { return __builtin_lasx_xvreplve0_w(_1); } +// CHECK-LABEL: @xvreplve0_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvreplve0_d(v4i64 _1) { return __builtin_lasx_xvreplve0_d(_1); } +// CHECK-LABEL: @xvreplve0_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1]]) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvreplve0_q(v32i8 _1) { return __builtin_lasx_xvreplve0_q(_1); } +// CHECK-LABEL: @vext2xv_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 vext2xv_h_b(v32i8 _1) { return __builtin_lasx_vext2xv_h_b(_1); } +// CHECK-LABEL: @vext2xv_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 vext2xv_w_h(v16i16 _1) { return __builtin_lasx_vext2xv_w_h(_1); } +// CHECK-LABEL: @vext2xv_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 vext2xv_d_w(v8i32 _1) { return __builtin_lasx_vext2xv_d_w(_1); 
} +// CHECK-LABEL: @vext2xv_w_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 vext2xv_w_b(v32i8 _1) { return __builtin_lasx_vext2xv_w_b(_1); } +// CHECK-LABEL: @vext2xv_d_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 vext2xv_d_h(v16i16 _1) { return __builtin_lasx_vext2xv_d_h(_1); } +// CHECK-LABEL: @vext2xv_d_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 vext2xv_d_b(v32i8 _1) { return __builtin_lasx_vext2xv_d_b(_1); } +// CHECK-LABEL: @vext2xv_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 vext2xv_hu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_hu_bu(_1); } +// CHECK-LABEL: @vext2xv_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 vext2xv_wu_hu(v16i16 _1) { return __builtin_lasx_vext2xv_wu_hu(_1); } +// CHECK-LABEL: @vext2xv_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 vext2xv_du_wu(v8i32 _1) { return __builtin_lasx_vext2xv_du_wu(_1); } +// CHECK-LABEL: @vext2xv_wu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 vext2xv_wu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_wu_bu(_1); } +// CHECK-LABEL: @vext2xv_du_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 vext2xv_du_hu(v16i16 _1) { return 
__builtin_lasx_vext2xv_du_hu(_1); } +// CHECK-LABEL: @vext2xv_du_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 vext2xv_du_bu(v32i8 _1) { return __builtin_lasx_vext2xv_du_bu(_1); } +// CHECK-LABEL: @xvpermi_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpermi_q(_1, _2, 1); } +// CHECK-LABEL: @xvpermi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvpermi_d(v4i64 _1) { return __builtin_lasx_xvpermi_d(_1, 1); } +// CHECK-LABEL: @xvperm_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvperm_w(_1, _2); } +// CHECK-LABEL: @xvldrepl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvldrepl_b(void *_1) { return __builtin_lasx_xvldrepl_b(_1, 1); } +// CHECK-LABEL: @xvldrepl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2) +// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvldrepl_h(void *_1) { return __builtin_lasx_xvldrepl_h(_1, 2); } +// CHECK-LABEL: @xvldrepl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4) +// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvldrepl_w(void *_1) { return __builtin_lasx_xvldrepl_w(_1, 4); } +// CHECK-LABEL: @xvldrepl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8) +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvldrepl_d(void *_1) { return __builtin_lasx_xvldrepl_d(_1, 8); } +// CHECK-LABEL: @xvpickve2gr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xvpickve2gr_w(v8i32 _1) { return __builtin_lasx_xvpickve2gr_w(_1, 1); } +// CHECK-LABEL: @xvpickve2gr_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: ret i32 [[TMP1]] +// +unsigned int xvpickve2gr_wu(v8i32 _1) { return __builtin_lasx_xvpickve2gr_wu(_1, 1); } +// CHECK-LABEL: @xvpickve2gr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: ret i64 [[TMP1]] +// +long xvpickve2gr_d(v4i64 _1) { return __builtin_lasx_xvpickve2gr_d(_1, 1); } +// CHECK-LABEL: @xvpickve2gr_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: ret i64 [[TMP1]] +// +unsigned long int xvpickve2gr_du(v4i64 _1) { return __builtin_lasx_xvpickve2gr_du(_1, 1); } +// CHECK-LABEL: @xvaddwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_d(_1, _2); } +// CHECK-LABEL: @xvaddwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_w(_1, _2); } +// CHECK-LABEL: @xvaddwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_h(_1, _2); } +// CHECK-LABEL: @xvaddwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// 
+v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_b(_1, _2); } +// CHECK-LABEL: @xvaddwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwev_q_du(_1, _2); } +// CHECK-LABEL: @xvaddwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwev_d_wu(_1, _2); } +// CHECK-LABEL: @xvaddwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwev_w_hu(_1, _2); } +// CHECK-LABEL: @xvaddwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwev_h_bu(_1, _2); } +// CHECK-LABEL: @xvsubwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwev_q_d(_1, _2); } +// CHECK-LABEL: @xvsubwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwev_d_w(_1, _2); } +// CHECK-LABEL: @xvsubwev_w_h( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwev_w_h(_1, _2); } +// CHECK-LABEL: @xvsubwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwev_h_b(_1, _2); } +// CHECK-LABEL: @xvsubwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwev_q_du(_1, _2); } +// CHECK-LABEL: @xvsubwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwev_d_wu(_1, _2); } +// CHECK-LABEL: @xvsubwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwev_w_hu(_1, _2); } +// CHECK-LABEL: @xvsubwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwev_h_bu(_1, _2); } +// CHECK-LABEL: @xvmulwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_d(_1, _2); } +// CHECK-LABEL: @xvmulwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_w(_1, _2); } +// CHECK-LABEL: @xvmulwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_h(_1, _2); } +// CHECK-LABEL: @xvmulwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_b(_1, _2); } +// CHECK-LABEL: @xvmulwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwev_q_du(_1, _2); } +// CHECK-LABEL: @xvmulwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwev_d_wu(_1, _2); } +// CHECK-LABEL: @xvmulwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1]], <16 x i16> 
[[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwev_w_hu(_1, _2); } +// CHECK-LABEL: @xvmulwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwev_h_bu(_1, _2); } +// CHECK-LABEL: @xvaddwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_d(_1, _2); } +// CHECK-LABEL: @xvaddwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_w(_1, _2); } +// CHECK-LABEL: @xvaddwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_h(_1, _2); } +// CHECK-LABEL: @xvaddwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_b(_1, _2); } +// CHECK-LABEL: @xvaddwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 
xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwod_q_du(_1, _2); } +// CHECK-LABEL: @xvaddwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwod_d_wu(_1, _2); } +// CHECK-LABEL: @xvaddwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwod_w_hu(_1, _2); } +// CHECK-LABEL: @xvaddwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwod_h_bu(_1, _2); } +// CHECK-LABEL: @xvsubwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwod_q_d(_1, _2); } +// CHECK-LABEL: @xvsubwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwod_d_w(_1, _2); } +// CHECK-LABEL: @xvsubwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwod_w_h(_1, _2); } +// CHECK-LABEL: @xvsubwod_h_b( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwod_h_b(_1, _2); } +// CHECK-LABEL: @xvsubwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwod_q_du(_1, _2); } +// CHECK-LABEL: @xvsubwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwod_d_wu(_1, _2); } +// CHECK-LABEL: @xvsubwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwod_w_hu(_1, _2); } +// CHECK-LABEL: @xvsubwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwod_h_bu(_1, _2); } +// CHECK-LABEL: @xvmulwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_d(_1, _2); } +// CHECK-LABEL: @xvmulwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], 
align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_w(_1, _2); } +// CHECK-LABEL: @xvmulwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_h(_1, _2); } +// CHECK-LABEL: @xvmulwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_b(_1, _2); } +// CHECK-LABEL: @xvmulwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwod_q_du(_1, _2); } +// CHECK-LABEL: @xvmulwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwod_d_wu(_1, _2); } +// CHECK-LABEL: @xvmulwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwod_w_hu(_1, _2); } +// CHECK-LABEL: @xvmulwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) 
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwod_h_bu(_1, _2); } +// CHECK-LABEL: @xvaddwev_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_wu_w(_1, _2); } +// CHECK-LABEL: @xvaddwev_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_hu_h(_1, _2); } +// CHECK-LABEL: @xvaddwev_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_bu_b(_1, _2); } +// CHECK-LABEL: @xvmulwev_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_wu_w(_1, _2); } +// CHECK-LABEL: @xvmulwev_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_hu_h(_1, _2); } +// CHECK-LABEL: @xvmulwev_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_bu_b(_1, _2); } +// CHECK-LABEL: @xvaddwod_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_wu_w(_1, _2); } +// CHECK-LABEL: @xvaddwod_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_hu_h(_1, _2); } +// CHECK-LABEL: @xvaddwod_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_bu_b(_1, _2); } +// CHECK-LABEL: @xvmulwod_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_wu_w(_1, _2); } +// CHECK-LABEL: @xvmulwod_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_hu_h(_1, _2); } +// CHECK-LABEL: @xvmulwod_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { 
return __builtin_lasx_xvmulwod_h_bu_b(_1, _2); } +// CHECK-LABEL: @xvhaddw_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhaddw_q_d(_1, _2); } +// CHECK-LABEL: @xvhaddw_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhaddw_qu_du(_1, _2); } +// CHECK-LABEL: @xvhsubw_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhsubw_q_d(_1, _2); } +// CHECK-LABEL: @xvhsubw_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhsubw_qu_du(_1, _2); } +// CHECK-LABEL: @xvmaddwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_d(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: ret void +// +v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_w(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_h(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_b(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwev_q_du(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwev_d_wu(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return 
__builtin_lasx_xvmaddwev_w_hu(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwev_h_bu(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_d(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_w(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_h(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_b(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_q_du( +// 
CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwod_q_du(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwod_d_wu(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwod_w_hu(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwod_h_bu(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_du_d(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_wu_w(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_hu_h(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwev_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_bu_b(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_du_d(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_wu_w(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] 
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_hu_h(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_bu_b(_1, _2, _3); } +// CHECK-LABEL: @xvrotr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvrotr_b(_1, _2); } +// CHECK-LABEL: @xvrotr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvrotr_h(_1, _2); } +// CHECK-LABEL: @xvrotr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvrotr_w(_1, _2); } +// CHECK-LABEL: @xvrotr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvrotr_d(_1, _2); } +// CHECK-LABEL: @xvadd_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x 
i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_q(_1, _2); } +// CHECK-LABEL: @xvsub_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_q(_1, _2); } +// CHECK-LABEL: @xvaddwev_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_du_d(_1, _2); } +// CHECK-LABEL: @xvaddwod_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_du_d(_1, _2); } +// CHECK-LABEL: @xvmulwev_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_du_d(_1, _2); } +// CHECK-LABEL: @xvmulwod_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_du_d(_1, _2); } +// CHECK-LABEL: @xvmskgez_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1]]) +// 
CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmskgez_b(v32i8 _1) { return __builtin_lasx_xvmskgez_b(_1); } +// CHECK-LABEL: @xvmsknz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1]]) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmsknz_b(v32i8 _1) { return __builtin_lasx_xvmsknz_b(_1); } +// CHECK-LABEL: @xvexth_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvexth_h_b(v32i8 _1) { return __builtin_lasx_xvexth_h_b(_1); } +// CHECK-LABEL: @xvexth_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvexth_w_h(v16i16 _1) { return __builtin_lasx_xvexth_w_h(_1); } +// CHECK-LABEL: @xvexth_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvexth_d_w(v8i32 _1) { return __builtin_lasx_xvexth_d_w(_1); } +// CHECK-LABEL: @xvexth_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvexth_q_d(v4i64 _1) { return __builtin_lasx_xvexth_q_d(_1); } +// CHECK-LABEL: @xvexth_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvexth_hu_bu(v32u8 _1) { return __builtin_lasx_xvexth_hu_bu(_1); } +// CHECK-LABEL: @xvexth_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvexth_wu_hu(v16u16 _1) { return __builtin_lasx_xvexth_wu_hu(_1); } +// CHECK-LABEL: @xvexth_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvexth_du_wu(v8u32 _1) { return __builtin_lasx_xvexth_du_wu(_1); } +// CHECK-LABEL: @xvexth_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvexth_qu_du(v4u64 _1) { return __builtin_lasx_xvexth_qu_du(_1); } +// CHECK-LABEL: @xvrotri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvrotri_b(v32i8 _1) { return __builtin_lasx_xvrotri_b(_1, 1); } +// CHECK-LABEL: @xvrotri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvrotri_h(v16i16 _1) { return __builtin_lasx_xvrotri_h(_1, 1); } +// CHECK-LABEL: @xvrotri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvrotri_w(v8i32 _1) { return __builtin_lasx_xvrotri_w(_1, 1); } +// CHECK-LABEL: @xvrotri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvrotri_d(v4i64 _1) { return __builtin_lasx_xvrotri_d(_1, 1); } +// CHECK-LABEL: @xvextl_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvextl_q_d(v4i64 _1) { return __builtin_lasx_xvextl_q_d(_1); } +// CHECK-LABEL: @xvsrlni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvsrlni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvsrlni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvsrlni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvsrlrni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlrni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvsrlrni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvsrlrni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvsrlrni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: 
store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_du_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: ret void +// +v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_du_q(_1, _2, 1); } +// CHECK-LABEL: @xvsrani_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrani_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvsrani_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { 
return __builtin_lasx_xvsrani_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvsrani_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrani_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvsrani_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrani_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrarni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_b_h( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_du_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_du_q(_1, _2, 1); } +// CHECK-LABEL: @xbnz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xbnz_b(v32u8 _1) { return __builtin_lasx_xbnz_b(_1); } +// CHECK-LABEL: @xbnz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xbnz_d(v4u64 _1) { return __builtin_lasx_xbnz_d(_1); } +// CHECK-LABEL: @xbnz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xbnz_h(v16u16 _1) { return __builtin_lasx_xbnz_h(_1); } +// CHECK-LABEL: @xbnz_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xbnz_v(v32u8 _1) { return __builtin_lasx_xbnz_v(_1); } +// CHECK-LABEL: @xbnz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] 
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xbnz_w(v8u32 _1) { return __builtin_lasx_xbnz_w(_1); } +// CHECK-LABEL: @xbz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xbz_b(v32u8 _1) { return __builtin_lasx_xbz_b(_1); } +// CHECK-LABEL: @xbz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xbz_d(v4u64 _1) { return __builtin_lasx_xbz_d(_1); } +// CHECK-LABEL: @xbz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xbz_h(v16u16 _1) { return __builtin_lasx_xbz_h(_1); } +// CHECK-LABEL: @xbz_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xbz_v(v32u8 _1) { return __builtin_lasx_xbz_v(_1); } +// CHECK-LABEL: @xbz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xbz_w(v8u32 _1) { return __builtin_lasx_xbz_w(_1); } +// CHECK-LABEL: @xvfcmp_caf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_caf_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_caf_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_caf_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_ceq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_ceq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_ceq_s( 
+// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_ceq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cle_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cle_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cle_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_clt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_clt_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_clt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_clt_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cne_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] 
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cne_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cor_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cor_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cueq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cueq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cueq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cule_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cule_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cule_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cult_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cult_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cun_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cun_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cune_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cune_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> 
[[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cun_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_saf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_saf_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_saf_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_saf_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_seq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_seq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_seq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_seq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sle_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sle_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sle_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_slt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_slt_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_slt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_slt_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sne_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sne_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sor_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return 
__builtin_lasx_xvfcmp_sor_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sueq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sueq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sueq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sule_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sule_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sule_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sult_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sult_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sun_d( +// CHECK-NEXT: 
entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sun_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sune_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sune_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sun_s(_1, _2); } +// CHECK-LABEL: @xvpickve_d_f( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvpickve_d_f(v4f64 _1) { return __builtin_lasx_xvpickve_d_f(_1, 1); } +// CHECK-LABEL: @xvpickve_w_f( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvpickve_w_f(v8f32 _1) { return __builtin_lasx_xvpickve_w_f(_1, 1); } +// CHECK-LABEL: @xvrepli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvrepli_b() { return __builtin_lasx_xvrepli_b(1); } +// CHECK-LABEL: @xvrepli_d( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvrepli_d() { return __builtin_lasx_xvrepli_d(1); } +// CHECK-LABEL: @xvrepli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvrepli_h() { return __builtin_lasx_xvrepli_h(1); } +// CHECK-LABEL: @xvrepli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvrepli_w() { return __builtin_lasx_xvrepli_w(1); } diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c new file mode 100644 index 0000000000000000000000000000000000000000..69cf2254fdd797944bc2333d4c01a4d62c2c45b7 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c @@ -0,0 +1,1359 @@ +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -verify %s + +#include <lsxintrin.h> + +v16i8 vslli_b(v16i8 _1, int var) { + v16i8 res = __lsx_vslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vslli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_b' must be a constant integer}} + return res; +} + +v8i16 vslli_h(v8i16 _1, int var) { + v8i16 res = __lsx_vslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vslli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_h' must be a constant integer}} + return res; +} + +v4i32 vslli_w(v4i32 _1, int var) { + v4i32 res = __lsx_vslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_w' must be a constant integer}} + return res; +} + +v2i64 vslli_d(v2i64 _1, int var) { + v2i64 res = __lsx_vslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vslli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_d' must be a constant integer}} + return res; +} + +v16i8 vsrai_b(v16i8 _1, int var) { + v16i8 res = __lsx_vsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsrai_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_b' must be a constant integer}} + return res; +} + +v8i16 vsrai_h(v8i16 _1, int var) { + v8i16 res = __lsx_vsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsrai_h(_1, 16); // expected-error {{argument value 16 is outside
the valid range [0, 15]}} + res |= __lsx_vsrai_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_h' must be a constant integer}} + return res; +} + +v4i32 vsrai_w(v4i32 _1, int var) { + v4i32 res = __lsx_vsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsrai_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_w' must be a constant integer}} + return res; +} + +v2i64 vsrai_d(v2i64 _1, int var) { + v2i64 res = __lsx_vsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsrai_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_d' must be a constant integer}} + return res; +} + +v16i8 vsrari_b(v16i8 _1, int var) { + v16i8 res = __lsx_vsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsrari_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_b' must be a constant integer}} + return res; +} + +v8i16 vsrari_h(v8i16 _1, int var) { + v8i16 res = __lsx_vsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsrari_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_h' must be a constant integer}} + return res; +} + +v4i32 vsrari_w(v4i32 _1, int var) { + v4i32 res = __lsx_vsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsrari_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_w' must be a constant integer}} + return res; +} + +v2i64 vsrari_d(v2i64 _1, int var) { + v2i64 res = __lsx_vsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsrari_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_d' must be a constant integer}} + return res; +} + +v16i8 vsrli_b(v16i8 _1, int var) { + v16i8 res = __lsx_vsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsrli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_b' must be a constant integer}} + return res; +} + +v8i16 vsrli_h(v8i16 _1, int var) { + v8i16 res = __lsx_vsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsrli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_h' must be a constant integer}} + return res; +} + +v4i32 vsrli_w(v4i32 _1, int var) { + v4i32 res = __lsx_vsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= 
__lsx_vsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsrli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_w' must be a constant integer}} + return res; +} + +v2i64 vsrli_d(v2i64 _1, int var) { + v2i64 res = __lsx_vsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsrli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_d' must be a constant integer}} + return res; +} + +v16i8 vsrlri_b(v16i8 _1, int var) { + v16i8 res = __lsx_vsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsrlri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_b' must be a constant integer}} + return res; +} + +v8i16 vsrlri_h(v8i16 _1, int var) { + v8i16 res = __lsx_vsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsrlri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_h' must be a constant integer}} + return res; +} + +v4i32 vsrlri_w(v4i32 _1, int var) { + v4i32 res = __lsx_vsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsrlri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_w' must be a constant integer}} + return res; +} + +v2i64 vsrlri_d(v2i64 _1, int var) { + v2i64 res = __lsx_vsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsrlri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_d' must be a constant integer}} + return res; +} + +v16u8 vbitclri_b(v16u8 _1, int var) { + v16u8 res = __lsx_vbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vbitclri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_b' must be a constant integer}} + return res; +} + +v8u16 vbitclri_h(v8u16 _1, int var) { + v8u16 res = __lsx_vbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vbitclri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_h' must be a constant integer}} + return res; +} + +v4u32 vbitclri_w(v4u32 _1, int var) { + v4u32 res = __lsx_vbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vbitclri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_w' must be a constant integer}} + return res; +} + +v2u64 vbitclri_d(v2u64 _1, int var) { + v2u64 res = 
__lsx_vbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vbitclri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_d' must be a constant integer}} + return res; +} + +v16u8 vbitseti_b(v16u8 _1, int var) { + v16u8 res = __lsx_vbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vbitseti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_b' must be a constant integer}} + return res; +} + +v8u16 vbitseti_h(v8u16 _1, int var) { + v8u16 res = __lsx_vbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vbitseti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_h' must be a constant integer}} + return res; +} + +v4u32 vbitseti_w(v4u32 _1, int var) { + v4u32 res = __lsx_vbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vbitseti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_w' must be a constant integer}} + return res; +} + +v2u64 vbitseti_d(v2u64 _1, int var) { + v2u64 res = __lsx_vbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vbitseti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_d' must be a constant integer}} + return res; +} + +v16u8 vbitrevi_b(v16u8 _1, int var) { + v16u8 res = __lsx_vbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_b' must be a constant integer}} + return res; +} + +v8u16 vbitrevi_h(v8u16 _1, int var) { + v8u16 res = __lsx_vbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_h' must be a constant integer}} + return res; +} + +v4u32 vbitrevi_w(v4u32 _1, int var) { + v4u32 res = __lsx_vbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_w' must be a constant integer}} + return res; +} + +v2u64 vbitrevi_d(v2u64 _1, int var) { + v2u64 res = __lsx_vbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= 
__lsx_vbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_d' must be a constant integer}} + return res; +} + +v16i8 vaddi_bu(v16i8 _1, int var) { + v16i8 res = __lsx_vaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_bu' must be a constant integer}} + return res; +} + +v8i16 vaddi_hu(v8i16 _1, int var) { + v8i16 res = __lsx_vaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_hu' must be a constant integer}} + return res; +} + +v4i32 vaddi_wu(v4i32 _1, int var) { + v4i32 res = __lsx_vaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_wu' must be a constant integer}} + return res; +} + +v2i64 vaddi_du(v2i64 _1, int var) { + v2i64 res = __lsx_vaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_du' must be a constant integer}} + return res; +} + +v16i8 vsubi_bu(v16i8 _1, int var) { + v16i8 res = __lsx_vsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_bu' must be a constant integer}} + return res; +} + +v8i16 vsubi_hu(v8i16 _1, int var) { + v8i16 res = __lsx_vsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_hu' must be a constant integer}} + return res; +} + +v4i32 vsubi_wu(v4i32 _1, int var) { + v4i32 res = __lsx_vsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_wu' must be a constant integer}} + return res; +} + +v2i64 vsubi_du(v2i64 _1, int var) { + v2i64 res = __lsx_vsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_du' must be a constant integer}} + return res; +} + +v16i8 vmaxi_b(v16i8 _1, int var) { + v16i8 res = __lsx_vmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_b(_1, 
16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_b' must be a constant integer}} + return res; +} + +v8i16 vmaxi_h(v8i16 _1, int var) { + v8i16 res = __lsx_vmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_h' must be a constant integer}} + return res; +} + +v4i32 vmaxi_w(v4i32 _1, int var) { + v4i32 res = __lsx_vmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_w' must be a constant integer}} + return res; +} + +v2i64 vmaxi_d(v2i64 _1, int var) { + v2i64 res = __lsx_vmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_d' must be a constant integer}} + return res; +} + +v16u8 vmaxi_bu(v16u8 _1, int var) { + v16u8 res = __lsx_vmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_bu' must be a constant integer}} + return res; +} + +v8u16 vmaxi_hu(v8u16 _1, int var) { + v8u16 res = __lsx_vmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_hu' must be a constant integer}} + return res; +} + +v4u32 vmaxi_wu(v4u32 _1, int var) { + v4u32 res = __lsx_vmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_wu' must be a constant integer}} + return res; +} + +v2u64 vmaxi_du(v2u64 _1, int var) { + v2u64 res = __lsx_vmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_du' must be a constant integer}} + return res; +} + +v16i8 vmini_b(v16i8 _1, int var) { + v16i8 res = __lsx_vmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_b' must be a constant integer}} + return res; +} + +v8i16 vmini_h(v8i16 _1, int var) { + v8i16 res = __lsx_vmini_h(_1, -17); // expected-error {{argument value -17 is outside 
the valid range [-16, 15]}} + res |= __lsx_vmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_h' must be a constant integer}} + return res; +} + +v4i32 vmini_w(v4i32 _1, int var) { + v4i32 res = __lsx_vmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_w' must be a constant integer}} + return res; +} + +v2i64 vmini_d(v2i64 _1, int var) { + v2i64 res = __lsx_vmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_d' must be a constant integer}} + return res; +} + +v16u8 vmini_bu(v16u8 _1, int var) { + v16u8 res = __lsx_vmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmini_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_bu' must be a constant integer}} + return res; +} + +v8u16 vmini_hu(v8u16 _1, int var) { + v8u16 res = __lsx_vmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmini_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_hu' must be a constant integer}} + return res; +} + +v4u32 vmini_wu(v4u32 _1, int var) { + v4u32 res = __lsx_vmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmini_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_wu' must be a constant integer}} + return res; +} + +v2u64 vmini_du(v2u64 _1, int var) { + v2u64 res = __lsx_vmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmini_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_du' must be a constant integer}} + return res; +} + +v16i8 vseqi_b(v16i8 _1, int var) { + v16i8 res = __lsx_vseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_b' must be a constant integer}} + return res; +} + +v8i16 vseqi_h(v8i16 _1, int var) { + v8i16 res = __lsx_vseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_h' must be a constant integer}} + return res; +} + +v4i32 vseqi_w(v4i32 _1, int var) { + v4i32 res = __lsx_vseqi_w(_1, 
-17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_w' must be a constant integer}} + return res; +} + +v2i64 vseqi_d(v2i64 _1, int var) { + v2i64 res = __lsx_vseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_d' must be a constant integer}} + return res; +} + +v16i8 vslti_b(v16i8 _1, int var) { + v16i8 res = __lsx_vslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_b' must be a constant integer}} + return res; +} + +v8i16 vslti_h(v8i16 _1, int var) { + v8i16 res = __lsx_vslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_h' must be a constant integer}} + return res; +} + +v4i32 vslti_w(v4i32 _1, int var) { + v4i32 res = __lsx_vslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_w' must be a constant integer}} + return res; +} + +v2i64 vslti_d(v2i64 _1, int var) { + v2i64 res = __lsx_vslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_d' must be a constant integer}} + return res; +} + +v16i8 vslti_bu(v16u8 _1, int var) { + v16i8 res = __lsx_vslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslti_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_bu' must be a constant integer}} + return res; +} + +v8i16 vslti_hu(v8u16 _1, int var) { + v8i16 res = __lsx_vslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslti_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_hu' must be a constant integer}} + return res; +} + +v4i32 vslti_wu(v4u32 _1, int var) { + v4i32 res = __lsx_vslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslti_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_wu' must be a constant integer}} + return res; +} + +v2i64 
vslti_du(v2u64 _1, int var) { + v2i64 res = __lsx_vslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslti_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_du' must be a constant integer}} + return res; +} + +v16i8 vslei_b(v16i8 _1, int var) { + v16i8 res = __lsx_vslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_b' must be a constant integer}} + return res; +} + +v8i16 vslei_h(v8i16 _1, int var) { + v8i16 res = __lsx_vslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_h' must be a constant integer}} + return res; +} + +v4i32 vslei_w(v4i32 _1, int var) { + v4i32 res = __lsx_vslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_w' must be a constant integer}} + return res; +} + +v2i64 vslei_d(v2i64 _1, int var) { + v2i64 res = __lsx_vslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_d' must be a constant integer}} + return res; +} + +v16i8 vslei_bu(v16u8 _1, int var) { + v16i8 res = __lsx_vslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslei_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_bu' must be a constant integer}} + return res; +} + +v8i16 vslei_hu(v8u16 _1, int var) { + v8i16 res = __lsx_vslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslei_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_hu' must be a constant integer}} + return res; +} + +v4i32 vslei_wu(v4u32 _1, int var) { + v4i32 res = __lsx_vslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslei_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_wu' must be a constant integer}} + return res; +} + +v2i64 vslei_du(v2u64 _1, int var) { + v2i64 res = __lsx_vslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslei_du(_1, var); // expected-error {{argument to 
'__builtin_lsx_vslei_du' must be a constant integer}} + return res; +} + +v16i8 vsat_b(v16i8 _1, int var) { + v16i8 res = __lsx_vsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsat_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_b' must be a constant integer}} + return res; +} + +v8i16 vsat_h(v8i16 _1, int var) { + v8i16 res = __lsx_vsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsat_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_h' must be a constant integer}} + return res; +} + +v4i32 vsat_w(v4i32 _1, int var) { + v4i32 res = __lsx_vsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsat_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_w' must be a constant integer}} + return res; +} + +v2i64 vsat_d(v2i64 _1, int var) { + v2i64 res = __lsx_vsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsat_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_d' must be a constant integer}} + return res; +} + +v16u8 vsat_bu(v16u8 _1, int var) { + v16u8 res = __lsx_vsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsat_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_bu' must be a constant integer}} + return res; +} + +v8u16 vsat_hu(v8u16 _1, int var) { + v8u16 res = __lsx_vsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsat_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_hu' must be a constant integer}} + return res; +} + +v4u32 vsat_wu(v4u32 _1, int var) { + v4u32 res = __lsx_vsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsat_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_wu' must be a constant integer}} + return res; +} + +v2u64 vsat_du(v2u64 _1, int var) { + v2u64 res = __lsx_vsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsat_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_du' must be a constant integer}} + return res; +} + +v16i8 vreplvei_b(v16i8 _1, int var) { + v16i8 res = __lsx_vreplvei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vreplvei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vreplvei_b(_1, var); 
// expected-error {{argument to '__builtin_lsx_vreplvei_b' must be a constant integer}} + return res; +} + +v8i16 vreplvei_h(v8i16 _1, int var) { + v8i16 res = __lsx_vreplvei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vreplvei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vreplvei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_h' must be a constant integer}} + return res; +} + +v4i32 vreplvei_w(v4i32 _1, int var) { + v4i32 res = __lsx_vreplvei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lsx_vreplvei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lsx_vreplvei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_w' must be a constant integer}} + return res; +} + +v2i64 vreplvei_d(v2i64 _1, int var) { + v2i64 res = __lsx_vreplvei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __lsx_vreplvei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __lsx_vreplvei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_d' must be a constant integer}} + return res; +} + +v16u8 vandi_b(v16u8 _1, int var) { + v16u8 res = __lsx_vandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vandi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vandi_b' must be a constant integer}} + return res; +} + +v16u8 vori_b(v16u8 _1, int var) { + v16u8 res = __lsx_vori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vori_b' must be a constant integer}} + return res; +} + +v16u8 vnori_b(v16u8 _1, int var) { + v16u8 res = __lsx_vnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vnori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vnori_b' must be a constant integer}} + return res; +} + +v16u8 vxori_b(v16u8 _1, int var) { + v16u8 res = __lsx_vxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vxori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vxori_b' must be a constant integer}} + return res; +} + +v16u8 vbitseli_b(v16u8 _1, v16u8 _2, int var) { + v16u8 res = __lsx_vbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vbitseli_b' must be a constant integer}} + return res; +} + +v16i8 vshuf4i_b(v16i8 _1, int var) { + v16i8 res = __lsx_vshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + 
res |= __lsx_vshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_b' must be a constant integer}} + return res; +} + +v8i16 vshuf4i_h(v8i16 _1, int var) { + v8i16 res = __lsx_vshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_h' must be a constant integer}} + return res; +} + +v4i32 vshuf4i_w(v4i32 _1, int var) { + v4i32 res = __lsx_vshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_w' must be a constant integer}} + return res; +} + +int vpickve2gr_b(v16i8 _1, int var) { + int res = __lsx_vpickve2gr_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vpickve2gr_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vpickve2gr_b(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_b' must be a constant integer}} + return res; +} + +int vpickve2gr_h(v8i16 _1, int var) { + int res = __lsx_vpickve2gr_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vpickve2gr_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vpickve2gr_h(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_h' must be a constant integer}} + return res; +} + +int vpickve2gr_w(v4i32 _1, int var) { + int res = __lsx_vpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lsx_vpickve2gr_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lsx_vpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_w' must be a constant integer}} + return res; +} + +long vpickve2gr_d(v2i64 _1, int var) { + long res = __lsx_vpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __lsx_vpickve2gr_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __lsx_vpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_d' must be a constant integer}} + return res; +} + +unsigned int vpickve2gr_bu(v16i8 _1, int var) { + unsigned int res = __lsx_vpickve2gr_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vpickve2gr_bu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vpickve2gr_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_bu' must be a constant integer}} + return res; +} + +unsigned int vpickve2gr_hu(v8i16 _1, int var) { + unsigned int res = __lsx_vpickve2gr_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vpickve2gr_hu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vpickve2gr_hu(_1, var); // expected-error 
{{argument to '__builtin_lsx_vpickve2gr_hu' must be a constant integer}} + return res; +} + +unsigned int vpickve2gr_wu(v4i32 _1, int var) { + unsigned int res = __lsx_vpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lsx_vpickve2gr_wu(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lsx_vpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_wu' must be a constant integer}} + return res; +} + +unsigned long int vpickve2gr_du(v2i64 _1, int var) { + unsigned long int res = __lsx_vpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __lsx_vpickve2gr_du(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __lsx_vpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_du' must be a constant integer}} + return res; +} + +v16i8 vinsgr2vr_b(v16i8 _1, int var) { + v16i8 res = __lsx_vinsgr2vr_b(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vinsgr2vr_b(_1, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vinsgr2vr_b(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_b' must be a constant integer}} + return res; +} + +v8i16 vinsgr2vr_h(v8i16 _1, int var) { + v8i16 res = __lsx_vinsgr2vr_h(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vinsgr2vr_h(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vinsgr2vr_h(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_h' must be a constant integer}} + return res; +} + +v4i32 vinsgr2vr_w(v4i32 _1, int var) { + v4i32 res = __lsx_vinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lsx_vinsgr2vr_w(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lsx_vinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_w' must be a constant integer}} + return res; +} + +v2i64 vinsgr2vr_d(v2i64 _1, int var) { + v2i64 res = __lsx_vinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __lsx_vinsgr2vr_d(_1, 1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __lsx_vinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_d' must be a constant integer}} + return res; +} + +v8i16 vsllwil_h_b(v16i8 _1, int var) { + v8i16 res = __lsx_vsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_h_b' must be a constant integer}} + return res; +} + +v4i32 vsllwil_w_h(v8i16 _1, int var) { + v4i32 res = __lsx_vsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_w_h' must be a constant integer}} + return res; +} + +v2i64 vsllwil_d_w(v4i32 _1, int 
var) { + v2i64 res = __lsx_vsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_d_w' must be a constant integer}} + return res; +} + +v8u16 vsllwil_hu_bu(v16u8 _1, int var) { + v8u16 res = __lsx_vsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_hu_bu' must be a constant integer}} + return res; +} + +v4u32 vsllwil_wu_hu(v8u16 _1, int var) { + v4u32 res = __lsx_vsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_wu_hu' must be a constant integer}} + return res; +} + +v2u64 vsllwil_du_wu(v4u32 _1, int var) { + v2u64 res = __lsx_vsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_du_wu' must be a constant integer}} + return res; +} + +v16i8 vfrstpi_b(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __lsx_vfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_b' must be a constant integer}} + return res; +} + +v8i16 vfrstpi_h(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __lsx_vfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_h' must be a constant integer}} + return res; +} + +v2i64 vshuf4i_d(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __lsx_vshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_d' must be a constant integer}} + return res; +} + +v16i8 vbsrl_v(v16i8 _1, int var) { + v16i8 res = __lsx_vbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vbsrl_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsrl_v' must be a constant integer}} + return res; +} + +v16i8 vbsll_v(v16i8 _1, int var) { + v16i8 res = __lsx_vbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= 
__lsx_vbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vbsll_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsll_v' must be a constant integer}} + return res; +} + +v16i8 vextrins_b(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __lsx_vextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_b' must be a constant integer}} + return res; +} + +v8i16 vextrins_h(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __lsx_vextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_h' must be a constant integer}} + return res; +} + +v4i32 vextrins_w(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __lsx_vextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_w' must be a constant integer}} + return res; +} + +v2i64 vextrins_d(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __lsx_vextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_d' must be a constant integer}} + return res; +} + +void vstelm_b_idx(v16i8 _1, void *_2, int var) { + __lsx_vstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + __lsx_vstelm_b(_1, _2, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + __lsx_vstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} +} + +void vstelm_h_idx(v8i16 _1, void *_2, int var) { + __lsx_vstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + __lsx_vstelm_h(_1, _2, 2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + __lsx_vstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} +} + +void vstelm_w_idx(v4i32 _1, void *_2, int var) { + __lsx_vstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + __lsx_vstelm_w(_1, _2, 4, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + __lsx_vstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} +} + +void vstelm_d_idx(v2i64 _1, void *_2, int var) { + __lsx_vstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + __lsx_vstelm_d(_1, _2, 8, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + __lsx_vstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lsx_vstelm_d' must 
be a constant integer}} +} + +void vstelm_b(v16i8 _1, void *_2, int var) { + __lsx_vstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} + __lsx_vstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} + __lsx_vstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} +} + +void vstelm_h(v8i16 _1, void *_2, int var) { + __lsx_vstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} + __lsx_vstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} + __lsx_vstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} +} + +void vstelm_w(v4i32 _1, void *_2, int var) { + __lsx_vstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} + __lsx_vstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} + __lsx_vstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} +} + +void vstelm_d(v2i64 _1, void *_2, int var) { + __lsx_vstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} + __lsx_vstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} + __lsx_vstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} +} + +v16i8 vldrepl_b(void *_1, int var) { + v16i8 res = __lsx_vldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + res |= __lsx_vldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + res |= __lsx_vldrepl_b(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_b' must be a constant integer}} + return res; +} + +v8i16 vldrepl_h(void *_1, int var) { + v8i16 res = __lsx_vldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} + res |= __lsx_vldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} + res |= __lsx_vldrepl_h(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_h' must be a constant integer}} + return res; +} + +v4i32 vldrepl_w(void *_1, int var) { + v4i32 res = __lsx_vldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} + res |= __lsx_vldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} + res |= __lsx_vldrepl_w(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_w' must be a constant integer}} + return res; +} + +v2i64 vldrepl_d(void *_1, int var) { + v2i64 res = __lsx_vldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} + res |= __lsx_vldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} + res |= __lsx_vldrepl_d(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_d' must be a constant integer}} + return res; +} + +v16i8 vrotri_b(v16i8 _1, int var) { + v16i8 res = __lsx_vrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vrotri_b(_1, 8); // expected-error {{argument 
value 8 is outside the valid range [0, 7]}}
+  res |= __lsx_vrotri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_b' must be a constant integer}}
+  return res;
+}
+
+v8i16 vrotri_h(v8i16 _1, int var) {
+  v8i16 res = __lsx_vrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vrotri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_h' must be a constant integer}}
+  return res;
+}
+
+v4i32 vrotri_w(v4i32 _1, int var) {
+  v4i32 res = __lsx_vrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vrotri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_w' must be a constant integer}}
+  return res;
+}
+
+v2i64 vrotri_d(v2i64 _1, int var) {
+  v2i64 res = __lsx_vrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vrotri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_d' must be a constant integer}}
+  return res;
+}
+
+v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __lsx_vsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __lsx_vsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __lsx_vsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __lsx_vsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_d_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __lsx_vsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __lsx_vsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __lsx_vsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __lsx_vsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_d_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __lsx_vssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __lsx_vssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __lsx_vssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __lsx_vssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_d_q' must be a constant integer}}
+  return res;
+}
+
+v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2, int var) {
+  v16u8 res = __lsx_vssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_bu_h' must be a constant integer}}
+  return res;
+}
+
+v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2, int var) {
+  v8u16 res = __lsx_vssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_hu_w' must be a constant integer}}
+  return res;
+}
+
+v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2, int var) {
+  v4u32 res = __lsx_vssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_wu_d' must be a constant integer}}
+  return res;
+}
+
+v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2, int var) {
+  v2u64 res = __lsx_vssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_du_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __lsx_vssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __lsx_vssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __lsx_vssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __lsx_vssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_d_q' must be a constant integer}}
+  return res;
+}
+
+v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2, int var) {
+  v16u8 res = __lsx_vssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_bu_h' must be a constant integer}}
+  return res;
+}
+
+v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2, int var) {
+  v8u16 res = __lsx_vssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_hu_w' must be a constant integer}}
+  return res;
+}
+
+v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2, int var) {
+  v4u32 res = __lsx_vssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_wu_d' must be a constant integer}}
+  return res;
+}
+
+v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2, int var) {
+  v2u64 res = __lsx_vssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_du_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vsrani_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __lsx_vsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vsrani_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __lsx_vsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vsrani_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __lsx_vsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vsrani_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __lsx_vsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_d_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __lsx_vsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __lsx_vsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __lsx_vsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __lsx_vsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_d_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vssrani_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __lsx_vssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vssrani_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __lsx_vssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vssrani_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __lsx_vssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vssrani_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __lsx_vssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_d_q' must be a constant integer}}
+  return res;
+}
+
+v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2, int var) {
+  v16u8 res = __lsx_vssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_bu_h' must be a constant integer}}
+  return res;
+}
+
+v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2, int var) {
+  v8u16 res = __lsx_vssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_hu_w' must be a constant integer}}
+  return res;
+}
+
+v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2, int var) {
+  v4u32 res = __lsx_vssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_wu_d' must be a constant integer}}
+  return res;
+}
+
+v2u64 vssrani_du_q(v2u64 _1, v2i64 _2, int var) {
+  v2u64 res = __lsx_vssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_du_q' must be a constant integer}}
+  return res;
+}
+
+v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2, int var) {
+  v16i8 res = __lsx_vssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_b_h' must be a constant integer}}
+  return res;
+}
+
+v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2, int var) {
+  v8i16 res = __lsx_vssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_h_w' must be a constant integer}}
+  return res;
+}
+
+v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __lsx_vssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_w_d' must be a constant integer}}
+  return res;
+}
+
+v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2, int var) {
+  v2i64 res = __lsx_vssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_d_q' must be a constant integer}}
+  return res;
+}
+
+v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2, int var) {
+  v16u8 res = __lsx_vssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  res |= __lsx_vssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_bu_h' must be a constant integer}}
+  return res;
+}
+
+v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2, int var) {
+  v8u16 res = __lsx_vssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  res |= __lsx_vssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_hu_w' must be a constant integer}}
+  return res;
+}
+
+v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2, int var) {
+  v4u32 res = __lsx_vssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  res |= __lsx_vssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_wu_d' must be a constant integer}}
+  return res;
+}
+
+v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2, int var) {
+  v2u64 res = __lsx_vssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  res |= __lsx_vssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_du_q' must be a constant integer}}
+  return res;
+}
+
+v4i32 vpermi_w(v4i32 _1, v4i32 _2, int var) {
+  v4i32 res = __lsx_vpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __lsx_vpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __lsx_vpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vpermi_w' must be a constant integer}}
+  return res;
+}
+
+v16i8 vld(void *_1, int var) {
+  v16i8 res = __lsx_vld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
+  res |= __lsx_vld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}}
+  res |= __lsx_vld(_1, var); // expected-error {{argument to '__builtin_lsx_vld' must be a constant integer}}
+  return res;
+}
+
+void vst(v16i8 _1, void *_2, int var) {
+  __lsx_vst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
+  __lsx_vst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}}
+  __lsx_vst(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vst' must be a constant integer}}
+}
+
+v2i64 vldi(int var) {
+  v2i64 res = __lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}}
+  res |= __lsx_vldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}}
+  res |= __lsx_vldi(var); // expected-error {{argument to '__builtin_lsx_vldi' must be a constant integer}}
+  return res;
+}
+
+v16i8 vrepli_b(int var) {
+  v16i8 res = __lsx_vrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_b(var); // expected-error {{argument to '__builtin_lsx_vrepli_b' must be a constant integer}}
+  return res;
+}
+
+v2i64 vrepli_d(int var) {
+  v2i64 res = __lsx_vrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_d(var); // expected-error {{argument to '__builtin_lsx_vrepli_d' must be a constant integer}}
+  return res;
+}
+
+v8i16 vrepli_h(int var) {
+  v8i16 res = __lsx_vrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_h(var); // expected-error {{argument to '__builtin_lsx_vrepli_h' must be a constant integer}}
+  return res;
+}
+
+v4i32 vrepli_w(int var) {
+  v4i32 res = __lsx_vrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_w(var); // expected-error {{argument to '__builtin_lsx_vrepli_w' must be a constant integer}}
+  return res;
+}
diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c
new file mode 100644
index 0000000000000000000000000000000000000000..7a84e0ae24f950b8d898cac908d06e15474f8db8
--- /dev/null
+++ b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c
@@ -0,0 +1,6359 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s
+
+#include <lsxintrin.h>
+
+// CHECK-LABEL: @vsll_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __lsx_vsll_b(_1, _2); }
+// CHECK-LABEL: @vsll_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __lsx_vsll_h(_1, _2); }
+// CHECK-LABEL: @vsll_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __lsx_vsll_w(_1, _2); }
+// CHECK-LABEL: @vsll_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __lsx_vsll_d(_1, _2); }
+// CHECK-LABEL: @vslli_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vslli_b(v16i8 _1) { return __lsx_vslli_b(_1, 1); }
+// CHECK-LABEL: @vslli_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vslli_h(v8i16 _1) { return __lsx_vslli_h(_1, 1); }
+// CHECK-LABEL: @vslli_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vslli_w(v4i32 _1) { return __lsx_vslli_w(_1, 1); }
+// CHECK-LABEL: @vslli_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vslli_d(v2i64 _1) { return __lsx_vslli_d(_1, 1); }
+// CHECK-LABEL: @vsra_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __lsx_vsra_b(_1, _2); }
+// CHECK-LABEL: @vsra_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __lsx_vsra_h(_1, _2); }
+// CHECK-LABEL: @vsra_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __lsx_vsra_w(_1, _2); }
+// CHECK-LABEL: @vsra_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __lsx_vsra_d(_1, _2); }
+// CHECK-LABEL: @vsrai_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vsrai_b(v16i8 _1) { return __lsx_vsrai_b(_1, 1); }
+// CHECK-LABEL: @vsrai_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vsrai_h(v8i16 _1) { return __lsx_vsrai_h(_1, 1); }
+// CHECK-LABEL: @vsrai_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vsrai_w(v4i32 _1) { return __lsx_vsrai_w(_1, 1); }
+// CHECK-LABEL: @vsrai_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vsrai_d(v2i64 _1) { return __lsx_vsrai_d(_1, 1); }
+// CHECK-LABEL: @vsrar_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vsrar_b(v16i8 _1, v16i8 _2) { return __lsx_vsrar_b(_1, _2); }
+// CHECK-LABEL: @vsrar_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vsrar_h(v8i16 _1, v8i16 _2) { return __lsx_vsrar_h(_1, _2); }
+// CHECK-LABEL: @vsrar_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vsrar_w(v4i32 _1, v4i32 _2) { return __lsx_vsrar_w(_1, _2); }
+// CHECK-LABEL: @vsrar_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vsrar_d(v2i64 _1, v2i64 _2) { return __lsx_vsrar_d(_1, _2); }
+// CHECK-LABEL: @vsrari_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vsrari_b(v16i8 _1) { return __lsx_vsrari_b(_1, 1); }
+// CHECK-LABEL: @vsrari_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vsrari_h(v8i16 _1) { return __lsx_vsrari_h(_1, 1); }
+// CHECK-LABEL: @vsrari_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vsrari_w(v4i32 _1) { return __lsx_vsrari_w(_1, 1); }
+// CHECK-LABEL: @vsrari_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vsrari_d(v2i64 _1) { return __lsx_vsrari_d(_1, 1); }
+// CHECK-LABEL: @vsrl_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __lsx_vsrl_b(_1, _2); }
+// CHECK-LABEL: @vsrl_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __lsx_vsrl_h(_1, _2); }
+// CHECK-LABEL: @vsrl_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __lsx_vsrl_w(_1, _2); }
+// CHECK-LABEL: @vsrl_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __lsx_vsrl_d(_1, _2); }
+// CHECK-LABEL: @vsrli_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vsrli_b(v16i8 _1) { return __lsx_vsrli_b(_1, 1); }
+// CHECK-LABEL: @vsrli_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vsrli_h(v8i16 _1) { return __lsx_vsrli_h(_1, 1); }
+// CHECK-LABEL: @vsrli_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vsrli_w(v4i32 _1) { return __lsx_vsrli_w(_1, 1); }
+// CHECK-LABEL: @vsrli_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vsrli_d(v2i64 _1) { return __lsx_vsrli_d(_1, 1); }
+// CHECK-LABEL: @vsrlr_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { return __lsx_vsrlr_b(_1, _2); }
+// CHECK-LABEL: @vsrlr_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlr_h(_1, _2); }
+// CHECK-LABEL: @vsrlr_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlr_w(_1, _2); }
+// CHECK-LABEL: @vsrlr_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlr_d(_1, _2); }
+// CHECK-LABEL: @vsrlri_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vsrlri_b(v16i8 _1) { return __lsx_vsrlri_b(_1, 1); }
+// CHECK-LABEL: @vsrlri_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vsrlri_h(v8i16 _1) { return __lsx_vsrlri_h(_1, 1); }
+// CHECK-LABEL: @vsrlri_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vsrlri_w(v4i32 _1) { return __lsx_vsrlri_w(_1, 1); }
+// CHECK-LABEL: @vsrlri_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vsrlri_d(v2i64 _1) { return __lsx_vsrlri_d(_1, 1); }
+// CHECK-LABEL: @vbitclr_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { return __lsx_vbitclr_b(_1, _2); }
+// CHECK-LABEL: @vbitclr_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { return __lsx_vbitclr_h(_1, _2); }
+// CHECK-LABEL: @vbitclr_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { return __lsx_vbitclr_w(_1, _2); }
+// CHECK-LABEL: @vbitclr_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { return __lsx_vbitclr_d(_1, _2); }
+// CHECK-LABEL: @vbitclri_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16u8 vbitclri_b(v16u8 _1) { return __lsx_vbitclri_b(_1, 1); }
+// CHECK-LABEL: @vbitclri_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8u16 vbitclri_h(v8u16 _1) { return __lsx_vbitclri_h(_1, 1); }
+// CHECK-LABEL: @vbitclri_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4u32 vbitclri_w(v4u32 _1) { return __lsx_vbitclri_w(_1, 1); }
+// CHECK-LABEL: @vbitclri_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2u64 vbitclri_d(v2u64 _1) { return __lsx_vbitclri_d(_1, 1); }
+// CHECK-LABEL: @vbitset_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vbitset_b(v16u8 _1, v16u8 _2) { return __lsx_vbitset_b(_1, _2); }
+// CHECK-LABEL: @vbitset_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vbitset_h(v8u16 _1, v8u16 _2) { return __lsx_vbitset_h(_1, _2); }
+// CHECK-LABEL: @vbitset_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vbitset_w(v4u32 _1, v4u32 _2) { return __lsx_vbitset_w(_1, _2); }
+// CHECK-LABEL: @vbitset_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vbitset_d(v2u64 _1, v2u64 _2) { return __lsx_vbitset_d(_1, _2); }
+// CHECK-LABEL: @vbitseti_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16u8 vbitseti_b(v16u8 _1) { return __lsx_vbitseti_b(_1, 1); }
+// CHECK-LABEL: @vbitseti_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8u16 vbitseti_h(v8u16 _1) { return __lsx_vbitseti_h(_1, 1); }
+// CHECK-LABEL: @vbitseti_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4u32 vbitseti_w(v4u32 _1) { return __lsx_vbitseti_w(_1, 1); }
+// CHECK-LABEL: @vbitseti_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2u64 vbitseti_d(v2u64 _1) { return __lsx_vbitseti_d(_1, 1); }
+// CHECK-LABEL: @vbitrev_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { return __lsx_vbitrev_b(_1, _2); }
+// CHECK-LABEL: @vbitrev_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { return __lsx_vbitrev_h(_1, _2); }
+// CHECK-LABEL: @vbitrev_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { return __lsx_vbitrev_w(_1, _2); }
+// CHECK-LABEL: @vbitrev_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { return __lsx_vbitrev_d(_1, _2); }
+// CHECK-LABEL: @vbitrevi_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16u8 vbitrevi_b(v16u8 _1) { return __lsx_vbitrevi_b(_1, 1); }
+// CHECK-LABEL: @vbitrevi_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8u16 vbitrevi_h(v8u16 _1) { return __lsx_vbitrevi_h(_1, 1); }
+// CHECK-LABEL: @vbitrevi_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4u32 vbitrevi_w(v4u32 _1) { return __lsx_vbitrevi_w(_1, 1); }
+// CHECK-LABEL: @vbitrevi_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2u64 vbitrevi_d(v2u64 _1) { return __lsx_vbitrevi_d(_1, 1); }
+// CHECK-LABEL: @vadd_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __lsx_vadd_b(_1, _2); }
+// CHECK-LABEL: @vadd_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __lsx_vadd_h(_1, _2); }
+// CHECK-LABEL: @vadd_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __lsx_vadd_w(_1, _2); }
+// CHECK-LABEL: @vadd_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __lsx_vadd_d(_1, _2); }
+// CHECK-LABEL: @vaddi_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vaddi_bu(v16i8 _1) { return __lsx_vaddi_bu(_1, 1); }
+// CHECK-LABEL: @vaddi_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vaddi_hu(v8i16 _1) { return __lsx_vaddi_hu(_1, 1); }
+// CHECK-LABEL: @vaddi_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vaddi_wu(v4i32 _1) { return __lsx_vaddi_wu(_1, 1); }
+// CHECK-LABEL: @vaddi_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vaddi_du(v2i64 _1) { return __lsx_vaddi_du(_1, 1); }
+// CHECK-LABEL: @vsub_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __lsx_vsub_b(_1, _2); }
+// CHECK-LABEL: @vsub_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __lsx_vsub_h(_1, _2); }
+// CHECK-LABEL: @vsub_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __lsx_vsub_w(_1, _2); }
+// CHECK-LABEL: @vsub_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __lsx_vsub_d(_1, _2); }
+// CHECK-LABEL: @vsubi_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vsubi_bu(v16i8 _1) { return __lsx_vsubi_bu(_1, 1); }
+// CHECK-LABEL: @vsubi_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vsubi_hu(v8i16 _1) { return __lsx_vsubi_hu(_1, 1); }
+// CHECK-LABEL: @vsubi_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vsubi_wu(v4i32 _1) { return __lsx_vsubi_wu(_1, 1); }
+// CHECK-LABEL: @vsubi_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vsubi_du(v2i64 _1) { return __lsx_vsubi_du(_1, 1); }
+// CHECK-LABEL: @vmax_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __lsx_vmax_b(_1, _2); }
+// CHECK-LABEL: @vmax_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __lsx_vmax_h(_1, _2); }
+// CHECK-LABEL: @vmax_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __lsx_vmax_w(_1, _2); }
+// CHECK-LABEL: @vmax_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __lsx_vmax_d(_1, _2); }
+// CHECK-LABEL: @vmaxi_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vmaxi_b(v16i8 _1) { return __lsx_vmaxi_b(_1, 1); }
+// CHECK-LABEL: @vmaxi_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vmaxi_h(v8i16 _1) { return __lsx_vmaxi_h(_1, 1); }
+// CHECK-LABEL: @vmaxi_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vmaxi_w(v4i32 _1) { return __lsx_vmaxi_w(_1, 1); }
+// CHECK-LABEL: @vmaxi_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vmaxi_d(v2i64 _1) { return __lsx_vmaxi_d(_1, 1); }
+// CHECK-LABEL: @vmax_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vmax_bu(v16u8 _1, v16u8 _2) { return __lsx_vmax_bu(_1, _2); }
+// CHECK-LABEL: @vmax_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vmax_hu(v8u16 _1, v8u16 _2) { return __lsx_vmax_hu(_1, _2); }
+// CHECK-LABEL: @vmax_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vmax_wu(v4u32 _1, v4u32 _2) { return __lsx_vmax_wu(_1, _2); }
+// CHECK-LABEL: @vmax_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vmax_du(v2u64 _1, v2u64 _2) { return __lsx_vmax_du(_1, _2); }
+// CHECK-LABEL: @vmaxi_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16u8 vmaxi_bu(v16u8 _1) { return __lsx_vmaxi_bu(_1, 1); }
+// CHECK-LABEL: @vmaxi_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8u16 vmaxi_hu(v8u16 _1) { return __lsx_vmaxi_hu(_1, 1); }
+// CHECK-LABEL: @vmaxi_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4u32 vmaxi_wu(v4u32 _1) { return __lsx_vmaxi_wu(_1, 1); }
+// CHECK-LABEL: @vmaxi_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2u64 vmaxi_du(v2u64 _1) { return __lsx_vmaxi_du(_1, 1); }
+// CHECK-LABEL: @vmin_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __lsx_vmin_b(_1, _2); }
+// CHECK-LABEL: @vmin_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __lsx_vmin_h(_1, _2); }
+// CHECK-LABEL: @vmin_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __lsx_vmin_w(_1, _2); }
+// CHECK-LABEL: @vmin_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __lsx_vmin_d(_1, _2); } +// CHECK-LABEL: @vmini_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vmini_b(v16i8 _1) { return __lsx_vmini_b(_1, 1); } +// CHECK-LABEL: @vmini_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vmini_h(v8i16 _1) { return __lsx_vmini_h(_1, 1); } +// CHECK-LABEL: @vmini_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vmini_w(v4i32 _1) { return __lsx_vmini_w(_1, 1); } +// CHECK-LABEL: @vmini_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vmini_d(v2i64 _1) { return __lsx_vmini_d(_1, 1); } +// CHECK-LABEL: @vmin_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vmin_bu(v16u8 _1, v16u8 _2) { return __lsx_vmin_bu(_1, _2); } +// CHECK-LABEL: @vmin_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vmin_hu(v8u16 _1, v8u16 _2) { return __lsx_vmin_hu(_1, _2); } +// CHECK-LABEL: @vmin_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vmin_wu(v4u32 _1, v4u32 _2) { return __lsx_vmin_wu(_1, _2); } +// CHECK-LABEL: @vmin_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 
vmin_du(v2u64 _1, v2u64 _2) { return __lsx_vmin_du(_1, _2); } +// CHECK-LABEL: @vmini_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vmini_bu(v16u8 _1) { return __lsx_vmini_bu(_1, 1); } +// CHECK-LABEL: @vmini_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8u16 vmini_hu(v8u16 _1) { return __lsx_vmini_hu(_1, 1); } +// CHECK-LABEL: @vmini_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vmini_wu(v4u32 _1) { return __lsx_vmini_wu(_1, 1); } +// CHECK-LABEL: @vmini_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vmini_du(v2u64 _1) { return __lsx_vmini_du(_1, 1); } +// CHECK-LABEL: @vseq_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __lsx_vseq_b(_1, _2); } +// CHECK-LABEL: @vseq_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __lsx_vseq_h(_1, _2); } +// CHECK-LABEL: @vseq_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __lsx_vseq_w(_1, _2); } +// CHECK-LABEL: @vseq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vseq_d(v2i64 
_1, v2i64 _2) { return __lsx_vseq_d(_1, _2); } +// CHECK-LABEL: @vseqi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vseqi_b(v16i8 _1) { return __lsx_vseqi_b(_1, 1); } +// CHECK-LABEL: @vseqi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vseqi_h(v8i16 _1) { return __lsx_vseqi_h(_1, 1); } +// CHECK-LABEL: @vseqi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vseqi_w(v4i32 _1) { return __lsx_vseqi_w(_1, 1); } +// CHECK-LABEL: @vseqi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vseqi_d(v2i64 _1) { return __lsx_vseqi_d(_1, 1); } +// CHECK-LABEL: @vslti_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vslti_b(v16i8 _1) { return __lsx_vslti_b(_1, 1); } +// CHECK-LABEL: @vslt_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __lsx_vslt_b(_1, _2); } +// CHECK-LABEL: @vslt_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __lsx_vslt_h(_1, _2); } +// CHECK-LABEL: @vslt_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __lsx_vslt_w(_1, _2); } +// CHECK-LABEL: @vslt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] 
= bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __lsx_vslt_d(_1, _2); } +// CHECK-LABEL: @vslti_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vslti_h(v8i16 _1) { return __lsx_vslti_h(_1, 1); } +// CHECK-LABEL: @vslti_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vslti_w(v4i32 _1) { return __lsx_vslti_w(_1, 1); } +// CHECK-LABEL: @vslti_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vslti_d(v2i64 _1) { return __lsx_vslti_d(_1, 1); } +// CHECK-LABEL: @vslt_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vslt_bu(v16u8 _1, v16u8 _2) { return __lsx_vslt_bu(_1, _2); } +// CHECK-LABEL: @vslt_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vslt_hu(v8u16 _1, v8u16 _2) { return __lsx_vslt_hu(_1, _2); } +// CHECK-LABEL: @vslt_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vslt_wu(v4u32 _1, v4u32 _2) { return __lsx_vslt_wu(_1, _2); } +// CHECK-LABEL: @vslt_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vslt_du(v2u64 _1, v2u64 _2) { return __lsx_vslt_du(_1, _2); 
} +// CHECK-LABEL: @vslti_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vslti_bu(v16u8 _1) { return __lsx_vslti_bu(_1, 1); } +// CHECK-LABEL: @vslti_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vslti_hu(v8u16 _1) { return __lsx_vslti_hu(_1, 1); } +// CHECK-LABEL: @vslti_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vslti_wu(v4u32 _1) { return __lsx_vslti_wu(_1, 1); } +// CHECK-LABEL: @vslti_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vslti_du(v2u64 _1) { return __lsx_vslti_du(_1, 1); } +// CHECK-LABEL: @vsle_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __lsx_vsle_b(_1, _2); } +// CHECK-LABEL: @vsle_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __lsx_vsle_h(_1, _2); } +// CHECK-LABEL: @vsle_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __lsx_vsle_w(_1, _2); } +// CHECK-LABEL: @vsle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __lsx_vsle_d(_1, _2); } +// 
CHECK-LABEL: @vslei_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vslei_b(v16i8 _1) { return __lsx_vslei_b(_1, 1); } +// CHECK-LABEL: @vslei_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vslei_h(v8i16 _1) { return __lsx_vslei_h(_1, 1); } +// CHECK-LABEL: @vslei_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vslei_w(v4i32 _1) { return __lsx_vslei_w(_1, 1); } +// CHECK-LABEL: @vslei_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vslei_d(v2i64 _1) { return __lsx_vslei_d(_1, 1); } +// CHECK-LABEL: @vsle_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsle_bu(v16u8 _1, v16u8 _2) { return __lsx_vsle_bu(_1, _2); } +// CHECK-LABEL: @vsle_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsle_hu(v8u16 _1, v8u16 _2) { return __lsx_vsle_hu(_1, _2); } +// CHECK-LABEL: @vsle_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsle_wu(v4u32 _1, v4u32 _2) { return __lsx_vsle_wu(_1, _2); } +// CHECK-LABEL: @vsle_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsle_du(v2u64 _1, v2u64 _2) { return __lsx_vsle_du(_1, _2); } +// CHECK-LABEL: 
@vslei_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vslei_bu(v16u8 _1) { return __lsx_vslei_bu(_1, 1); } +// CHECK-LABEL: @vslei_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vslei_hu(v8u16 _1) { return __lsx_vslei_hu(_1, 1); } +// CHECK-LABEL: @vslei_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vslei_wu(v4u32 _1) { return __lsx_vslei_wu(_1, 1); } +// CHECK-LABEL: @vslei_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vslei_du(v2u64 _1) { return __lsx_vslei_du(_1, 1); } +// CHECK-LABEL: @vsat_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vsat_b(v16i8 _1) { return __lsx_vsat_b(_1, 1); } +// CHECK-LABEL: @vsat_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vsat_h(v8i16 _1) { return __lsx_vsat_h(_1, 1); } +// CHECK-LABEL: @vsat_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vsat_w(v4i32 _1) { return __lsx_vsat_w(_1, 1); } +// CHECK-LABEL: @vsat_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vsat_d(v2i64 _1) { return __lsx_vsat_d(_1, 1); } +// CHECK-LABEL: @vsat_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vsat_bu(v16u8 _1) { return __lsx_vsat_bu(_1, 1); } +// CHECK-LABEL: @vsat_hu( +// 
CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8u16 vsat_hu(v8u16 _1) { return __lsx_vsat_hu(_1, 1); } +// CHECK-LABEL: @vsat_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vsat_wu(v4u32 _1) { return __lsx_vsat_wu(_1, 1); } +// CHECK-LABEL: @vsat_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vsat_du(v2u64 _1) { return __lsx_vsat_du(_1, 1); } +// CHECK-LABEL: @vadda_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vadda_b(v16i8 _1, v16i8 _2) { return __lsx_vadda_b(_1, _2); } +// CHECK-LABEL: @vadda_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vadda_h(v8i16 _1, v8i16 _2) { return __lsx_vadda_h(_1, _2); } +// CHECK-LABEL: @vadda_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vadda_w(v4i32 _1, v4i32 _2) { return __lsx_vadda_w(_1, _2); } +// CHECK-LABEL: @vadda_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vadda_d(v2i64 _1, v2i64 _2) { return __lsx_vadda_d(_1, _2); } +// CHECK-LABEL: @vsadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 
vsadd_b(v16i8 _1, v16i8 _2) { return __lsx_vsadd_b(_1, _2); } +// CHECK-LABEL: @vsadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsadd_h(v8i16 _1, v8i16 _2) { return __lsx_vsadd_h(_1, _2); } +// CHECK-LABEL: @vsadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsadd_w(v4i32 _1, v4i32 _2) { return __lsx_vsadd_w(_1, _2); } +// CHECK-LABEL: @vsadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsadd_d(v2i64 _1, v2i64 _2) { return __lsx_vsadd_d(_1, _2); } +// CHECK-LABEL: @vsadd_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { return __lsx_vsadd_bu(_1, _2); } +// CHECK-LABEL: @vsadd_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { return __lsx_vsadd_hu(_1, _2); } +// CHECK-LABEL: @vsadd_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { return __lsx_vsadd_wu(_1, _2); } +// CHECK-LABEL: @vsadd_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vsadd_du(v2u64 _1, v2u64 _2) { return __lsx_vsadd_du(_1, _2); } +// CHECK-LABEL: @vavg_b( +// 
CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __lsx_vavg_b(_1, _2); } +// CHECK-LABEL: @vavg_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __lsx_vavg_h(_1, _2); } +// CHECK-LABEL: @vavg_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __lsx_vavg_w(_1, _2); } +// CHECK-LABEL: @vavg_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __lsx_vavg_d(_1, _2); } +// CHECK-LABEL: @vavg_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vavg_bu(v16u8 _1, v16u8 _2) { return __lsx_vavg_bu(_1, _2); } +// CHECK-LABEL: @vavg_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vavg_hu(v8u16 _1, v8u16 _2) { return __lsx_vavg_hu(_1, _2); } +// CHECK-LABEL: @vavg_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vavg_wu(v4u32 _1, v4u32 _2) { return __lsx_vavg_wu(_1, _2); } +// CHECK-LABEL: @vavg_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = 
bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vavg_du(v2u64 _1, v2u64 _2) { return __lsx_vavg_du(_1, _2); } +// CHECK-LABEL: @vavgr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vavgr_b(v16i8 _1, v16i8 _2) { return __lsx_vavgr_b(_1, _2); } +// CHECK-LABEL: @vavgr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vavgr_h(v8i16 _1, v8i16 _2) { return __lsx_vavgr_h(_1, _2); } +// CHECK-LABEL: @vavgr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vavgr_w(v4i32 _1, v4i32 _2) { return __lsx_vavgr_w(_1, _2); } +// CHECK-LABEL: @vavgr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vavgr_d(v2i64 _1, v2i64 _2) { return __lsx_vavgr_d(_1, _2); } +// CHECK-LABEL: @vavgr_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { return __lsx_vavgr_bu(_1, _2); } +// CHECK-LABEL: @vavgr_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { return __lsx_vavgr_hu(_1, _2); } +// CHECK-LABEL: @vavgr_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { return __lsx_vavgr_wu(_1, _2); } +// CHECK-LABEL: @vavgr_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vavgr_du(v2u64 _1, v2u64 _2) { return __lsx_vavgr_du(_1, _2); } +// CHECK-LABEL: @vssub_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vssub_b(v16i8 _1, v16i8 _2) { return __lsx_vssub_b(_1, _2); } +// CHECK-LABEL: @vssub_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vssub_h(v8i16 _1, v8i16 _2) { return __lsx_vssub_h(_1, _2); } +// CHECK-LABEL: @vssub_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vssub_w(v4i32 _1, v4i32 _2) { return __lsx_vssub_w(_1, _2); } +// CHECK-LABEL: @vssub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vssub_d(v2i64 _1, v2i64 _2) { return __lsx_vssub_d(_1, _2); } +// CHECK-LABEL: @vssub_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vssub_bu(v16u8 _1, v16u8 _2) { return __lsx_vssub_bu(_1, _2); } +// CHECK-LABEL: @vssub_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] 
= bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vssub_hu(v8u16 _1, v8u16 _2) { return __lsx_vssub_hu(_1, _2); } +// CHECK-LABEL: @vssub_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vssub_wu(v4u32 _1, v4u32 _2) { return __lsx_vssub_wu(_1, _2); } +// CHECK-LABEL: @vssub_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vssub_du(v2u64 _1, v2u64 _2) { return __lsx_vssub_du(_1, _2); } +// CHECK-LABEL: @vabsd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vabsd_b(v16i8 _1, v16i8 _2) { return __lsx_vabsd_b(_1, _2); } +// CHECK-LABEL: @vabsd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vabsd_h(v8i16 _1, v8i16 _2) { return __lsx_vabsd_h(_1, _2); } +// CHECK-LABEL: @vabsd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vabsd_w(v4i32 _1, v4i32 _2) { return __lsx_vabsd_w(_1, _2); } +// CHECK-LABEL: @vabsd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vabsd_d(v2i64 _1, v2i64 _2) { return __lsx_vabsd_d(_1, _2); } +// CHECK-LABEL: @vabsd_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vabsd_bu(v16u8 
_1, v16u8 _2) { return __lsx_vabsd_bu(_1, _2); } +// CHECK-LABEL: @vabsd_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { return __lsx_vabsd_hu(_1, _2); } +// CHECK-LABEL: @vabsd_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { return __lsx_vabsd_wu(_1, _2); } +// CHECK-LABEL: @vabsd_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vabsd_du(v2u64 _1, v2u64 _2) { return __lsx_vabsd_du(_1, _2); } +// CHECK-LABEL: @vmul_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __lsx_vmul_b(_1, _2); } +// CHECK-LABEL: @vmul_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __lsx_vmul_h(_1, _2); } +// CHECK-LABEL: @vmul_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __lsx_vmul_w(_1, _2); } +// CHECK-LABEL: @vmul_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __lsx_vmul_d(_1, _2); } +// CHECK-LABEL: @vmadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmadd_b(_1, _2, _3); +} +// CHECK-LABEL: @vmadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmadd_h(_1, _2, _3); +} +// CHECK-LABEL: @vmadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __lsx_vmadd_w(_1, _2, _3); +} +// CHECK-LABEL: @vmadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmadd_d(_1, _2, _3); +} +// CHECK-LABEL: @vmsub_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmsub_b(_1, _2, _3); +} +// CHECK-LABEL: @vmsub_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmsub_h(_1, _2, _3); 
+} +// CHECK-LABEL: @vmsub_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __lsx_vmsub_w(_1, _2, _3); +} +// CHECK-LABEL: @vmsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmsub_d(_1, _2, _3); +} +// CHECK-LABEL: @vdiv_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __lsx_vdiv_b(_1, _2); } +// CHECK-LABEL: @vdiv_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __lsx_vdiv_h(_1, _2); } +// CHECK-LABEL: @vdiv_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __lsx_vdiv_w(_1, _2); } +// CHECK-LABEL: @vdiv_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __lsx_vdiv_d(_1, _2); } +// CHECK-LABEL: @vdiv_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// 
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { return __lsx_vdiv_bu(_1, _2); }
+// CHECK-LABEL: @vdiv_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { return __lsx_vdiv_hu(_1, _2); }
+// CHECK-LABEL: @vdiv_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { return __lsx_vdiv_wu(_1, _2); }
+// CHECK-LABEL: @vdiv_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2u64 vdiv_du(v2u64 _1, v2u64 _2) { return __lsx_vdiv_du(_1, _2); }
+// CHECK-LABEL: @vhaddw_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhaddw_h_b(_1, _2); }
+// CHECK-LABEL: @vhaddw_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhaddw_w_h(_1, _2); }
+// CHECK-LABEL: @vhaddw_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhaddw_d_w(_1, _2); }
+// CHECK-LABEL: @vhaddw_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhaddw_hu_bu(_1, _2); }
+// CHECK-LABEL: @vhaddw_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhaddw_wu_hu(_1, _2); }
+// CHECK-LABEL: @vhaddw_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhaddw_du_wu(_1, _2); }
+// CHECK-LABEL: @vhsubw_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhsubw_h_b(_1, _2); }
+// CHECK-LABEL: @vhsubw_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhsubw_w_h(_1, _2); }
+// CHECK-LABEL: @vhsubw_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhsubw_d_w(_1, _2); }
+// CHECK-LABEL: @vhsubw_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhsubw_hu_bu(_1, _2); }
+// CHECK-LABEL: @vhsubw_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhsubw_wu_hu(_1, _2); }
+// CHECK-LABEL: @vhsubw_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhsubw_du_wu(_1, _2); }
+// CHECK-LABEL: @vmod_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __lsx_vmod_b(_1, _2); }
+// CHECK-LABEL: @vmod_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __lsx_vmod_h(_1, _2); }
+// CHECK-LABEL: @vmod_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __lsx_vmod_w(_1, _2); }
+// CHECK-LABEL: @vmod_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __lsx_vmod_d(_1, _2); }
+// CHECK-LABEL: @vmod_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vmod_bu(v16u8 _1, v16u8 _2) { return __lsx_vmod_bu(_1, _2); }
+// CHECK-LABEL: @vmod_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vmod_hu(v8u16 _1, v8u16 _2) { return __lsx_vmod_hu(_1, _2); }
+// CHECK-LABEL: @vmod_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vmod_wu(v4u32 _1, v4u32 _2) { return __lsx_vmod_wu(_1, _2); }
+// CHECK-LABEL: @vmod_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2u64 vmod_du(v2u64 _1, v2u64 _2) { return __lsx_vmod_du(_1, _2); }
+// CHECK-LABEL: @vreplve_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[TMP0]], i32 [[_2:%.*]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vreplve_b(v16i8 _1, int _2) { return __lsx_vreplve_b(_1, _2); }
+// CHECK-LABEL: @vreplve_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[TMP0]], i32 [[_2:%.*]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vreplve_h(v8i16 _1, int _2) { return __lsx_vreplve_h(_1, _2); }
+// CHECK-LABEL: @vreplve_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[TMP0]], i32 [[_2:%.*]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vreplve_w(v4i32 _1, int _2) { return __lsx_vreplve_w(_1, _2); }
+// CHECK-LABEL: @vreplve_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[TMP0]], i32 [[_2:%.*]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vreplve_d(v2i64 _1, int _2) { return __lsx_vreplve_d(_1, _2); }
+// CHECK-LABEL: @vreplvei_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vreplvei_b(v16i8 _1) { return __lsx_vreplvei_b(_1, 1); }
+// CHECK-LABEL: @vreplvei_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vreplvei_h(v8i16 _1) { return __lsx_vreplvei_h(_1, 1); }
+// CHECK-LABEL: @vreplvei_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vreplvei_w(v4i32 _1) { return __lsx_vreplvei_w(_1, 1); }
+// CHECK-LABEL: @vreplvei_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vreplvei_d(v2i64 _1) { return __lsx_vreplvei_d(_1, 1); }
+// CHECK-LABEL: @vpickev_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vpickev_b(v16i8 _1, v16i8 _2) { return __lsx_vpickev_b(_1, _2); }
+// CHECK-LABEL: @vpickev_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vpickev_h(v8i16 _1, v8i16 _2) { return __lsx_vpickev_h(_1, _2); }
+// CHECK-LABEL: @vpickev_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vpickev_w(v4i32 _1, v4i32 _2) { return __lsx_vpickev_w(_1, _2); }
+// CHECK-LABEL: @vpickev_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vpickev_d(v2i64 _1, v2i64 _2) { return __lsx_vpickev_d(_1, _2); }
+// CHECK-LABEL: @vpickod_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vpickod_b(v16i8 _1, v16i8 _2) { return __lsx_vpickod_b(_1, _2); }
+// CHECK-LABEL: @vpickod_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vpickod_h(v8i16 _1, v8i16 _2) { return __lsx_vpickod_h(_1, _2); }
+// CHECK-LABEL: @vpickod_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vpickod_w(v4i32 _1, v4i32 _2) { return __lsx_vpickod_w(_1, _2); }
+// CHECK-LABEL: @vpickod_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vpickod_d(v2i64 _1, v2i64 _2) { return __lsx_vpickod_d(_1, _2); }
+// CHECK-LABEL: @vilvh_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vilvh_b(v16i8 _1, v16i8 _2) { return __lsx_vilvh_b(_1, _2); }
+// CHECK-LABEL: @vilvh_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vilvh_h(v8i16 _1, v8i16 _2) { return __lsx_vilvh_h(_1, _2); }
+// CHECK-LABEL: @vilvh_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vilvh_w(v4i32 _1, v4i32 _2) { return __lsx_vilvh_w(_1, _2); }
+// CHECK-LABEL: @vilvh_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vilvh_d(v2i64 _1, v2i64 _2) { return __lsx_vilvh_d(_1, _2); }
+// CHECK-LABEL: @vilvl_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vilvl_b(v16i8 _1, v16i8 _2) { return __lsx_vilvl_b(_1, _2); }
+// CHECK-LABEL: @vilvl_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vilvl_h(v8i16 _1, v8i16 _2) { return __lsx_vilvl_h(_1, _2); }
+// CHECK-LABEL: @vilvl_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vilvl_w(v4i32 _1, v4i32 _2) { return __lsx_vilvl_w(_1, _2); }
+// CHECK-LABEL: @vilvl_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vilvl_d(v2i64 _1, v2i64 _2) { return __lsx_vilvl_d(_1, _2); }
+// CHECK-LABEL: @vpackev_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vpackev_b(v16i8 _1, v16i8 _2) { return __lsx_vpackev_b(_1, _2); }
+// CHECK-LABEL: @vpackev_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vpackev_h(v8i16 _1, v8i16 _2) { return __lsx_vpackev_h(_1, _2); }
+// CHECK-LABEL: @vpackev_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vpackev_w(v4i32 _1, v4i32 _2) { return __lsx_vpackev_w(_1, _2); }
+// CHECK-LABEL: @vpackev_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vpackev_d(v2i64 _1, v2i64 _2) { return __lsx_vpackev_d(_1, _2); }
+// CHECK-LABEL: @vpackod_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vpackod_b(v16i8 _1, v16i8 _2) { return __lsx_vpackod_b(_1, _2); }
+// CHECK-LABEL: @vpackod_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vpackod_h(v8i16 _1, v8i16 _2) { return __lsx_vpackod_h(_1, _2); }
+// CHECK-LABEL: @vpackod_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vpackod_w(v4i32 _1, v4i32 _2) { return __lsx_vpackod_w(_1, _2); }
+// CHECK-LABEL: @vpackod_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vpackod_d(v2i64 _1, v2i64 _2) { return __lsx_vpackod_d(_1, _2); }
+// CHECK-LABEL: @vshuf_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) {
+ return __lsx_vshuf_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vshuf_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) {
+ return __lsx_vshuf_w(_1, _2, _3);
+}
+// CHECK-LABEL: @vshuf_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) {
+ return __lsx_vshuf_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vand_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vand_v(v16u8 _1, v16u8 _2) { return __lsx_vand_v(_1, _2); }
+// CHECK-LABEL: @vandi_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16u8 vandi_b(v16u8 _1) { return __lsx_vandi_b(_1, 1); }
+// CHECK-LABEL: @vor_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vor_v(v16u8 _1, v16u8 _2) { return __lsx_vor_v(_1, _2); }
+// CHECK-LABEL: @vori_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16u8 vori_b(v16u8 _1) { return __lsx_vori_b(_1, 1); }
+// CHECK-LABEL: @vnor_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __lsx_vnor_v(_1, _2); }
+// CHECK-LABEL: @vnori_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16u8 vnori_b(v16u8 _1) { return __lsx_vnori_b(_1, 1); }
+// CHECK-LABEL: @vxor_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __lsx_vxor_v(_1, _2); }
+// CHECK-LABEL: @vxori_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16u8 vxori_b(v16u8 _1) { return __lsx_vxori_b(_1, 1); }
+// CHECK-LABEL: @vbitsel_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) {
+ return __lsx_vbitsel_v(_1, _2, _3);
+}
+// CHECK-LABEL: @vbitseli_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { return __lsx_vbitseli_b(_1, _2, 1); }
+// CHECK-LABEL: @vshuf4i_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vshuf4i_b(v16i8 _1) { return __lsx_vshuf4i_b(_1, 1); }
+// CHECK-LABEL: @vshuf4i_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vshuf4i_h(v8i16 _1) { return __lsx_vshuf4i_h(_1, 1); }
+// CHECK-LABEL: @vshuf4i_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vshuf4i_w(v4i32 _1) { return __lsx_vshuf4i_w(_1, 1); }
+// CHECK-LABEL: @vreplgr2vr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
+// CHECK-NEXT: ret i128 [[TMP1]]
+//
+v16i8 vreplgr2vr_b(int _1) { return __lsx_vreplgr2vr_b(_1); }
+// CHECK-LABEL: @vreplgr2vr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
+// CHECK-NEXT: ret i128 [[TMP1]]
+//
+v8i16 vreplgr2vr_h(int _1) { return __lsx_vreplgr2vr_h(_1); }
+// CHECK-LABEL: @vreplgr2vr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
+// CHECK-NEXT: ret i128 [[TMP1]]
+//
+v4i32 vreplgr2vr_w(int _1) { return __lsx_vreplgr2vr_w(_1); }
+// CHECK-LABEL: @vreplgr2vr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
+// CHECK-NEXT: ret i128 [[TMP1]]
+//
+v2i64 vreplgr2vr_d(long _1) { return __lsx_vreplgr2vr_d(_1); }
+// CHECK-LABEL: @vpcnt_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vpcnt_b(v16i8 _1) { return __lsx_vpcnt_b(_1); }
+// CHECK-LABEL: @vpcnt_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vpcnt_h(v8i16 _1) { return __lsx_vpcnt_h(_1); }
+// CHECK-LABEL: @vpcnt_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vpcnt_w(v4i32 _1) { return __lsx_vpcnt_w(_1); }
+// CHECK-LABEL: @vpcnt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vpcnt_d(v2i64 _1) { return __lsx_vpcnt_d(_1); }
+// CHECK-LABEL: @vclo_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vclo_b(v16i8 _1) { return __lsx_vclo_b(_1); }
+// CHECK-LABEL: @vclo_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vclo_h(v8i16 _1) { return __lsx_vclo_h(_1); }
+// CHECK-LABEL: @vclo_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vclo_w(v4i32 _1) { return __lsx_vclo_w(_1); }
+// CHECK-LABEL: @vclo_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vclo_d(v2i64 _1) { return __lsx_vclo_d(_1); }
+// CHECK-LABEL: @vclz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vclz_b(v16i8 _1) { return __lsx_vclz_b(_1); }
+// CHECK-LABEL: @vclz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vclz_h(v8i16 _1) { return __lsx_vclz_h(_1); }
+// CHECK-LABEL: @vclz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vclz_w(v4i32 _1) { return __lsx_vclz_w(_1); }
+// CHECK-LABEL: @vclz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vclz_d(v2i64 _1) { return __lsx_vclz_d(_1); }
+// CHECK-LABEL: @vpickve2gr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int vpickve2gr_b(v16i8 _1) { return __lsx_vpickve2gr_b(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int vpickve2gr_h(v8i16 _1) { return __lsx_vpickve2gr_h(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int vpickve2gr_w(v4i32 _1) { return __lsx_vpickve2gr_w(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT: ret i64 [[TMP1]]
+//
+long vpickve2gr_d(v2i64 _1) { return __lsx_vpickve2gr_d(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+unsigned int vpickve2gr_bu(v16i8 _1) { return __lsx_vpickve2gr_bu(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+unsigned int vpickve2gr_hu(v8i16 _1) { return __lsx_vpickve2gr_hu(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+unsigned int vpickve2gr_wu(v4i32 _1) { return __lsx_vpickve2gr_wu(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT: ret i64 [[TMP1]]
+//
+unsigned long int vpickve2gr_du(v2i64 _1) { return __lsx_vpickve2gr_du(_1, 1); }
+// CHECK-LABEL: @vinsgr2vr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[TMP0]], i32 1, i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vinsgr2vr_b(v16i8 _1) { return __lsx_vinsgr2vr_b(_1, 1, 1); }
+// CHECK-LABEL: @vinsgr2vr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[TMP0]], i32 1, i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vinsgr2vr_h(v8i16 _1) { return __lsx_vinsgr2vr_h(_1, 1, 1); }
+// CHECK-LABEL: @vinsgr2vr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[TMP0]], i32 1, i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vinsgr2vr_w(v4i32 _1) { return __lsx_vinsgr2vr_w(_1, 1, 1); }
+// CHECK-LABEL: @vinsgr2vr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[TMP0]], i64 1, i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vinsgr2vr_d(v2i64 _1) { return __lsx_vinsgr2vr_d(_1, 1, 1); }
+// CHECK-LABEL: @vfadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfadd_s(v4f32 _1, v4f32 _2) { return __lsx_vfadd_s(_1, _2); }
+// CHECK-LABEL: @vfadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfadd_d(v2f64 _1, v2f64 _2) { return __lsx_vfadd_d(_1, _2); }
+// CHECK-LABEL: @vfsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfsub_s(v4f32 _1, v4f32 _2) { return __lsx_vfsub_s(_1, _2); }
+// CHECK-LABEL: @vfsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfsub_d(v2f64 _1, v2f64 _2) { return __lsx_vfsub_d(_1, _2); }
+// CHECK-LABEL: @vfmul_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfmul_s(v4f32 _1, v4f32 _2) { return __lsx_vfmul_s(_1, _2); }
+// CHECK-LABEL: @vfmul_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfmul_d(v2f64 _1, v2f64 _2) { return __lsx_vfmul_d(_1, _2); }
+// CHECK-LABEL: @vfdiv_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { return __lsx_vfdiv_s(_1, _2); }
+// CHECK-LABEL: @vfdiv_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { return __lsx_vfdiv_d(_1, _2); }
+// CHECK-LABEL: @vfcvt_h_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { return __lsx_vfcvt_h_s(_1, _2); }
+// CHECK-LABEL: @vfcvt_s_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { return __lsx_vfcvt_s_d(_1, _2); }
+// CHECK-LABEL: @vfmin_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfmin_s(v4f32 _1, v4f32 _2) { return __lsx_vfmin_s(_1, _2); }
+// CHECK-LABEL: @vfmin_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfmin_d(v2f64 _1, v2f64 _2) { return __lsx_vfmin_d(_1, _2); }
+// CHECK-LABEL: @vfmina_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfmina_s(v4f32 _1, v4f32 _2) { return __lsx_vfmina_s(_1, _2); }
+// CHECK-LABEL: @vfmina_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfmina_d(v2f64 _1, v2f64 _2) { return __lsx_vfmina_d(_1, _2); }
+// CHECK-LABEL: @vfmax_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfmax_s(v4f32 _1, v4f32 _2) { return __lsx_vfmax_s(_1, _2); }
+// CHECK-LABEL: @vfmax_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfmax_d(v2f64 _1, v2f64 _2) { return __lsx_vfmax_d(_1, _2); }
+// CHECK-LABEL: @vfmaxa_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { return __lsx_vfmaxa_s(_1, _2); }
+// CHECK-LABEL: @vfmaxa_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { return __lsx_vfmaxa_d(_1, _2); }
+// CHECK-LABEL: @vfclass_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vfclass_s(v4f32 _1) { return __lsx_vfclass_s(_1); }
+// CHECK-LABEL: @vfclass_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vfclass_d(v2f64 _1) { return __lsx_vfclass_d(_1); }
+// CHECK-LABEL: @vfsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vfsqrt_s(v4f32 _1) { return __lsx_vfsqrt_s(_1); }
+// CHECK-LABEL: @vfsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vfsqrt_d(v2f64 _1) { return __lsx_vfsqrt_d(_1); }
+// CHECK-LABEL: @vfrecip_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vfrecip_s(v4f32 _1) { return __lsx_vfrecip_s(_1); }
+// CHECK-LABEL: @vfrecip_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vfrecip_d(v2f64 _1) { return __lsx_vfrecip_d(_1); }
+// CHECK-LABEL: @vfrint_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vfrint_s(v4f32 _1) { return __lsx_vfrint_s(_1); }
+// CHECK-LABEL: @vfrint_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vfrint_d(v2f64 _1) { return __lsx_vfrint_d(_1); }
+// CHECK-LABEL: @vfrsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vfrsqrt_s(v4f32 _1) { return __lsx_vfrsqrt_s(_1); }
+// CHECK-LABEL: @vfrsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vfrsqrt_d(v2f64 _1) { return __lsx_vfrsqrt_d(_1); }
+// CHECK-LABEL: @vflogb_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vflogb_s(v4f32 _1) { return __lsx_vflogb_s(_1); }
+// CHECK-LABEL: @vflogb_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vflogb_d(v2f64 _1) { return __lsx_vflogb_d(_1); }
+// CHECK-LABEL: @vfcvth_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vfcvth_s_h(v8i16 _1) { return __lsx_vfcvth_s_h(_1); }
+// CHECK-LABEL: @vfcvth_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vfcvth_d_s(v4f32 _1) { return __lsx_vfcvth_d_s(_1); }
+// CHECK-LABEL: @vfcvtl_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vfcvtl_s_h(v8i16 _1) { return __lsx_vfcvtl_s_h(_1); }
+// CHECK-LABEL: @vfcvtl_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vfcvtl_d_s(v4f32 _1) { return __lsx_vfcvtl_d_s(_1); }
@vftint_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vftint_w_s(v4f32 _1) { return __lsx_vftint_w_s(_1); } +// CHECK-LABEL: @vftint_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftint_l_d(v2f64 _1) { return __lsx_vftint_l_d(_1); } +// CHECK-LABEL: @vftint_wu_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vftint_wu_s(v4f32 _1) { return __lsx_vftint_wu_s(_1); } +// CHECK-LABEL: @vftint_lu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vftint_lu_d(v2f64 _1) { return __lsx_vftint_lu_d(_1); } +// CHECK-LABEL: @vftintrz_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vftintrz_w_s(v4f32 _1) { return __lsx_vftintrz_w_s(_1); } +// CHECK-LABEL: @vftintrz_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrz_l_d(v2f64 _1) { return __lsx_vftintrz_l_d(_1); } +// CHECK-LABEL: @vftintrz_wu_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vftintrz_wu_s(v4f32 _1) { return __lsx_vftintrz_wu_s(_1); } +// CHECK-LABEL: @vftintrz_lu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vftintrz_lu_d(v2f64 _1) { return __lsx_vftintrz_lu_d(_1); } +// CHECK-LABEL: @vffint_s_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +// CHECK-NEXT: ret 
i128 [[TMP2]] +// +v4f32 vffint_s_w(v4i32 _1) { return __lsx_vffint_s_w(_1); } +// CHECK-LABEL: @vffint_d_l( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2f64 vffint_d_l(v2i64 _1) { return __lsx_vffint_d_l(_1); } +// CHECK-LABEL: @vffint_s_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4f32 vffint_s_wu(v4u32 _1) { return __lsx_vffint_s_wu(_1); } +// CHECK-LABEL: @vffint_d_lu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2f64 vffint_d_lu(v2u64 _1) { return __lsx_vffint_d_lu(_1); } +// CHECK-LABEL: @vandn_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vandn_v(v16u8 _1, v16u8 _2) { return __lsx_vandn_v(_1, _2); } +// CHECK-LABEL: @vneg_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vneg_b(v16i8 _1) { return __lsx_vneg_b(_1); } +// CHECK-LABEL: @vneg_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vneg_h(v8i16 _1) { return __lsx_vneg_h(_1); } +// CHECK-LABEL: @vneg_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vneg_w(v4i32 _1) { return __lsx_vneg_w(_1); } +// CHECK-LABEL: @vneg_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vneg_d(v2i64 _1) { return __lsx_vneg_d(_1); } +// CHECK-LABEL: @vmuh_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vmuh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __lsx_vmuh_b(_1, _2); } +// CHECK-LABEL: @vmuh_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __lsx_vmuh_h(_1, _2); } +// CHECK-LABEL: @vmuh_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __lsx_vmuh_w(_1, _2); } +// CHECK-LABEL: @vmuh_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __lsx_vmuh_d(_1, _2); } +// CHECK-LABEL: @vmuh_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { return __lsx_vmuh_bu(_1, _2); } +// CHECK-LABEL: @vmuh_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { return __lsx_vmuh_hu(_1, _2); } +// CHECK-LABEL: @vmuh_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { return __lsx_vmuh_wu(_1, _2); } +// CHECK-LABEL: @vmuh_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] 
to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vmuh_du(v2u64 _1, v2u64 _2) { return __lsx_vmuh_du(_1, _2); } +// CHECK-LABEL: @vsllwil_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vsllwil_h_b(v16i8 _1) { return __lsx_vsllwil_h_b(_1, 1); } +// CHECK-LABEL: @vsllwil_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vsllwil_w_h(v8i16 _1) { return __lsx_vsllwil_w_h(_1, 1); } +// CHECK-LABEL: @vsllwil_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vsllwil_d_w(v4i32 _1) { return __lsx_vsllwil_d_w(_1, 1); } +// CHECK-LABEL: @vsllwil_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8u16 vsllwil_hu_bu(v16u8 _1) { return __lsx_vsllwil_hu_bu(_1, 1); } +// CHECK-LABEL: @vsllwil_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vsllwil_wu_hu(v8u16 _1) { return __lsx_vsllwil_wu_hu(_1, 1); } +// CHECK-LABEL: @vsllwil_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vsllwil_du_wu(v4u32 _1) { return __lsx_vsllwil_du_wu(_1, 1); } +// CHECK-LABEL: @vsran_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsran_b_h(_1, _2); } +// CHECK-LABEL: @vsran_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsran_h_w(v4i32 _1, v4i32 
_2) { return __lsx_vsran_h_w(_1, _2); } +// CHECK-LABEL: @vsran_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsran_w_d(_1, _2); } +// CHECK-LABEL: @vssran_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssran_b_h(_1, _2); } +// CHECK-LABEL: @vssran_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssran_h_w(_1, _2); } +// CHECK-LABEL: @vssran_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssran_w_d(_1, _2); } +// CHECK-LABEL: @vssran_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssran_bu_h(_1, _2); } +// CHECK-LABEL: @vssran_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssran_hu_w(_1, _2); } +// CHECK-LABEL: @vssran_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { return 
__lsx_vssran_wu_d(_1, _2); } +// CHECK-LABEL: @vsrarn_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrarn_b_h(_1, _2); } +// CHECK-LABEL: @vsrarn_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrarn_h_w(_1, _2); } +// CHECK-LABEL: @vsrarn_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrarn_w_d(_1, _2); } +// CHECK-LABEL: @vssrarn_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrarn_b_h(_1, _2); } +// CHECK-LABEL: @vssrarn_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrarn_h_w(_1, _2); } +// CHECK-LABEL: @vssrarn_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrarn_w_d(_1, _2); } +// CHECK-LABEL: @vssrarn_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { return 
__lsx_vssrarn_bu_h(_1, _2); } +// CHECK-LABEL: @vssrarn_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrarn_hu_w(_1, _2); } +// CHECK-LABEL: @vssrarn_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrarn_wu_d(_1, _2); } +// CHECK-LABEL: @vsrln_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrln_b_h(_1, _2); } +// CHECK-LABEL: @vsrln_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrln_h_w(_1, _2); } +// CHECK-LABEL: @vsrln_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrln_w_d(_1, _2); } +// CHECK-LABEL: @vssrln_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrln_bu_h(_1, _2); } +// CHECK-LABEL: @vssrln_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { return 
__lsx_vssrln_hu_w(_1, _2); } +// CHECK-LABEL: @vssrln_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrln_wu_d(_1, _2); } +// CHECK-LABEL: @vsrlrn_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlrn_b_h(_1, _2); } +// CHECK-LABEL: @vsrlrn_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlrn_h_w(_1, _2); } +// CHECK-LABEL: @vsrlrn_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlrn_w_d(_1, _2); } +// CHECK-LABEL: @vssrlrn_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrlrn_bu_h(_1, _2); } +// CHECK-LABEL: @vssrlrn_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrlrn_hu_w(_1, _2); } +// CHECK-LABEL: @vssrlrn_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { return 
__lsx_vssrlrn_wu_d(_1, _2); } +// CHECK-LABEL: @vfrstpi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { return __lsx_vfrstpi_b(_1, _2, 1); } +// CHECK-LABEL: @vfrstpi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { return __lsx_vfrstpi_h(_1, _2, 1); } +// CHECK-LABEL: @vfrstp_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vfrstp_b(_1, _2, _3); +} +// CHECK-LABEL: @vfrstp_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vfrstp_h(_1, _2, _3); +} +// CHECK-LABEL: @vshuf4i_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { return __lsx_vshuf4i_d(_1, _2, 1); } +// CHECK-LABEL: @vbsrl_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vbsrl_v(v16i8 _1) { return __lsx_vbsrl_v(_1, 1); } +// CHECK-LABEL: @vbsll_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vbsll_v(v16i8 _1) { return 
__lsx_vbsll_v(_1, 1); } +// CHECK-LABEL: @vextrins_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vextrins_b(v16i8 _1, v16i8 _2) { return __lsx_vextrins_b(_1, _2, 1); } +// CHECK-LABEL: @vextrins_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vextrins_h(v8i16 _1, v8i16 _2) { return __lsx_vextrins_h(_1, _2, 1); } +// CHECK-LABEL: @vextrins_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vextrins_w(v4i32 _1, v4i32 _2) { return __lsx_vextrins_w(_1, _2, 1); } +// CHECK-LABEL: @vextrins_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vextrins_d(v2i64 _1, v2i64 _2) { return __lsx_vextrins_d(_1, _2, 1); } +// CHECK-LABEL: @vmskltz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vmskltz_b(v16i8 _1) { return __lsx_vmskltz_b(_1); } +// CHECK-LABEL: @vmskltz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vmskltz_h(v8i16 _1) { return __lsx_vmskltz_h(_1); } +// CHECK-LABEL: @vmskltz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vmskltz_w(v4i32 _1) { return __lsx_vmskltz_w(_1); } +// CHECK-LABEL: @vmskltz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// 
CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vmskltz_d(v2i64 _1) { return __lsx_vmskltz_d(_1); } +// CHECK-LABEL: @vsigncov_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { return __lsx_vsigncov_b(_1, _2); } +// CHECK-LABEL: @vsigncov_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { return __lsx_vsigncov_h(_1, _2); } +// CHECK-LABEL: @vsigncov_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { return __lsx_vsigncov_w(_1, _2); } +// CHECK-LABEL: @vsigncov_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { return __lsx_vsigncov_d(_1, _2); } +// CHECK-LABEL: @vfmadd_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfmadd_s(_1, _2, _3); +} +// CHECK-LABEL: @vfmadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __lsx_vfmadd_d(_1, _2, _3); +} +// CHECK-LABEL: @vfmsub_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] 
to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfmsub_s(_1, _2, _3); +} +// CHECK-LABEL: @vfmsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __lsx_vfmsub_d(_1, _2, _3); +} +// CHECK-LABEL: @vfnmadd_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfnmadd_s(_1, _2, _3); +} +// CHECK-LABEL: @vfnmadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __lsx_vfnmadd_d(_1, _2, _3); +} +// CHECK-LABEL: @vfnmsub_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfnmsub_s(_1, _2, _3); +} +// CHECK-LABEL: @vfnmsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { + 
return __lsx_vfnmsub_d(_1, _2, _3); +} +// CHECK-LABEL: @vftintrne_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vftintrne_w_s(v4f32 _1) { return __lsx_vftintrne_w_s(_1); } +// CHECK-LABEL: @vftintrne_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrne_l_d(v2f64 _1) { return __lsx_vftintrne_l_d(_1); } +// CHECK-LABEL: @vftintrp_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vftintrp_w_s(v4f32 _1) { return __lsx_vftintrp_w_s(_1); } +// CHECK-LABEL: @vftintrp_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrp_l_d(v2f64 _1) { return __lsx_vftintrp_l_d(_1); } +// CHECK-LABEL: @vftintrm_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vftintrm_w_s(v4f32 _1) { return __lsx_vftintrm_w_s(_1); } +// CHECK-LABEL: @vftintrm_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrm_l_d(v2f64 _1) { return __lsx_vftintrm_l_d(_1); } +// CHECK-LABEL: @vftint_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftint_w_d(_1, _2); } +// CHECK-LABEL: @vffint_s_l( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { return __lsx_vffint_s_l(_1, _2); } +// CHECK-LABEL: @vftintrz_w_d( +// 
CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrz_w_d(_1, _2); } +// CHECK-LABEL: @vftintrp_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrp_w_d(_1, _2); } +// CHECK-LABEL: @vftintrm_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrm_w_d(_1, _2); } +// CHECK-LABEL: @vftintrne_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrne_w_d(_1, _2); } +// CHECK-LABEL: @vftintl_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintl_l_s(v4f32 _1) { return __lsx_vftintl_l_s(_1); } +// CHECK-LABEL: @vftinth_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftinth_l_s(v4f32 _1) { return __lsx_vftinth_l_s(_1); } +// CHECK-LABEL: @vffinth_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2f64 vffinth_d_w(v4i32 _1) { return __lsx_vffinth_d_w(_1); } +// CHECK-LABEL: @vffintl_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x 
double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2f64 vffintl_d_w(v4i32 _1) { return __lsx_vffintl_d_w(_1); } +// CHECK-LABEL: @vftintrzl_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrzl_l_s(v4f32 _1) { return __lsx_vftintrzl_l_s(_1); } +// CHECK-LABEL: @vftintrzh_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrzh_l_s(v4f32 _1) { return __lsx_vftintrzh_l_s(_1); } +// CHECK-LABEL: @vftintrpl_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrpl_l_s(v4f32 _1) { return __lsx_vftintrpl_l_s(_1); } +// CHECK-LABEL: @vftintrph_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrph_l_s(v4f32 _1) { return __lsx_vftintrph_l_s(_1); } +// CHECK-LABEL: @vftintrml_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrml_l_s(v4f32 _1) { return __lsx_vftintrml_l_s(_1); } +// CHECK-LABEL: @vftintrmh_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrmh_l_s(v4f32 _1) { return __lsx_vftintrmh_l_s(_1); } +// CHECK-LABEL: @vftintrnel_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrnel_l_s(v4f32 _1) { return __lsx_vftintrnel_l_s(_1); } +// CHECK-LABEL: @vftintrneh_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrneh_l_s(v4f32 _1) { return __lsx_vftintrneh_l_s(_1); } +// CHECK-LABEL: @vfrintrne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> 
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vfrintrne_s(v4f32 _1) { return __lsx_vfrintrne_s(_1); }
+// CHECK-LABEL: @vfrintrne_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vfrintrne_d(v2f64 _1) { return __lsx_vfrintrne_d(_1); }
+// CHECK-LABEL: @vfrintrz_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vfrintrz_s(v4f32 _1) { return __lsx_vfrintrz_s(_1); }
+// CHECK-LABEL: @vfrintrz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vfrintrz_d(v2f64 _1) { return __lsx_vfrintrz_d(_1); }
+// CHECK-LABEL: @vfrintrp_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vfrintrp_s(v4f32 _1) { return __lsx_vfrintrp_s(_1); }
+// CHECK-LABEL: @vfrintrp_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vfrintrp_d(v2f64 _1) { return __lsx_vfrintrp_d(_1); }
+// CHECK-LABEL: @vfrintrm_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vfrintrm_s(v4f32 _1) { return __lsx_vfrintrm_s(_1); }
+// CHECK-LABEL: @vfrintrm_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vfrintrm_d(v2f64 _1) { return __lsx_vfrintrm_d(_1); }
+// CHECK-LABEL: @vstelm_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1, i32 1)
+// CHECK-NEXT: ret void
+//
+void vstelm_b(v16i8 _1, void *_2) { return __lsx_vstelm_b(_1, _2, 1, 1); }
+// CHECK-LABEL: @vstelm_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[TMP0]], ptr [[_2:%.*]], i32 2, i32 1)
+// CHECK-NEXT: ret void
+//
+void vstelm_h(v8i16 _1, void *_2) { return __lsx_vstelm_h(_1, _2, 2, 1); }
+// CHECK-LABEL: @vstelm_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[TMP0]], ptr [[_2:%.*]], i32 4, i32 1)
+// CHECK-NEXT: ret void
+//
+void vstelm_w(v4i32 _1, void *_2) { return __lsx_vstelm_w(_1, _2, 4, 1); }
+// CHECK-LABEL: @vstelm_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[TMP0]], ptr [[_2:%.*]], i32 8, i32 1)
+// CHECK-NEXT: ret void
+//
+void vstelm_d(v2i64 _1, void *_2) { return __lsx_vstelm_d(_1, _2, 8, 1); }
+// CHECK-LABEL: @vaddwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwev_d_w(_1, _2); }
+// CHECK-LABEL: @vaddwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwev_w_h(_1, _2); }
+// CHECK-LABEL: @vaddwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwev_h_b(_1, _2); }
+// CHECK-LABEL: @vaddwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwod_d_w(_1, _2); }
+// CHECK-LABEL: @vaddwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwod_w_h(_1, _2); }
+// CHECK-LABEL: @vaddwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwod_h_b(_1, _2); }
+// CHECK-LABEL: @vaddwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwev_d_wu(_1, _2); }
+// CHECK-LABEL: @vaddwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwev_w_hu(_1, _2); }
+// CHECK-LABEL: @vaddwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwev_h_bu(_1, _2); }
+// CHECK-LABEL: @vaddwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwod_d_wu(_1, _2); }
+// CHECK-LABEL: @vaddwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwod_w_hu(_1, _2); }
+// CHECK-LABEL: @vaddwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwod_h_bu(_1, _2); }
+// CHECK-LABEL: @vaddwev_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) {
+ return __lsx_vaddwev_d_wu_w(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) {
+ return __lsx_vaddwev_w_hu_h(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) {
+ return __lsx_vaddwev_h_bu_b(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) {
+ return __lsx_vaddwod_d_wu_w(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) {
+ return __lsx_vaddwod_w_hu_h(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) {
+ return __lsx_vaddwod_h_bu_b(_1, _2);
+}
+// CHECK-LABEL: @vsubwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwev_d_w(_1, _2); }
+// CHECK-LABEL: @vsubwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwev_w_h(_1, _2); }
+// CHECK-LABEL: @vsubwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwev_h_b(_1, _2); }
+// CHECK-LABEL: @vsubwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwod_d_w(_1, _2); }
+// CHECK-LABEL: @vsubwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwod_w_h(_1, _2); }
+// CHECK-LABEL: @vsubwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwod_h_b(_1, _2); }
+// CHECK-LABEL: @vsubwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwev_d_wu(_1, _2); }
+// CHECK-LABEL: @vsubwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwev_w_hu(_1, _2); }
+// CHECK-LABEL: @vsubwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwev_h_bu(_1, _2); }
+// CHECK-LABEL: @vsubwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwod_d_wu(_1, _2); }
+// CHECK-LABEL: @vsubwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwod_w_hu(_1, _2); }
+// CHECK-LABEL: @vsubwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwod_h_bu(_1, _2); }
+// CHECK-LABEL: @vaddwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwev_q_d(_1, _2); }
+// CHECK-LABEL: @vaddwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwod_q_d(_1, _2); }
+// CHECK-LABEL: @vaddwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwev_q_du(_1, _2); }
+// CHECK-LABEL: @vaddwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwod_q_du(_1, _2); }
+// CHECK-LABEL: @vsubwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwev_q_d(_1, _2); }
+// CHECK-LABEL: @vsubwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwod_q_d(_1, _2); }
+// CHECK-LABEL: @vsubwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwev_q_du(_1, _2); }
+// CHECK-LABEL: @vsubwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwod_q_du(_1, _2); }
+// CHECK-LABEL: @vaddwev_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) {
+ return __lsx_vaddwev_q_du_d(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) {
+ return __lsx_vaddwod_q_du_d(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwev_d_w(_1, _2); }
+// CHECK-LABEL: @vmulwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwev_w_h(_1, _2); }
+// CHECK-LABEL: @vmulwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwev_h_b(_1, _2); }
+// CHECK-LABEL: @vmulwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwod_d_w(_1, _2); }
+// CHECK-LABEL: @vmulwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwod_w_h(_1, _2); }
+// CHECK-LABEL: @vmulwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwod_h_b(_1, _2); }
+// CHECK-LABEL: @vmulwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwev_d_wu(_1, _2); }
+// CHECK-LABEL: @vmulwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwev_w_hu(_1, _2); }
+// CHECK-LABEL: @vmulwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwev_h_bu(_1, _2); }
+// CHECK-LABEL: @vmulwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwod_d_wu(_1, _2); }
+// CHECK-LABEL: @vmulwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwod_w_hu(_1, _2); }
+// CHECK-LABEL: @vmulwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwod_h_bu(_1, _2); }
+// CHECK-LABEL: @vmulwev_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) {
+ return __lsx_vmulwev_d_wu_w(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) {
+ return __lsx_vmulwev_w_hu_h(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) {
+ return __lsx_vmulwev_h_bu_b(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) {
+ return __lsx_vmulwod_d_wu_w(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) {
+ return __lsx_vmulwod_w_hu_h(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) {
+ return __lsx_vmulwod_h_bu_b(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwev_q_d(_1, _2); }
+// CHECK-LABEL: @vmulwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwod_q_d(_1, _2); }
+// CHECK-LABEL: @vmulwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwev_q_du(_1, _2); }
+// CHECK-LABEL: @vmulwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwod_q_du(_1, _2); }
+// CHECK-LABEL: @vmulwev_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) {
+ return __lsx_vmulwev_q_du_d(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) {
+ return __lsx_vmulwod_q_du_d(_1, _2);
+}
+// CHECK-LABEL: @vhaddw_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhaddw_q_d(_1, _2); }
+// CHECK-LABEL: @vhaddw_qu_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhaddw_qu_du(_1, _2); }
+// CHECK-LABEL: @vhsubw_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhsubw_q_d(_1, _2); }
+// CHECK-LABEL: @vhsubw_qu_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhsubw_qu_du(_1, _2); }
+// CHECK-LABEL: @vmaddwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
+ return __lsx_vmaddwev_d_w(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
+ return __lsx_vmaddwev_w_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
+ return __lsx_vmaddwev_h_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
+ return __lsx_vmaddwev_d_wu(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
+ return __lsx_vmaddwev_w_hu(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
+ return __lsx_vmaddwev_h_bu(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
+ return __lsx_vmaddwod_d_w(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
+ return __lsx_vmaddwod_w_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
+ return __lsx_vmaddwod_h_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
+ return __lsx_vmaddwod_d_wu(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
+ return __lsx_vmaddwod_w_hu(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
+ return __lsx_vmaddwod_h_bu(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
+ return __lsx_vmaddwev_d_wu_w(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
+ return __lsx_vmaddwev_w_hu_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
+ return __lsx_vmaddwev_h_bu_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
+ return __lsx_vmaddwod_d_wu_w(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
+ return __lsx_vmaddwod_w_hu_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
+ return __lsx_vmaddwod_h_bu_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
+ return __lsx_vmaddwev_q_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
+ return __lsx_vmaddwod_q_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
+ return __lsx_vmaddwev_q_du(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
+ return __lsx_vmaddwod_q_du(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
+ return __lsx_vmaddwev_q_du_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
+ return __lsx_vmaddwod_q_du_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vrotr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vrotr_b(v16i8 _1, v16i8 _2) { return __lsx_vrotr_b(_1, _2); }
+// CHECK-LABEL: @vrotr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vrotr_h(v8i16 _1, v8i16 _2) { return __lsx_vrotr_h(_1, _2); }
+// CHECK-LABEL: @vrotr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vrotr_w(v4i32 _1, v4i32 _2) { return __lsx_vrotr_w(_1, _2); }
+// CHECK-LABEL: @vrotr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vrotr_d(v2i64 _1, v2i64 _2) { return __lsx_vrotr_d(_1, _2); }
+// CHECK-LABEL: @vadd_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __lsx_vadd_q(_1, _2); }
+// CHECK-LABEL: @vsub_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __lsx_vsub_q(_1, _2); }
+// CHECK-LABEL: @vldrepl_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1)
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
+// CHECK-NEXT: ret i128 [[TMP1]]
+//
+v16i8 vldrepl_b(void *_1) { return __lsx_vldrepl_b(_1, 1); }
+// CHECK-LABEL: @vldrepl_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2)
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
+// CHECK-NEXT: ret i128 [[TMP1]]
+//
+v8i16 vldrepl_h(void *_1) { return __lsx_vldrepl_h(_1, 2); }
+// CHECK-LABEL: @vldrepl_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4)
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
+// CHECK-NEXT: ret i128 [[TMP1]]
+//
+v4i32 vldrepl_w(void *_1) { return __lsx_vldrepl_w(_1, 4); }
+// CHECK-LABEL: @vldrepl_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8)
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
+// CHECK-NEXT: ret i128 [[TMP1]]
+//
+v2i64 vldrepl_d(void *_1) { return __lsx_vldrepl_d(_1, 8); }
+// CHECK-LABEL: @vmskgez_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vmskgez_b(v16i8 _1) { return __lsx_vmskgez_b(_1); }
+// CHECK-LABEL: @vmsknz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vmsknz_b(v16i8 _1) { return __lsx_vmsknz_b(_1); }
+// CHECK-LABEL: @vexth_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vexth_h_b(v16i8 _1) { return __lsx_vexth_h_b(_1); }
+// CHECK-LABEL: @vexth_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vexth_w_h(v8i16 _1) { return __lsx_vexth_w_h(_1); }
+// CHECK-LABEL: @vexth_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vexth_d_w(v4i32 _1) { return __lsx_vexth_d_w(_1); }
+// CHECK-LABEL: @vexth_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vexth_q_d(v2i64 _1) { return __lsx_vexth_q_d(_1); }
+// CHECK-LABEL: @vexth_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8u16 vexth_hu_bu(v16u8 _1) { return __lsx_vexth_hu_bu(_1); }
+// CHECK-LABEL: @vexth_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4u32 vexth_wu_hu(v8u16 _1) { return __lsx_vexth_wu_hu(_1); }
+// CHECK-LABEL: @vexth_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2u64 vexth_du_wu(v4u32 _1) { return __lsx_vexth_du_wu(_1); }
+// CHECK-LABEL: @vexth_qu_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2u64 vexth_qu_du(v2u64 _1) { return __lsx_vexth_qu_du(_1); }
+// CHECK-LABEL: @vrotri_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vrotri_b(v16i8 _1) { return __lsx_vrotri_b(_1, 1); }
+// CHECK-LABEL: @vrotri_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vrotri_h(v8i16 _1) { return __lsx_vrotri_h(_1, 1); }
+// CHECK-LABEL: @vrotri_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vrotri_w(v4i32 _1) { return __lsx_vrotri_w(_1, 1); }
+// CHECK-LABEL: @vrotri_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vrotri_d(v2i64 _1) { return __lsx_vrotri_d(_1, 1); }
+// CHECK-LABEL: @vextl_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vextl_q_d(v2i64 _1) { return __lsx_vextl_q_d(_1); }
+// CHECK-LABEL: @vsrlni_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlni_b_h(_1, _2, 1); }
+// CHECK-LABEL: @vsrlni_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlni_h_w(_1, _2, 1); }
+// CHECK-LABEL: @vsrlni_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlni_w_d(_1, _2, 1); }
+// CHECK-LABEL: @vsrlni_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlni_d_q(_1, _2, 1); }
+// CHECK-LABEL: @vsrlrni_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlrni_b_h(_1, _2, 1); }
+// CHECK-LABEL: @vsrlrni_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlrni_h_w(_1, _2, 1); }
+// CHECK-LABEL: @vsrlrni_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlrni_w_d(_1, _2, 1); }
+// CHECK-LABEL: @vsrlrni_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlrni_d_q(_1, _2, 1); }
+// CHECK-LABEL: @vssrlni_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlni_b_h(_1, _2, 1); }
+// CHECK-LABEL: @vssrlni_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlni_h_w(_1, _2, 1); }
+// CHECK-LABEL: @vssrlni_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlni_w_d(_1, _2, 1); }
+// CHECK-LABEL: @vssrlni_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlni_d_q(_1, _2, 1); }
+// CHECK-LABEL: @vssrlni_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] =
tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrlni_bu_h(_1, _2, 1); } +// CHECK-LABEL: @vssrlni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrlni_hu_w(_1, _2, 1); } +// CHECK-LABEL: @vssrlni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrlni_wu_d(_1, _2, 1); } +// CHECK-LABEL: @vssrlni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrlni_du_q(_1, _2, 1); } +// CHECK-LABEL: @vssrlrni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlrni_b_h(_1, _2, 1); } +// CHECK-LABEL: @vssrlrni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlrni_h_w(_1, _2, 1); } +// CHECK-LABEL: @vssrlrni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlrni_w_d(_1, _2, 1); } +// CHECK-LABEL: @vssrlrni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// 
CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlrni_d_q(_1, _2, 1); } +// CHECK-LABEL: @vssrlrni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) { + return __lsx_vssrlrni_bu_h(_1, _2, 1); +} +// CHECK-LABEL: @vssrlrni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) { + return __lsx_vssrlrni_hu_w(_1, _2, 1); +} +// CHECK-LABEL: @vssrlrni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) { + return __lsx_vssrlrni_wu_d(_1, _2, 1); +} +// CHECK-LABEL: @vssrlrni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) { + return __lsx_vssrlrni_du_q(_1, _2, 1); +} +// CHECK-LABEL: @vsrani_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrani_b_h(_1, _2, 1); } +// CHECK-LABEL: @vsrani_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrani_h_w(_1, _2, 1); } +// CHECK-LABEL: 
@vsrani_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrani_w_d(_1, _2, 1); } +// CHECK-LABEL: @vsrani_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrani_d_q(_1, _2, 1); } +// CHECK-LABEL: @vsrarni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrarni_b_h(_1, _2, 1); } +// CHECK-LABEL: @vsrarni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrarni_h_w(_1, _2, 1); } +// CHECK-LABEL: @vsrarni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrarni_w_d(_1, _2, 1); } +// CHECK-LABEL: @vsrarni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrarni_d_q(_1, _2, 1); } +// CHECK-LABEL: @vssrani_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vssrani_b_h(v16i8 _1, 
v16i8 _2) { return __lsx_vssrani_b_h(_1, _2, 1); } +// CHECK-LABEL: @vssrani_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrani_h_w(_1, _2, 1); } +// CHECK-LABEL: @vssrani_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrani_w_d(_1, _2, 1); } +// CHECK-LABEL: @vssrani_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrani_d_q(_1, _2, 1); } +// CHECK-LABEL: @vssrani_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrani_bu_h(_1, _2, 1); } +// CHECK-LABEL: @vssrani_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrani_hu_w(_1, _2, 1); } +// CHECK-LABEL: @vssrani_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrani_wu_d(_1, _2, 1); } +// CHECK-LABEL: @vssrani_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x 
i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrani_du_q(_1, _2, 1); } +// CHECK-LABEL: @vssrarni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrarni_b_h(_1, _2, 1); } +// CHECK-LABEL: @vssrarni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrarni_h_w(_1, _2, 1); } +// CHECK-LABEL: @vssrarni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrarni_w_d(_1, _2, 1); } +// CHECK-LABEL: @vssrarni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrarni_d_q(_1, _2, 1); } +// CHECK-LABEL: @vssrarni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) { + return __lsx_vssrarni_bu_h(_1, _2, 1); +} +// CHECK-LABEL: @vssrarni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { + return __lsx_vssrarni_hu_w(_1, _2, 1); +} +// CHECK-LABEL: @vssrarni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { + return __lsx_vssrarni_wu_d(_1, _2, 1); +} +// CHECK-LABEL: @vssrarni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { + return __lsx_vssrarni_du_q(_1, _2, 1); +} +// CHECK-LABEL: @vpermi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vpermi_w(v4i32 _1, v4i32 _2) { return __lsx_vpermi_w(_1, _2, 1); } +// CHECK-LABEL: @vld( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v16i8 vld(void *_1) { return __lsx_vld(_1, 1); } +// CHECK-LABEL: @vst( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1) +// CHECK-NEXT: ret void +// +void vst(v16i8 _1, void *_2) { return __lsx_vst(_1, _2, 1); } +// CHECK-LABEL: @vssrlrn_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrlrn_b_h(_1, _2); } +// CHECK-LABEL: @vssrlrn_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrlrn_h_w(_1, _2); } +// CHECK-LABEL: @vssrlrn_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrlrn_w_d(_1, _2); } +// CHECK-LABEL: @vssrln_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] 
to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrln_b_h(_1, _2); } +// CHECK-LABEL: @vssrln_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrln_h_w(_1, _2); } +// CHECK-LABEL: @vssrln_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrln_w_d(_1, _2); } +// CHECK-LABEL: @vorn_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __lsx_vorn_v(_1, _2); } +// CHECK-LABEL: @vldi( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v2i64 vldi() { return __lsx_vldi(1); } +// CHECK-LABEL: @vshuf_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vshuf_b(_1, _2, _3); +} +// CHECK-LABEL: @vldx( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v16i8 vldx(void *_1) { return __lsx_vldx(_1, 1); } +// CHECK-LABEL: @vstx( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i64 1) +// CHECK-NEXT: ret void +// +void vstx(v16i8 _1, void *_2) { return __lsx_vstx(_1, _2, 1); } +// CHECK-LABEL: @vextl_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x 
i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vextl_qu_du(v2u64 _1) { return __lsx_vextl_qu_du(_1); } +// CHECK-LABEL: @bnz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bnz_b(v16u8 _1) { return __lsx_bnz_b(_1); } +// CHECK-LABEL: @bnz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bnz_d(v2u64 _1) { return __lsx_bnz_d(_1); } +// CHECK-LABEL: @bnz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bnz_h(v8u16 _1) { return __lsx_bnz_h(_1); } +// CHECK-LABEL: @bnz_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bnz_v(v16u8 _1) { return __lsx_bnz_v(_1); } +// CHECK-LABEL: @bnz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bnz_w(v4u32 _1) { return __lsx_bnz_w(_1); } +// CHECK-LABEL: @bz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bz_b(v16u8 _1) { return __lsx_bz_b(_1); } +// CHECK-LABEL: @bz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bz_d(v2u64 _1) { return __lsx_bz_d(_1); } +// CHECK-LABEL: @bz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bz_h(v8u16 _1) { return __lsx_bz_h(_1); } +// CHECK-LABEL: @bz_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bz_v(v16u8 _1) { return __lsx_bz_v(_1); } +// CHECK-LABEL: @bz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bz_w(v4u32 _1) { return __lsx_bz_w(_1); } +// CHECK-LABEL: @vfcmp_caf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// 
CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_caf_d(_1, _2); } +// CHECK-LABEL: @vfcmp_caf_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_caf_s(_1, _2); } +// CHECK-LABEL: @vfcmp_ceq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_ceq_d(_1, _2); } +// CHECK-LABEL: @vfcmp_ceq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_ceq_s(_1, _2); } +// CHECK-LABEL: @vfcmp_cle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cle_d(_1, _2); } +// CHECK-LABEL: @vfcmp_cle_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cle_s(_1, _2); } +// CHECK-LABEL: @vfcmp_clt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_clt_d(_1, _2); } +// CHECK-LABEL: @vfcmp_clt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_clt_s(_1, _2); } +// CHECK-LABEL: @vfcmp_cne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cne_d(_1, _2); } +// CHECK-LABEL: @vfcmp_cne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cne_s(_1, _2); } +// CHECK-LABEL: @vfcmp_cor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cor_d(_1, _2); } +// CHECK-LABEL: @vfcmp_cor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cor_s(_1, _2); } +// CHECK-LABEL: @vfcmp_cueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cueq_d(_1, _2); } +// CHECK-LABEL: @vfcmp_cueq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cueq_s(_1, _2); } +// CHECK-LABEL: @vfcmp_cule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 
[[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cule_d(_1, _2); } +// CHECK-LABEL: @vfcmp_cule_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cule_s(_1, _2); } +// CHECK-LABEL: @vfcmp_cult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cult_d(_1, _2); } +// CHECK-LABEL: @vfcmp_cult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cult_s(_1, _2); } +// CHECK-LABEL: @vfcmp_cun_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cun_d(_1, _2); } +// CHECK-LABEL: @vfcmp_cune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cune_d(_1, _2); } +// CHECK-LABEL: @vfcmp_cune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cune_s(_1, _2); } +// CHECK-LABEL: @vfcmp_cun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cun_s(_1, _2); } +// CHECK-LABEL: @vfcmp_saf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_saf_d(_1, _2); } +// CHECK-LABEL: @vfcmp_saf_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_saf_s(_1, _2); } +// CHECK-LABEL: @vfcmp_seq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_seq_d(_1, _2); } +// CHECK-LABEL: @vfcmp_seq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_seq_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sle_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sle_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sle_s(_1, _2); } 
+// CHECK-LABEL: @vfcmp_slt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_slt_d(_1, _2); } +// CHECK-LABEL: @vfcmp_slt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_slt_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sne_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sne_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sor_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sor_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// 
+v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sueq_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sueq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sueq_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sule_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sule_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sule_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sult_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sult_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sun_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sun_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// 
CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sune_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sune_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sun_s(_1, _2); } +// CHECK-LABEL: @vrepli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v16i8 vrepli_b() { return __lsx_vrepli_b(1); } +// CHECK-LABEL: @vrepli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v2i64 vrepli_d() { return __lsx_vrepli_d(1); } +// CHECK-LABEL: @vrepli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v8i16 vrepli_h() { return __lsx_vrepli_h(1); } +// CHECK-LABEL: @vrepli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v4i32 vrepli_w() { return __lsx_vrepli_w(1); } diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-error.c b/clang/test/CodeGen/LoongArch/lsx/builtin-error.c new file mode 100644 index 0000000000000000000000000000000000000000..3fc5f73f11934e6a7aaf806d0d3a6f0c89853fcd --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lsx/builtin-error.c @@ -0,0 +1,1382 @@ +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -verify %s + +typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); +typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); +typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); +typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); +typedef short v8i16 __attribute__((vector_size(16), aligned(16))); +typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); +typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); +typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); +typedef int v4i32 __attribute__((vector_size(16), aligned(16))); +typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); +typedef unsigned int v4u32 __attribute__((vector_size(16), 
aligned(16))); +typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); +typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); +typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); +typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); +typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); +typedef float v4f32 __attribute__((vector_size(16), aligned(16))); +typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); +typedef double v2f64 __attribute__((vector_size(16), aligned(16))); +typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); + +typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); +typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); +typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); + +v16i8 vslli_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vslli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_b' must be a constant integer}} + return res; +} + +v8i16 vslli_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vslli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_h' must be a constant integer}} + return res; +} + +v4i32 vslli_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_w' must be a constant integer}} + return res; +} + +v2i64 vslli_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vslli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_d' must be a constant integer}} + return res; +} + +v16i8 vsrai_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrai_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_b' must be a constant integer}} + return res; +} + +v8i16 vsrai_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrai_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_h' must be a constant integer}} + return res; +} + +v4i32 vsrai_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vsrai_w(_1, -1); // expected-error 
{{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrai_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_w' must be a constant integer}} + return res; +} + +v2i64 vsrai_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrai_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_d' must be a constant integer}} + return res; +} + +v16i8 vsrari_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrari_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_b' must be a constant integer}} + return res; +} + +v8i16 vsrari_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrari_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_h' must be a constant integer}} + return res; +} + +v4i32 vsrari_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrari_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_w' must be a constant integer}} + return res; +} + +v2i64 vsrari_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrari_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_d' must be a constant integer}} + return res; +} + +v16i8 vsrli_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_b' must be a constant integer}} + return res; +} + +v8i16 vsrli_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_h' must be a constant integer}} + return res; +} + +v4i32 vsrli_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrli_w(_1, 32); // expected-error {{argument value 32 is 
outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_w' must be a constant integer}} + return res; +} + +v2i64 vsrli_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_d' must be a constant integer}} + return res; +} + +v16i8 vsrlri_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrlri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_b' must be a constant integer}} + return res; +} + +v8i16 vsrlri_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrlri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_h' must be a constant integer}} + return res; +} + +v4i32 vsrlri_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrlri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_w' must be a constant integer}} + return res; +} + +v2i64 vsrlri_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrlri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_d' must be a constant integer}} + return res; +} + +v16u8 vbitclri_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vbitclri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_b' must be a constant integer}} + return res; +} + +v8u16 vbitclri_h(v8u16 _1, int var) { + v8u16 res = __builtin_lsx_vbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vbitclri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_h' must be a constant integer}} + return res; +} + +v4u32 vbitclri_w(v4u32 _1, int var) { + v4u32 res = __builtin_lsx_vbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbitclri_w(_1, var); // expected-error {{argument to 
'__builtin_lsx_vbitclri_w' must be a constant integer}} + return res; +} + +v2u64 vbitclri_d(v2u64 _1, int var) { + v2u64 res = __builtin_lsx_vbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vbitclri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_d' must be a constant integer}} + return res; +} + +v16u8 vbitseti_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vbitseti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_b' must be a constant integer}} + return res; +} + +v8u16 vbitseti_h(v8u16 _1, int var) { + v8u16 res = __builtin_lsx_vbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vbitseti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_h' must be a constant integer}} + return res; +} + +v4u32 vbitseti_w(v4u32 _1, int var) { + v4u32 res = __builtin_lsx_vbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbitseti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_w' must be a constant integer}} + return res; +} + +v2u64 vbitseti_d(v2u64 _1, int var) { + v2u64 res = __builtin_lsx_vbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vbitseti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_d' must be a constant integer}} + return res; +} + +v16u8 vbitrevi_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_b' must be a constant integer}} + return res; +} + +v8u16 vbitrevi_h(v8u16 _1, int var) { + v8u16 res = __builtin_lsx_vbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_h' must be a constant integer}} + return res; +} + +v4u32 vbitrevi_w(v4u32 _1, int var) { + v4u32 res = __builtin_lsx_vbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_w' must be a constant 
integer}} + return res; +} + +v2u64 vbitrevi_d(v2u64 _1, int var) { + v2u64 res = __builtin_lsx_vbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_d' must be a constant integer}} + return res; +} + +v16i8 vaddi_bu(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_bu' must be a constant integer}} + return res; +} + +v8i16 vaddi_hu(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_hu' must be a constant integer}} + return res; +} + +v4i32 vaddi_wu(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_wu' must be a constant integer}} + return res; +} + +v2i64 vaddi_du(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_du' must be a constant integer}} + return res; +} + +v16i8 vsubi_bu(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_bu' must be a constant integer}} + return res; +} + +v8i16 vsubi_hu(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_hu' must be a constant integer}} + return res; +} + +v4i32 vsubi_wu(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_wu' must be a constant integer}} + return res; +} + +v2i64 vsubi_du(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vsubi_du(_1, -1); 
// expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_du' must be a constant integer}} + return res; +} + +v16i8 vmaxi_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_b' must be a constant integer}} + return res; +} + +v8i16 vmaxi_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_h' must be a constant integer}} + return res; +} + +v4i32 vmaxi_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_w' must be a constant integer}} + return res; +} + +v2i64 vmaxi_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_d' must be a constant integer}} + return res; +} + +v16u8 vmaxi_bu(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_bu' must be a constant integer}} + return res; +} + +v8u16 vmaxi_hu(v8u16 _1, int var) { + v8u16 res = __builtin_lsx_vmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_hu' must be a constant integer}} + return res; +} + +v4u32 vmaxi_wu(v4u32 _1, int var) { + v4u32 res = __builtin_lsx_vmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_wu' must be a constant integer}} + return res; +} + +v2u64 vmaxi_du(v2u64 _1, int var) { + v2u64 res = __builtin_lsx_vmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_du(_1, 32); // expected-error 
{{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_du' must be a constant integer}} + return res; +} + +v16i8 vmini_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_b' must be a constant integer}} + return res; +} + +v8i16 vmini_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_h' must be a constant integer}} + return res; +} + +v4i32 vmini_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_w' must be a constant integer}} + return res; +} + +v2i64 vmini_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_d' must be a constant integer}} + return res; +} + +v16u8 vmini_bu(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_bu' must be a constant integer}} + return res; +} + +v8u16 vmini_hu(v8u16 _1, int var) { + v8u16 res = __builtin_lsx_vmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_hu' must be a constant integer}} + return res; +} + +v4u32 vmini_wu(v4u32 _1, int var) { + v4u32 res = __builtin_lsx_vmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_wu' must be a constant integer}} + return res; +} + +v2u64 vmini_du(v2u64 _1, int var) { + v2u64 res = __builtin_lsx_vmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_du(_1, var); // expected-error {{argument to
'__builtin_lsx_vmini_du' must be a constant integer}} + return res; +} + +v16i8 vseqi_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_b' must be a constant integer}} + return res; +} + +v8i16 vseqi_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_h' must be a constant integer}} + return res; +} + +v4i32 vseqi_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_w' must be a constant integer}} + return res; +} + +v2i64 vseqi_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_d' must be a constant integer}} + return res; +} + +v16i8 vslti_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_b' must be a constant integer}} + return res; +} + +v8i16 vslti_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_h' must be a constant integer}} + return res; +} + +v4i32 vslti_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_w' must be a constant integer}} + return res; +} + +v2i64 vslti_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_d' must be a constant integer}} + return res; +} + +v16i8 vslti_bu(v16u8 _1, int var) { + v16i8 res = __builtin_lsx_vslti_bu(_1, -1); // expected-error 
{{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_bu' must be a constant integer}} + return res; +} + +v8i16 vslti_hu(v8u16 _1, int var) { + v8i16 res = __builtin_lsx_vslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_hu' must be a constant integer}} + return res; +} + +v4i32 vslti_wu(v4u32 _1, int var) { + v4i32 res = __builtin_lsx_vslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_wu' must be a constant integer}} + return res; +} + +v2i64 vslti_du(v2u64 _1, int var) { + v2i64 res = __builtin_lsx_vslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_du' must be a constant integer}} + return res; +} + +v16i8 vslei_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_b' must be a constant integer}} + return res; +} + +v8i16 vslei_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_h' must be a constant integer}} + return res; +} + +v4i32 vslei_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_w' must be a constant integer}} + return res; +} + +v2i64 vslei_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_d' must be a constant integer}} + return res; +} + +v16i8 vslei_bu(v16u8 _1, int var) { + v16i8 res = __builtin_lsx_vslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_bu(_1, 32); // expected-error {{argument value 32 is 
outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_bu' must be a constant integer}} + return res; +} + +v8i16 vslei_hu(v8u16 _1, int var) { + v8i16 res = __builtin_lsx_vslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_hu' must be a constant integer}} + return res; +} + +v4i32 vslei_wu(v4u32 _1, int var) { + v4i32 res = __builtin_lsx_vslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_wu' must be a constant integer}} + return res; +} + +v2i64 vslei_du(v2u64 _1, int var) { + v2i64 res = __builtin_lsx_vslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_du' must be a constant integer}} + return res; +} + +v16i8 vsat_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsat_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_b' must be a constant integer}} + return res; +} + +v8i16 vsat_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsat_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_h' must be a constant integer}} + return res; +} + +v4i32 vsat_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsat_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_w' must be a constant integer}} + return res; +} + +v2i64 vsat_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsat_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_d' must be a constant integer}} + return res; +} + +v16u8 vsat_bu(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsat_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_bu' must be a constant integer}} + return res; 
+} + +v8u16 vsat_hu(v8u16 _1, int var) { + v8u16 res = __builtin_lsx_vsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsat_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_hu' must be a constant integer}} + return res; +} + +v4u32 vsat_wu(v4u32 _1, int var) { + v4u32 res = __builtin_lsx_vsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsat_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_wu' must be a constant integer}} + return res; +} + +v2u64 vsat_du(v2u64 _1, int var) { + v2u64 res = __builtin_lsx_vsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsat_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_du' must be a constant integer}} + return res; +} + +v16i8 vreplvei_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vreplvei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vreplvei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vreplvei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_b' must be a constant integer}} + return res; +} + +v8i16 vreplvei_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vreplvei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vreplvei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vreplvei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_h' must be a constant integer}} + return res; +} + +v4i32 vreplvei_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vreplvei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vreplvei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vreplvei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_w' must be a constant integer}} + return res; +} + +v2i64 vreplvei_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vreplvei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vreplvei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vreplvei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_d' must be a constant integer}} + return res; +} + +v16u8 vandi_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vandi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vandi_b' must be a constant integer}} + return res; +} + +v16u8 vori_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vori_b(_1, -1); // expected-error 
{{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vori_b' must be a constant integer}} + return res; +} + +v16u8 vnori_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vnori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vnori_b' must be a constant integer}} + return res; +} + +v16u8 vxori_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vxori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vxori_b' must be a constant integer}} + return res; +} + +v16u8 vbitseli_b(v16u8 _1, v16u8 _2, int var) { + v16u8 res = __builtin_lsx_vbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vbitseli_b' must be a constant integer}} + return res; +} + +v16i8 vshuf4i_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_b' must be a constant integer}} + return res; +} + +v8i16 vshuf4i_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_h' must be a constant integer}} + return res; +} + +v4i32 vshuf4i_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_w' must be a constant integer}} + return res; +} + +int vpickve2gr_b(v16i8 _1, int var) { + int res = __builtin_lsx_vpickve2gr_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vpickve2gr_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vpickve2gr_b(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_b' must be a constant integer}} + return res; +} + +int vpickve2gr_h(v8i16 _1, int var) { + int res = __builtin_lsx_vpickve2gr_h(_1, -1); // expected-error {{argument value 4294967295 is outside the 
valid range [0, 7]}} + res |= __builtin_lsx_vpickve2gr_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vpickve2gr_h(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_h' must be a constant integer}} + return res; +} + +int vpickve2gr_w(v4i32 _1, int var) { + int res = __builtin_lsx_vpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vpickve2gr_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_w' must be a constant integer}} + return res; +} + +long vpickve2gr_d(v2i64 _1, int var) { + long res = __builtin_lsx_vpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vpickve2gr_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_d' must be a constant integer}} + return res; +} + +unsigned int vpickve2gr_bu(v16i8 _1, int var) { + unsigned int res = __builtin_lsx_vpickve2gr_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vpickve2gr_bu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vpickve2gr_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_bu' must be a constant integer}} + return res; +} + +unsigned int vpickve2gr_hu(v8i16 _1, int var) { + unsigned int res = __builtin_lsx_vpickve2gr_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vpickve2gr_hu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vpickve2gr_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_hu' must be a constant integer}} + return res; +} + +unsigned int vpickve2gr_wu(v4i32 _1, int var) { + unsigned int res = __builtin_lsx_vpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vpickve2gr_wu(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_wu' must be a constant integer}} + return res; +} + +unsigned long int vpickve2gr_du(v2i64 _1, int var) { + unsigned long int res = __builtin_lsx_vpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vpickve2gr_du(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_du' must be a constant integer}} + return res; +} + +v16i8 vinsgr2vr_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vinsgr2vr_b(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vinsgr2vr_b(_1, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vinsgr2vr_b(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_b' must be a constant integer}} + return res; +} + +v8i16 vinsgr2vr_h(v8i16 _1, int var) { + v8i16 res = 
__builtin_lsx_vinsgr2vr_h(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vinsgr2vr_h(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vinsgr2vr_h(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_h' must be a constant integer}} + return res; +} + +v4i32 vinsgr2vr_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vinsgr2vr_w(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_w' must be a constant integer}} + return res; +} + +v2i64 vinsgr2vr_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vinsgr2vr_d(_1, 1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_d' must be a constant integer}} + return res; +} + +v8i16 vsllwil_h_b(v16i8 _1, int var) { + v8i16 res = __builtin_lsx_vsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_h_b' must be a constant integer}} + return res; +} + +v4i32 vsllwil_w_h(v8i16 _1, int var) { + v4i32 res = __builtin_lsx_vsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_w_h' must be a constant integer}} + return res; +} + +v2i64 vsllwil_d_w(v4i32 _1, int var) { + v2i64 res = __builtin_lsx_vsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_d_w' must be a constant integer}} + return res; +} + +v8u16 vsllwil_hu_bu(v16u8 _1, int var) { + v8u16 res = __builtin_lsx_vsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_hu_bu' must be a constant integer}} + return res; +} + +v4u32 vsllwil_wu_hu(v8u16 _1, int var) { + v4u32 res = __builtin_lsx_vsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_wu_hu' must be a constant integer}} + return res; +} + +v2u64 vsllwil_du_wu(v4u32 
_1, int var) { + v2u64 res = __builtin_lsx_vsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_du_wu' must be a constant integer}} + return res; +} + +v16i8 vfrstpi_b(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_b' must be a constant integer}} + return res; +} + +v8i16 vfrstpi_h(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_h' must be a constant integer}} + return res; +} + +v2i64 vshuf4i_d(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_d' must be a constant integer}} + return res; +} + +v16i8 vbsrl_v(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbsrl_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsrl_v' must be a constant integer}} + return res; +} + +v16i8 vbsll_v(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbsll_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsll_v' must be a constant integer}} + return res; +} + +v16i8 vextrins_b(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_b' must be a constant integer}} + return res; +} + +v8i16 vextrins_h(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_h' must be a constant 
integer}} + return res; +} + +v4i32 vextrins_w(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_w' must be a constant integer}} + return res; +} + +v2i64 vextrins_d(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_d' must be a constant integer}} + return res; +} + +void vstelm_b_idx(v16i8 _1, void *_2, int var) { + __builtin_lsx_vstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + __builtin_lsx_vstelm_b(_1, _2, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + __builtin_lsx_vstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} +} + +void vstelm_h_idx(v8i16 _1, void *_2, int var) { + __builtin_lsx_vstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + __builtin_lsx_vstelm_h(_1, _2, 2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + __builtin_lsx_vstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} +} + +void vstelm_w_idx(v4i32 _1, void *_2, int var) { + __builtin_lsx_vstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + __builtin_lsx_vstelm_w(_1, _2, 4, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + __builtin_lsx_vstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} +} + +void vstelm_d_idx(v2i64 _1, void *_2, int var) { + __builtin_lsx_vstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + __builtin_lsx_vstelm_d(_1, _2, 8, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + __builtin_lsx_vstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} +} + +void vstelm_b(v16i8 _1, void *_2, int var) { + __builtin_lsx_vstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} + __builtin_lsx_vstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} + __builtin_lsx_vstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} +} + +void vstelm_h(v8i16 _1, void *_2, int var) { + __builtin_lsx_vstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} + __builtin_lsx_vstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} + __builtin_lsx_vstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} +} + +void vstelm_w(v4i32 _1, void *_2, int var) { + 
__builtin_lsx_vstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} + __builtin_lsx_vstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} + __builtin_lsx_vstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} +} + +void vstelm_d(v2i64 _1, void *_2, int var) { + __builtin_lsx_vstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} + __builtin_lsx_vstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} + __builtin_lsx_vstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} +} + +v16i8 vldrepl_b(void *_1, int var) { + v16i8 res = __builtin_lsx_vldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + res |= __builtin_lsx_vldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + res |= __builtin_lsx_vldrepl_b(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_b' must be a constant integer}} + return res; +} + +v8i16 vldrepl_h(void *_1, int var) { + v8i16 res = __builtin_lsx_vldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} + res |= __builtin_lsx_vldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} + res |= __builtin_lsx_vldrepl_h(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_h' must be a constant integer}} + return res; +} + +v4i32 vldrepl_w(void *_1, int var) { + v4i32 res = __builtin_lsx_vldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} + res |= __builtin_lsx_vldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} + res |= __builtin_lsx_vldrepl_w(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_w' must be a constant integer}} + return res; +} + +v2i64 vldrepl_d(void *_1, int var) { + v2i64 res = __builtin_lsx_vldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} + res |= __builtin_lsx_vldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} + res |= __builtin_lsx_vldrepl_d(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_d' must be a constant integer}} + return res; +} + +v16i8 vrotri_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vrotri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_b' must be a constant integer}} + return res; +} + +v8i16 vrotri_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vrotri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_h' must be a constant integer}} + return res; +} + +v4i32 vrotri_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vrotri_w(_1, -1); // 
expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vrotri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_w' must be a constant integer}} + return res; +} + +v2i64 vrotri_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vrotri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_d' must be a constant integer}} + return res; +} + +v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_b_h' must be a constant integer}} + return res; +} + +v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_h_w' must be a constant integer}} + return res; +} + +v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_w_d' must be a constant integer}} + return res; +} + +v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_d_q' must be a constant integer}} + return res; +} + +v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_b_h' must be a constant integer}} + return res; +} + +v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_h_w' must be a constant integer}} + return res; +} + 
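+// The immediate-range checks in this file all follow the same three-case
+// pattern: an immediate below the valid range, one above it, and a
+// non-constant argument. For contrast, a minimal illustrative helper (the
+// function name here is ours, not one of the generated cases) shows that an
+// in-range constant immediate is accepted without any diagnostic:
+v8i16 vsrlrni_h_w_in_range(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsrlrni_h_w(_1, _2, 31); // 31 is within [0, 31]; no expected-error.
+}
+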
+v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_w_d' must be a constant integer}} + return res; +} + +v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_d_q' must be a constant integer}} + return res; +} + +v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_b_h' must be a constant integer}} + return res; +} + +v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_h_w' must be a constant integer}} + return res; +} + +v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_w_d' must be a constant integer}} + return res; +} + +v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_d_q' must be a constant integer}} + return res; +} + +v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2, int var) { + v16u8 res = __builtin_lsx_vssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_bu_h' must be a constant integer}} + return res; +} + +v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2, int var) { + v8u16 res = __builtin_lsx_vssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is 
outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_hu_w' must be a constant integer}} + return res; +} + +v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2, int var) { + v4u32 res = __builtin_lsx_vssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_wu_d' must be a constant integer}} + return res; +} + +v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2, int var) { + v2u64 res = __builtin_lsx_vssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_du_q' must be a constant integer}} + return res; +} + +v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_b_h' must be a constant integer}} + return res; +} + +v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_h_w' must be a constant integer}} + return res; +} + +v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_w_d' must be a constant integer}} + return res; +} + +v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_d_q' must be a constant integer}} + return res; +} + +v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2, int var) { + v16u8 res = __builtin_lsx_vssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_bu_h' must be a constant integer}} + return res; +} + +v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2, int var) { 
+ v8u16 res = __builtin_lsx_vssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_hu_w' must be a constant integer}} + return res; +} + +v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2, int var) { + v4u32 res = __builtin_lsx_vssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_wu_d' must be a constant integer}} + return res; +} + +v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2, int var) { + v2u64 res = __builtin_lsx_vssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_du_q' must be a constant integer}} + return res; +} + +v16i8 vsrani_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_b_h' must be a constant integer}} + return res; +} + +v8i16 vsrani_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_h_w' must be a constant integer}} + return res; +} + +v4i32 vsrani_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_w_d' must be a constant integer}} + return res; +} + +v2i64 vsrani_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_d_q' must be a constant integer}} + return res; +} + +v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= 
__builtin_lsx_vsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_b_h' must be a constant integer}} + return res; +} + +v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_h_w' must be a constant integer}} + return res; +} + +v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_w_d' must be a constant integer}} + return res; +} + +v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_d_q' must be a constant integer}} + return res; +} + +v16i8 vssrani_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_b_h' must be a constant integer}} + return res; +} + +v8i16 vssrani_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_h_w' must be a constant integer}} + return res; +} + +v4i32 vssrani_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_w_d' must be a constant integer}} + return res; +} + +v2i64 vssrani_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_d_q' must be a constant integer}} + return res; +} + +v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2, int var) { + v16u8 res = __builtin_lsx_vssrani_bu_h(_1, _2, -1); // expected-error {{argument 
value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_bu_h' must be a constant integer}} + return res; +} + +v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2, int var) { + v8u16 res = __builtin_lsx_vssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_hu_w' must be a constant integer}} + return res; +} + +v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2, int var) { + v4u32 res = __builtin_lsx_vssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_wu_d' must be a constant integer}} + return res; +} + +v2u64 vssrani_du_q(v2u64 _1, v2i64 _2, int var) { + v2u64 res = __builtin_lsx_vssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_du_q' must be a constant integer}} + return res; +} + +v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_b_h' must be a constant integer}} + return res; +} + +v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_h_w' must be a constant integer}} + return res; +} + +v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_w_d' must be a constant integer}} + return res; +} + +v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrarni_d_q(_1, _2, var); // expected-error 
{{argument to '__builtin_lsx_vssrarni_d_q' must be a constant integer}} + return res; +} + +v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2, int var) { + v16u8 res = __builtin_lsx_vssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_bu_h' must be a constant integer}} + return res; +} + +v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2, int var) { + v8u16 res = __builtin_lsx_vssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_hu_w' must be a constant integer}} + return res; +} + +v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2, int var) { + v4u32 res = __builtin_lsx_vssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_wu_d' must be a constant integer}} + return res; +} + +v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2, int var) { + v2u64 res = __builtin_lsx_vssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_du_q' must be a constant integer}} + return res; +} + +v4i32 vpermi_w(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vpermi_w' must be a constant integer}} + return res; +} + +v16i8 vld(void *_1, int var) { + v16i8 res = __builtin_lsx_vld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + res |= __builtin_lsx_vld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + res |= __builtin_lsx_vld(_1, var); // expected-error {{argument to '__builtin_lsx_vld' must be a constant integer}} + return res; +} + +void vst(v16i8 _1, void *_2, int var) { + __builtin_lsx_vst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + __builtin_lsx_vst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + __builtin_lsx_vst(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vst' must be a constant integer}} +} + +v2i64 vldi(int var) { + v2i64 res = __builtin_lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} + res |= __builtin_lsx_vldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} + res |= 
__builtin_lsx_vldi(var); // expected-error {{argument to '__builtin_lsx_vldi' must be a constant integer}} + return res; +} + +v16i8 vrepli_b(int var) { + v16i8 res = __builtin_lsx_vrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __builtin_lsx_vrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __builtin_lsx_vrepli_b(var); // expected-error {{argument to '__builtin_lsx_vrepli_b' must be a constant integer}} + return res; +} + +v2i64 vrepli_d(int var) { + v2i64 res = __builtin_lsx_vrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __builtin_lsx_vrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __builtin_lsx_vrepli_d(var); // expected-error {{argument to '__builtin_lsx_vrepli_d' must be a constant integer}} + return res; +} + +v8i16 vrepli_h(int var) { + v8i16 res = __builtin_lsx_vrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __builtin_lsx_vrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __builtin_lsx_vrepli_h(var); // expected-error {{argument to '__builtin_lsx_vrepli_h' must be a constant integer}} + return res; +} + +v4i32 vrepli_w(int var) { + v4i32 res = __builtin_lsx_vrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __builtin_lsx_vrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __builtin_lsx_vrepli_w(var); // expected-error {{argument to '__builtin_lsx_vrepli_w' must be a constant integer}} + return res; +} diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin.c b/clang/test/CodeGen/LoongArch/lsx/builtin.c new file mode 100644 index 0000000000000000000000000000000000000000..05a3d13a7fb9aeb9eb089f9ad49167a31e633754 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lsx/builtin.c @@ -0,0 +1,7101 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s + +typedef signed char v16i8 __attribute__ ((vector_size(16), aligned(16))); +typedef signed char v16i8_b __attribute__ ((vector_size(16), aligned(1))); +typedef unsigned char v16u8 __attribute__ ((vector_size(16), aligned(16))); +typedef unsigned char v16u8_b __attribute__ ((vector_size(16), aligned(1))); +typedef short v8i16 __attribute__ ((vector_size(16), aligned(16))); +typedef short v8i16_h __attribute__ ((vector_size(16), aligned(2))); +typedef unsigned short v8u16 __attribute__ ((vector_size(16), aligned(16))); +typedef unsigned short v8u16_h __attribute__ ((vector_size(16), aligned(2))); +typedef int v4i32 __attribute__ ((vector_size(16), aligned(16))); +typedef int v4i32_w __attribute__ ((vector_size(16), aligned(4))); +typedef unsigned int v4u32 __attribute__ ((vector_size(16), aligned(16))); +typedef unsigned int v4u32_w __attribute__ ((vector_size(16), aligned(4))); +typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16))); +typedef long long v2i64_d __attribute__ ((vector_size(16), aligned(8))); +typedef unsigned long long v2u64 __attribute__ ((vector_size(16), aligned(16))); +typedef unsigned long long v2u64_d __attribute__ ((vector_size(16), aligned(8))); +typedef float v4f32 __attribute__ ((vector_size(16), aligned(16))); +typedef float v4f32_w 
__attribute__ ((vector_size(16), aligned(4))); +typedef double v2f64 __attribute__ ((vector_size(16), aligned(16))); +typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8))); + +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); + + +// CHECK-LABEL: @vsll_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsll_b(_1, _2); } +// CHECK-LABEL: @vsll_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsll_h(_1, _2); } +// CHECK-LABEL: @vsll_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsll_w(_1, _2); } +// CHECK-LABEL: @vsll_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsll_d(_1, _2); } +// CHECK-LABEL: @vslli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vslli_b(v16i8 _1) { return __builtin_lsx_vslli_b(_1, 1); } +// CHECK-LABEL: @vslli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vslli_h(v8i16 _1) { return __builtin_lsx_vslli_h(_1, 1); } +// CHECK-LABEL: @vslli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// 
+v4i32 vslli_w(v4i32 _1) { return __builtin_lsx_vslli_w(_1, 1); } +// CHECK-LABEL: @vslli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vslli_d(v2i64 _1) { return __builtin_lsx_vslli_d(_1, 1); } +// CHECK-LABEL: @vsra_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsra_b(_1, _2); } +// CHECK-LABEL: @vsra_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsra_h(_1, _2); } +// CHECK-LABEL: @vsra_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsra_w(_1, _2); } +// CHECK-LABEL: @vsra_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsra_d(_1, _2); } +// CHECK-LABEL: @vsrai_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vsrai_b(v16i8 _1) { return __builtin_lsx_vsrai_b(_1, 1); } +// CHECK-LABEL: @vsrai_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vsrai_h(v8i16 _1) { return __builtin_lsx_vsrai_h(_1, 1); } +// CHECK-LABEL: @vsrai_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// 
CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vsrai_w(v4i32 _1) { return __builtin_lsx_vsrai_w(_1, 1); } +// CHECK-LABEL: @vsrai_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vsrai_d(v2i64 _1) { return __builtin_lsx_vsrai_d(_1, 1); } +// CHECK-LABEL: @vsrar_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsrar_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrar_b(_1, _2); +} +// CHECK-LABEL: @vsrar_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsrar_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrar_h(_1, _2); +} +// CHECK-LABEL: @vsrar_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsrar_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrar_w(_1, _2); +} +// CHECK-LABEL: @vsrar_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsrar_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrar_d(_1, _2); +} +// CHECK-LABEL: @vsrari_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vsrari_b(v16i8 _1) { return __builtin_lsx_vsrari_b(_1, 1); } +// CHECK-LABEL: @vsrari_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vsrari_h(v8i16 _1) { return __builtin_lsx_vsrari_h(_1, 1); } +// CHECK-LABEL: @vsrari_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[TMP0]], i32 
1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vsrari_w(v4i32 _1) { return __builtin_lsx_vsrari_w(_1, 1); } +// CHECK-LABEL: @vsrari_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vsrari_d(v2i64 _1) { return __builtin_lsx_vsrari_d(_1, 1); } +// CHECK-LABEL: @vsrl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsrl_b(_1, _2); } +// CHECK-LABEL: @vsrl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsrl_h(_1, _2); } +// CHECK-LABEL: @vsrl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsrl_w(_1, _2); } +// CHECK-LABEL: @vsrl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsrl_d(_1, _2); } +// CHECK-LABEL: @vsrli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vsrli_b(v16i8 _1) { return __builtin_lsx_vsrli_b(_1, 1); } +// CHECK-LABEL: @vsrli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vsrli_h(v8i16 _1) { return __builtin_lsx_vsrli_h(_1, 1); } +// CHECK-LABEL: @vsrli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vsrli.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vsrli_w(v4i32 _1) { return __builtin_lsx_vsrli_w(_1, 1); } +// CHECK-LABEL: @vsrli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vsrli_d(v2i64 _1) { return __builtin_lsx_vsrli_d(_1, 1); } +// CHECK-LABEL: @vsrlr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrlr_b(_1, _2); +} +// CHECK-LABEL: @vsrlr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrlr_h(_1, _2); +} +// CHECK-LABEL: @vsrlr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrlr_w(_1, _2); +} +// CHECK-LABEL: @vsrlr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrlr_d(_1, _2); +} +// CHECK-LABEL: @vsrlri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vsrlri_b(v16i8 _1) { return __builtin_lsx_vsrlri_b(_1, 1); } +// CHECK-LABEL: @vsrlri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vsrlri_h(v8i16 _1) { return __builtin_lsx_vsrlri_h(_1, 1); } +// CHECK-LABEL: @vsrlri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vsrlri_w(v4i32 _1) { return __builtin_lsx_vsrlri_w(_1, 1); } +// CHECK-LABEL: @vsrlri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vsrlri_d(v2i64 _1) { return __builtin_lsx_vsrlri_d(_1, 1); } +// CHECK-LABEL: @vbitclr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitclr_b(_1, _2); +} +// CHECK-LABEL: @vbitclr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vbitclr_h(_1, _2); +} +// CHECK-LABEL: @vbitclr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vbitclr_w(_1, _2); +} +// CHECK-LABEL: @vbitclr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vbitclr_d(_1, _2); +} +// CHECK-LABEL: @vbitclri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vbitclri_b(v16u8 _1) { return __builtin_lsx_vbitclri_b(_1, 1); } +// CHECK-LABEL: @vbitclri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8u16 vbitclri_h(v8u16 _1) { return 
__builtin_lsx_vbitclri_h(_1, 1); } +// CHECK-LABEL: @vbitclri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vbitclri_w(v4u32 _1) { return __builtin_lsx_vbitclri_w(_1, 1); } +// CHECK-LABEL: @vbitclri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vbitclri_d(v2u64 _1) { return __builtin_lsx_vbitclri_d(_1, 1); } +// CHECK-LABEL: @vbitset_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vbitset_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitset_b(_1, _2); +} +// CHECK-LABEL: @vbitset_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vbitset_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vbitset_h(_1, _2); +} +// CHECK-LABEL: @vbitset_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vbitset_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vbitset_w(_1, _2); +} +// CHECK-LABEL: @vbitset_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vbitset_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vbitset_d(_1, _2); +} +// CHECK-LABEL: @vbitseti_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vbitseti_b(v16u8 _1) { return __builtin_lsx_vbitseti_b(_1, 1); } +// CHECK-LABEL: @vbitseti_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[TMP0]], i32 1) 
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8u16 vbitseti_h(v8u16 _1) { return __builtin_lsx_vbitseti_h(_1, 1); } +// CHECK-LABEL: @vbitseti_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vbitseti_w(v4u32 _1) { return __builtin_lsx_vbitseti_w(_1, 1); } +// CHECK-LABEL: @vbitseti_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vbitseti_d(v2u64 _1) { return __builtin_lsx_vbitseti_d(_1, 1); } +// CHECK-LABEL: @vbitrev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitrev_b(_1, _2); +} +// CHECK-LABEL: @vbitrev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vbitrev_h(_1, _2); +} +// CHECK-LABEL: @vbitrev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vbitrev_w(_1, _2); +} +// CHECK-LABEL: @vbitrev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vbitrev_d(_1, _2); +} +// CHECK-LABEL: @vbitrevi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vbitrevi_b(v16u8 _1) { return __builtin_lsx_vbitrevi_b(_1, 1); } +// CHECK-LABEL: @vbitrevi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast 
i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8u16 vbitrevi_h(v8u16 _1) { return __builtin_lsx_vbitrevi_h(_1, 1); } +// CHECK-LABEL: @vbitrevi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vbitrevi_w(v4u32 _1) { return __builtin_lsx_vbitrevi_w(_1, 1); } +// CHECK-LABEL: @vbitrevi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vbitrevi_d(v2u64 _1) { return __builtin_lsx_vbitrevi_d(_1, 1); } +// CHECK-LABEL: @vadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vadd_b(_1, _2); } +// CHECK-LABEL: @vadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vadd_h(_1, _2); } +// CHECK-LABEL: @vadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vadd_w(_1, _2); } +// CHECK-LABEL: @vadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_d(_1, _2); } +// CHECK-LABEL: @vaddi_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vaddi_bu(v16i8 _1) { return __builtin_lsx_vaddi_bu(_1, 1); } +// CHECK-LABEL: 
@vaddi_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vaddi_hu(v8i16 _1) { return __builtin_lsx_vaddi_hu(_1, 1); }
+// CHECK-LABEL: @vaddi_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vaddi_wu(v4i32 _1) { return __builtin_lsx_vaddi_wu(_1, 1); }
+// CHECK-LABEL: @vaddi_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vaddi_du(v2i64 _1) { return __builtin_lsx_vaddi_du(_1, 1); }
+// CHECK-LABEL: @vsub_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsub_b(_1, _2); }
+// CHECK-LABEL: @vsub_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsub_h(_1, _2); }
+// CHECK-LABEL: @vsub_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsub_w(_1, _2); }
+// CHECK-LABEL: @vsub_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_d(_1, _2); }
+// CHECK-LABEL: @vsubi_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vsubi_bu(v16i8 _1) { return __builtin_lsx_vsubi_bu(_1, 1); }
+// CHECK-LABEL: @vsubi_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vsubi_hu(v8i16 _1) { return __builtin_lsx_vsubi_hu(_1, 1); }
+// CHECK-LABEL: @vsubi_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vsubi_wu(v4i32 _1) { return __builtin_lsx_vsubi_wu(_1, 1); }
+// CHECK-LABEL: @vsubi_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vsubi_du(v2i64 _1) { return __builtin_lsx_vsubi_du(_1, 1); }
+// CHECK-LABEL: @vmax_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmax_b(_1, _2); }
+// CHECK-LABEL: @vmax_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmax_h(_1, _2); }
+// CHECK-LABEL: @vmax_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmax_w(_1, _2); }
+// CHECK-LABEL: @vmax_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmax_d(_1, _2); }
+// CHECK-LABEL: @vmaxi_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vmaxi_b(v16i8 _1) { return __builtin_lsx_vmaxi_b(_1, 1); }
+// CHECK-LABEL: @vmaxi_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vmaxi_h(v8i16 _1) { return __builtin_lsx_vmaxi_h(_1, 1); }
+// CHECK-LABEL: @vmaxi_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vmaxi_w(v4i32 _1) { return __builtin_lsx_vmaxi_w(_1, 1); }
+// CHECK-LABEL: @vmaxi_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vmaxi_d(v2i64 _1) { return __builtin_lsx_vmaxi_d(_1, 1); }
+// CHECK-LABEL: @vmax_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vmax_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vmax_bu(_1, _2);
+}
+// CHECK-LABEL: @vmax_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vmax_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vmax_hu(_1, _2);
+}
+// CHECK-LABEL: @vmax_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vmax_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vmax_wu(_1, _2);
+}
+// CHECK-LABEL: @vmax_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vmax_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vmax_du(_1, _2);
+}
+// CHECK-LABEL: @vmaxi_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16u8 vmaxi_bu(v16u8 _1) { return __builtin_lsx_vmaxi_bu(_1, 1); }
+// CHECK-LABEL: @vmaxi_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8u16 vmaxi_hu(v8u16 _1) { return __builtin_lsx_vmaxi_hu(_1, 1); }
+// CHECK-LABEL: @vmaxi_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4u32 vmaxi_wu(v4u32 _1) { return __builtin_lsx_vmaxi_wu(_1, 1); }
+// CHECK-LABEL: @vmaxi_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2u64 vmaxi_du(v2u64 _1) { return __builtin_lsx_vmaxi_du(_1, 1); }
+// CHECK-LABEL: @vmin_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmin_b(_1, _2); }
+// CHECK-LABEL: @vmin_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmin_h(_1, _2); }
+// CHECK-LABEL: @vmin_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmin_w(_1, _2); }
+// CHECK-LABEL: @vmin_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmin_d(_1, _2); }
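As the listing above shows, the `_b/_h/_w/_d` suffix selects the lane width, the `u` variants operate on unsigned lanes, and the `i` forms take a small constant immediate. A minimal usage sketch (the typedef mirrors the vector_size(16) declarations this test file uses; the helper name is hypothetical):

typedef signed char v16i8 __attribute__((vector_size(16), aligned(16)));

v16i8 clamp_min_demo(v16i8 x, v16i8 y) {
  v16i8 m = __builtin_lsx_vmax_b(x, y); /* per-byte signed max of two vectors */
  return __builtin_lsx_vmaxi_b(m, 1);   /* immediate form: last arg must be a compile-time constant */
}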
+// CHECK-LABEL: @vmini_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vmini_b(v16i8 _1) { return __builtin_lsx_vmini_b(_1, 1); }
+// CHECK-LABEL: @vmini_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vmini_h(v8i16 _1) { return __builtin_lsx_vmini_h(_1, 1); }
+// CHECK-LABEL: @vmini_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vmini_w(v4i32 _1) { return __builtin_lsx_vmini_w(_1, 1); }
+// CHECK-LABEL: @vmini_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vmini_d(v2i64 _1) { return __builtin_lsx_vmini_d(_1, 1); }
+// CHECK-LABEL: @vmin_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vmin_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vmin_bu(_1, _2);
+}
+// CHECK-LABEL: @vmin_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vmin_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vmin_hu(_1, _2);
+}
+// CHECK-LABEL: @vmin_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vmin_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vmin_wu(_1, _2);
+}
+// CHECK-LABEL: @vmin_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vmin_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vmin_du(_1, _2);
+}
+// CHECK-LABEL: @vmini_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16u8 vmini_bu(v16u8 _1) { return __builtin_lsx_vmini_bu(_1, 1); }
+// CHECK-LABEL: @vmini_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8u16 vmini_hu(v8u16 _1) { return __builtin_lsx_vmini_hu(_1, 1); }
+// CHECK-LABEL: @vmini_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4u32 vmini_wu(v4u32 _1) { return __builtin_lsx_vmini_wu(_1, 1); }
+// CHECK-LABEL: @vmini_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2u64 vmini_du(v2u64 _1) { return __builtin_lsx_vmini_du(_1, 1); }
+// CHECK-LABEL: @vseq_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vseq_b(_1, _2); }
+// CHECK-LABEL: @vseq_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vseq_h(_1, _2); }
+// CHECK-LABEL: @vseq_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vseq_w(_1, _2); }
+// CHECK-LABEL: @vseq_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vseq_d(_1, _2); }
+// CHECK-LABEL: @vseqi_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vseqi_b(v16i8 _1) { return __builtin_lsx_vseqi_b(_1, 1); }
+// CHECK-LABEL: @vseqi_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vseqi_h(v8i16 _1) { return __builtin_lsx_vseqi_h(_1, 1); }
+// CHECK-LABEL: @vseqi_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vseqi_w(v4i32 _1) { return __builtin_lsx_vseqi_w(_1, 1); }
+// CHECK-LABEL: @vseqi_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vseqi_d(v2i64 _1) { return __builtin_lsx_vseqi_d(_1, 1); }
+// CHECK-LABEL: @vslti_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vslti_b(v16i8 _1) { return __builtin_lsx_vslti_b(_1, 1); }
+// CHECK-LABEL: @vslt_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vslt_b(_1, _2); }
+// CHECK-LABEL: @vslt_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vslt_h(_1, _2); }
+// CHECK-LABEL: @vslt_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vslt_w(_1, _2); }
+// CHECK-LABEL: @vslt_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vslt_d(_1, _2); }
+// CHECK-LABEL: @vslti_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vslti_h(v8i16 _1) { return __builtin_lsx_vslti_h(_1, 1); }
+// CHECK-LABEL: @vslti_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vslti_w(v4i32 _1) { return __builtin_lsx_vslti_w(_1, 1); }
+// CHECK-LABEL: @vslti_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vslti_d(v2i64 _1) { return __builtin_lsx_vslti_d(_1, 1); }
+// CHECK-LABEL: @vslt_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vslt_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vslt_bu(_1, _2);
+}
+// CHECK-LABEL: @vslt_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vslt_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vslt_hu(_1, _2);
+}
+// CHECK-LABEL: @vslt_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vslt_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vslt_wu(_1, _2);
+}
+// CHECK-LABEL: @vslt_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vslt_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vslt_du(_1, _2);
+}
+// CHECK-LABEL: @vslti_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vslti_bu(v16u8 _1) { return __builtin_lsx_vslti_bu(_1, 1); }
+// CHECK-LABEL: @vslti_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vslti_hu(v8u16 _1) { return __builtin_lsx_vslti_hu(_1, 1); }
+// CHECK-LABEL: @vslti_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vslti_wu(v4u32 _1) { return __builtin_lsx_vslti_wu(_1, 1); }
+// CHECK-LABEL: @vslti_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vslti_du(v2u64 _1) { return __builtin_lsx_vslti_du(_1, 1); }
+// CHECK-LABEL: @vsle_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsle_b(_1, _2); }
+// CHECK-LABEL: @vsle_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsle_h(_1, _2); }
+// CHECK-LABEL: @vsle_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsle_w(_1, _2); }
+// CHECK-LABEL: @vsle_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsle_d(_1, _2); }
+// CHECK-LABEL: @vslei_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vslei_b(v16i8 _1) { return __builtin_lsx_vslei_b(_1, 1); }
+// CHECK-LABEL: @vslei_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vslei_h(v8i16 _1) { return __builtin_lsx_vslei_h(_1, 1); }
+// CHECK-LABEL: @vslei_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vslei_w(v4i32 _1) { return __builtin_lsx_vslei_w(_1, 1); }
+// CHECK-LABEL: @vslei_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vslei_d(v2i64 _1) { return __builtin_lsx_vslei_d(_1, 1); }
+// CHECK-LABEL: @vsle_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vsle_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vsle_bu(_1, _2);
+}
+// CHECK-LABEL: @vsle_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vsle_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vsle_hu(_1, _2);
+}
+// CHECK-LABEL: @vsle_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vsle_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vsle_wu(_1, _2);
+}
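Note that the compare builtins above (vseq/vslt/vsle, including the immediate and unsigned forms) all return a vector of the signed element type, which suggests the usual SIMD lane-mask convention: all ones where the predicate holds, all zeros elsewhere. A usage sketch under that assumption (typedef as in this test file; the mask idiom is illustrative, not from the patch):

typedef signed char v16i8 __attribute__((vector_size(16), aligned(16)));

v16i8 min_via_mask(v16i8 a, v16i8 b) {
  v16i8 lt = __builtin_lsx_vslt_b(a, b); /* assumed -1 in lanes where a < b, else 0 */
  return (a & lt) | (b & ~lt);           /* select the smaller lane through the mask */
}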
+// CHECK-LABEL: @vsle_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vsle_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vsle_du(_1, _2);
+}
+// CHECK-LABEL: @vslei_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vslei_bu(v16u8 _1) { return __builtin_lsx_vslei_bu(_1, 1); }
+// CHECK-LABEL: @vslei_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vslei_hu(v8u16 _1) { return __builtin_lsx_vslei_hu(_1, 1); }
+// CHECK-LABEL: @vslei_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vslei_wu(v4u32 _1) { return __builtin_lsx_vslei_wu(_1, 1); }
+// CHECK-LABEL: @vslei_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vslei_du(v2u64 _1) { return __builtin_lsx_vslei_du(_1, 1); }
+// CHECK-LABEL: @vsat_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vsat_b(v16i8 _1) { return __builtin_lsx_vsat_b(_1, 1); }
+// CHECK-LABEL: @vsat_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vsat_h(v8i16 _1) { return __builtin_lsx_vsat_h(_1, 1); }
+// CHECK-LABEL: @vsat_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vsat_w(v4i32 _1) { return __builtin_lsx_vsat_w(_1, 1); }
+// CHECK-LABEL: @vsat_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vsat_d(v2i64 _1) { return __builtin_lsx_vsat_d(_1, 1); }
+// CHECK-LABEL: @vsat_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16u8 vsat_bu(v16u8 _1) { return __builtin_lsx_vsat_bu(_1, 1); }
+// CHECK-LABEL: @vsat_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8u16 vsat_hu(v8u16 _1) { return __builtin_lsx_vsat_hu(_1, 1); }
+// CHECK-LABEL: @vsat_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4u32 vsat_wu(v4u32 _1) { return __builtin_lsx_vsat_wu(_1, 1); }
+// CHECK-LABEL: @vsat_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2u64 vsat_du(v2u64 _1) { return __builtin_lsx_vsat_du(_1, 1); }
+// CHECK-LABEL: @vadda_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vadda_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vadda_b(_1, _2);
+}
+// CHECK-LABEL: @vadda_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vadda_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vadda_h(_1, _2);
+}
+// CHECK-LABEL: @vadda_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vadda_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vadda_w(_1, _2);
+}
+// CHECK-LABEL: @vadda_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vadda_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vadda_d(_1, _2);
+}
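The vsat_* builtins above clamp each lane into a range selected by the immediate; on my reading of the ISA manual (an assumption, since the tests only exercise immediate 1), the signed form keeps lanes in the (imm+1)-bit signed range. A sketch under that assumption:

typedef short v8i16 __attribute__((vector_size(16), aligned(16)));

v8i16 presaturate_to_s8(v8i16 x) {
  /* Assumed semantics: clamp each 16-bit lane to [-128, 127] before a narrowing pack. */
  return __builtin_lsx_vsat_h(x, 7);
}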
+// CHECK-LABEL: @vsadd_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vsadd_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vsadd_b(_1, _2);
+}
+// CHECK-LABEL: @vsadd_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vsadd_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsadd_h(_1, _2);
+}
+// CHECK-LABEL: @vsadd_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vsadd_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsadd_w(_1, _2);
+}
+// CHECK-LABEL: @vsadd_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vsadd_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsadd_d(_1, _2);
+}
+// CHECK-LABEL: @vsadd_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vsadd_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vsadd_bu(_1, _2);
+}
+// CHECK-LABEL: @vsadd_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vsadd_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vsadd_hu(_1, _2);
+}
+// CHECK-LABEL: @vsadd_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vsadd_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vsadd_wu(_1, _2);
+}
+// CHECK-LABEL: @vsadd_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vsadd_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vsadd_du(_1, _2);
+}
+// CHECK-LABEL: @vavg_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vavg_b(_1, _2); }
+// CHECK-LABEL: @vavg_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vavg_h(_1, _2); }
+// CHECK-LABEL: @vavg_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vavg_w(_1, _2); }
+// CHECK-LABEL: @vavg_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vavg_d(_1, _2); }
+// CHECK-LABEL: @vavg_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vavg_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vavg_bu(_1, _2);
+}
+// CHECK-LABEL: @vavg_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vavg_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vavg_hu(_1, _2);
+}
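The vsadd_*/vssub_* builtins saturate instead of wrapping, and vavg_* vs. vavgr_* differ, on my reading of the instruction names (an assumption), in truncating vs. rounding the halved sum. A small sketch (typedef as in this test file; function names are hypothetical):

typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16)));

v16u8 saturating_add(v16u8 a, v16u8 b) {
  return __builtin_lsx_vsadd_bu(a, b); /* clamps at 255 instead of wrapping */
}

v16u8 rounded_average(v16u8 a, v16u8 b) {
  return __builtin_lsx_vavgr_bu(a, b); /* assumed (a + b + 1) >> 1 per lane */
}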
+// CHECK-LABEL: @vavg_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vavg_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vavg_wu(_1, _2);
+}
+// CHECK-LABEL: @vavg_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vavg_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vavg_du(_1, _2);
+}
+// CHECK-LABEL: @vavgr_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vavgr_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vavgr_b(_1, _2);
+}
+// CHECK-LABEL: @vavgr_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vavgr_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vavgr_h(_1, _2);
+}
+// CHECK-LABEL: @vavgr_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vavgr_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vavgr_w(_1, _2);
+}
+// CHECK-LABEL: @vavgr_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vavgr_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vavgr_d(_1, _2);
+}
+// CHECK-LABEL: @vavgr_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vavgr_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vavgr_bu(_1, _2);
+}
+// CHECK-LABEL: @vavgr_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vavgr_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vavgr_hu(_1, _2);
+}
+// CHECK-LABEL: @vavgr_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vavgr_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vavgr_wu(_1, _2);
+}
+// CHECK-LABEL: @vavgr_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vavgr_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vavgr_du(_1, _2);
+}
+// CHECK-LABEL: @vssub_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vssub_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vssub_b(_1, _2);
+}
+// CHECK-LABEL: @vssub_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vssub_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vssub_h(_1, _2);
+}
+// CHECK-LABEL: @vssub_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vssub_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vssub_w(_1, _2);
+}
+// CHECK-LABEL: @vssub_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vssub_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vssub_d(_1, _2);
+}
+// CHECK-LABEL: @vssub_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vssub_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vssub_bu(_1, _2);
+}
+// CHECK-LABEL: @vssub_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vssub_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vssub_hu(_1, _2);
+}
+// CHECK-LABEL: @vssub_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vssub_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vssub_wu(_1, _2);
+}
+// CHECK-LABEL: @vssub_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vssub_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vssub_du(_1, _2);
+}
+// CHECK-LABEL: @vabsd_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vabsd_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vabsd_b(_1, _2);
+}
+// CHECK-LABEL: @vabsd_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vabsd_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vabsd_h(_1, _2);
+}
+// CHECK-LABEL: @vabsd_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vabsd_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vabsd_w(_1, _2);
+}
+// CHECK-LABEL: @vabsd_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vabsd_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vabsd_d(_1, _2);
+}
+// CHECK-LABEL: @vabsd_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vabsd_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vabsd_bu(_1, _2);
+}
+// CHECK-LABEL: @vabsd_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vabsd_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vabsd_hu(_1, _2);
+}
+// CHECK-LABEL: @vabsd_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vabsd_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vabsd_wu(_1, _2);
+}
+// CHECK-LABEL: @vabsd_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vabsd_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vabsd_du(_1, _2);
+}
+// CHECK-LABEL: @vmul_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmul_b(_1, _2); }
+// CHECK-LABEL: @vmul_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmul_h(_1, _2); }
+// CHECK-LABEL: @vmul_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmul_w(_1, _2); }
+// CHECK-LABEL: @vmul_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmul_d(_1, _2); }
+// CHECK-LABEL: @vmadd_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) {
+  return __builtin_lsx_vmadd_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vmadd_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) {
+  return __builtin_lsx_vmadd_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vmadd_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) {
+  return __builtin_lsx_vmadd_w(_1, _2, _3);
+}
+// CHECK-LABEL: @vmadd_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) {
+  return __builtin_lsx_vmadd_d(_1, _2, _3);
+}
<16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vmsub_b(_1, _2, _3); +} +// CHECK-LABEL: @vmsub_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vmsub_h(_1, _2, _3); +} +// CHECK-LABEL: @vmsub_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vmsub_w(_1, _2, _3); +} +// CHECK-LABEL: @vmsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vmsub_d(_1, _2, _3); +} +// CHECK-LABEL: @vdiv_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vdiv_b(_1, _2); } +// CHECK-LABEL: @vdiv_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vdiv_h(_1, _2); } +// CHECK-LABEL: @vdiv_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vdiv_w(_1, _2); } +// 
CHECK-LABEL: @vdiv_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vdiv_d(_1, _2); } +// CHECK-LABEL: @vdiv_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vdiv_bu(_1, _2); +} +// CHECK-LABEL: @vdiv_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vdiv_hu(_1, _2); +} +// CHECK-LABEL: @vdiv_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vdiv_wu(_1, _2); +} +// CHECK-LABEL: @vdiv_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vdiv_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vdiv_du(_1, _2); +} +// CHECK-LABEL: @vhaddw_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vhaddw_h_b(_1, _2); +} +// CHECK-LABEL: @vhaddw_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vhaddw_w_h(_1, _2); +} +// CHECK-LABEL: 
@vhaddw_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vhaddw_d_w(_1, _2); +} +// CHECK-LABEL: @vhaddw_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vhaddw_hu_bu(_1, _2); +} +// CHECK-LABEL: @vhaddw_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vhaddw_wu_hu(_1, _2); +} +// CHECK-LABEL: @vhaddw_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vhaddw_du_wu(_1, _2); +} +// CHECK-LABEL: @vhsubw_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vhsubw_h_b(_1, _2); +} +// CHECK-LABEL: @vhsubw_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vhsubw_w_h(_1, _2); +} +// CHECK-LABEL: @vhsubw_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vhsubw_d_w(v4i32 _1, 
v4i32 _2) { + return __builtin_lsx_vhsubw_d_w(_1, _2); +} +// CHECK-LABEL: @vhsubw_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vhsubw_hu_bu(_1, _2); +} +// CHECK-LABEL: @vhsubw_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vhsubw_wu_hu(_1, _2); +} +// CHECK-LABEL: @vhsubw_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vhsubw_du_wu(_1, _2); +} +// CHECK-LABEL: @vmod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmod_b(_1, _2); } +// CHECK-LABEL: @vmod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmod_h(_1, _2); } +// CHECK-LABEL: @vmod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmod_w(_1, _2); } +// CHECK-LABEL: @vmod_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 
vmod_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmod_d(_1, _2); } +// CHECK-LABEL: @vmod_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vmod_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmod_bu(_1, _2); +} +// CHECK-LABEL: @vmod_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vmod_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmod_hu(_1, _2); +} +// CHECK-LABEL: @vmod_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vmod_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmod_wu(_1, _2); +} +// CHECK-LABEL: @vmod_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vmod_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmod_du(_1, _2); +} +// CHECK-LABEL: @vreplve_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[TMP0]], i32 [[_2:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vreplve_b(v16i8 _1, int _2) { + return __builtin_lsx_vreplve_b(_1, _2); +} +// CHECK-LABEL: @vreplve_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[TMP0]], i32 [[_2:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vreplve_h(v8i16 _1, int _2) { + return __builtin_lsx_vreplve_h(_1, _2); +} +// CHECK-LABEL: @vreplve_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[TMP0]], i32 [[_2:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vreplve_w(v4i32 _1, int _2) { + return __builtin_lsx_vreplve_w(_1, _2); +} +// CHECK-LABEL: @vreplve_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vreplve.d(<2 x i64> [[TMP0]], i32 [[_2:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vreplve_d(v2i64 _1, int _2) { + return __builtin_lsx_vreplve_d(_1, _2); +} +// CHECK-LABEL: @vreplvei_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vreplvei_b(v16i8 _1) { return __builtin_lsx_vreplvei_b(_1, 1); } +// CHECK-LABEL: @vreplvei_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vreplvei_h(v8i16 _1) { return __builtin_lsx_vreplvei_h(_1, 1); } +// CHECK-LABEL: @vreplvei_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vreplvei_w(v4i32 _1) { return __builtin_lsx_vreplvei_w(_1, 1); } +// CHECK-LABEL: @vreplvei_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vreplvei_d(v2i64 _1) { return __builtin_lsx_vreplvei_d(_1, 1); } +// CHECK-LABEL: @vpickev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vpickev_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpickev_b(_1, _2); +} +// CHECK-LABEL: @vpickev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vpickev_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpickev_h(_1, _2); +} +// CHECK-LABEL: @vpickev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vpickev_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpickev_w(_1, _2); +} +// CHECK-LABEL: @vpickev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to 
<2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vpickev_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpickev_d(_1, _2); +} +// CHECK-LABEL: @vpickod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vpickod_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpickod_b(_1, _2); +} +// CHECK-LABEL: @vpickod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vpickod_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpickod_h(_1, _2); +} +// CHECK-LABEL: @vpickod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vpickod_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpickod_w(_1, _2); +} +// CHECK-LABEL: @vpickod_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vpickod_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpickod_d(_1, _2); +} +// CHECK-LABEL: @vilvh_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vilvh_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vilvh_b(_1, _2); +} +// CHECK-LABEL: @vilvh_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vilvh_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vilvh_h(_1, _2); +} +// CHECK-LABEL: @vilvh_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vilvh_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vilvh_w(_1, _2); +} +// CHECK-LABEL: @vilvh_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vilvh_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vilvh_d(_1, _2); +} +// CHECK-LABEL: @vilvl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vilvl_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vilvl_b(_1, _2); +} +// CHECK-LABEL: @vilvl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vilvl_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vilvl_h(_1, _2); +} +// CHECK-LABEL: @vilvl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vilvl_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vilvl_w(_1, _2); +} +// CHECK-LABEL: @vilvl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vilvl_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vilvl_d(_1, _2); +} +// CHECK-LABEL: @vpackev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vpackev_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpackev_b(_1, _2); +} +// CHECK-LABEL: @vpackev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 
x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vpackev_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpackev_h(_1, _2); +} +// CHECK-LABEL: @vpackev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vpackev_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpackev_w(_1, _2); +} +// CHECK-LABEL: @vpackev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vpackev_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpackev_d(_1, _2); +} +// CHECK-LABEL: @vpackod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vpackod_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpackod_b(_1, _2); +} +// CHECK-LABEL: @vpackod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vpackod_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpackod_h(_1, _2); +} +// CHECK-LABEL: @vpackod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vpackod_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpackod_w(_1, _2); +} +// CHECK-LABEL: @vpackod_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vpackod_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpackod_d(_1, _2); +} +// CHECK-LABEL: @vshuf_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vshuf_h(_1, _2, _3); +} +// CHECK-LABEL: @vshuf_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vshuf_w(_1, _2, _3); +} +// CHECK-LABEL: @vshuf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vshuf_d(_1, _2, _3); +} +// CHECK-LABEL: @vand_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vand_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vand_v(_1, _2); } +// CHECK-LABEL: @vandi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vandi_b(v16u8 _1) { return __builtin_lsx_vandi_b(_1, 1); } +// CHECK-LABEL: @vor_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vor_v(_1, _2); } +// CHECK-LABEL: @vori_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vori_b(v16u8 _1) { return __builtin_lsx_vori_b(_1, 1); } +// CHECK-LABEL: @vnor_v( 
+// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vnor_v(_1, _2); } +// CHECK-LABEL: @vnori_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vnori_b(v16u8 _1) { return __builtin_lsx_vnori_b(_1, 1); } +// CHECK-LABEL: @vxor_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vxor_v(_1, _2); } +// CHECK-LABEL: @vxori_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vxori_b(v16u8 _1) { return __builtin_lsx_vxori_b(_1, 1); } +// CHECK-LABEL: @vbitsel_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { + return __builtin_lsx_vbitsel_v(_1, _2, _3); +} +// CHECK-LABEL: @vbitseli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitseli_b(_1, _2, 1); +} +// CHECK-LABEL: @vshuf4i_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vshuf4i_b(v16i8 _1) { return __builtin_lsx_vshuf4i_b(_1, 1); } +// CHECK-LABEL: @vshuf4i_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[TMP0]], i32 
1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vshuf4i_h(v8i16 _1) { return __builtin_lsx_vshuf4i_h(_1, 1); } +// CHECK-LABEL: @vshuf4i_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vshuf4i_w(v4i32 _1) { return __builtin_lsx_vshuf4i_w(_1, 1); } +// CHECK-LABEL: @vreplgr2vr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v16i8 vreplgr2vr_b(int _1) { return __builtin_lsx_vreplgr2vr_b(_1); } +// CHECK-LABEL: @vreplgr2vr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v8i16 vreplgr2vr_h(int _1) { return __builtin_lsx_vreplgr2vr_h(_1); } +// CHECK-LABEL: @vreplgr2vr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v4i32 vreplgr2vr_w(int _1) { return __builtin_lsx_vreplgr2vr_w(_1); } +// CHECK-LABEL: @vreplgr2vr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v2i64 vreplgr2vr_d(long _1) { return __builtin_lsx_vreplgr2vr_d(_1); } +// CHECK-LABEL: @vpcnt_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vpcnt_b(v16i8 _1) { return __builtin_lsx_vpcnt_b(_1); } +// CHECK-LABEL: @vpcnt_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vpcnt_h(v8i16 _1) { return __builtin_lsx_vpcnt_h(_1); } +// CHECK-LABEL: @vpcnt_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vpcnt_w(v4i32 _1) { return __builtin_lsx_vpcnt_w(_1); } +// CHECK-LABEL: @vpcnt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vpcnt_d(v2i64 _1) { return __builtin_lsx_vpcnt_d(_1); } +// CHECK-LABEL: @vclo_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = 
bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vclo_b(v16i8 _1) { return __builtin_lsx_vclo_b(_1); } +// CHECK-LABEL: @vclo_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vclo_h(v8i16 _1) { return __builtin_lsx_vclo_h(_1); } +// CHECK-LABEL: @vclo_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vclo_w(v4i32 _1) { return __builtin_lsx_vclo_w(_1); } +// CHECK-LABEL: @vclo_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vclo_d(v2i64 _1) { return __builtin_lsx_vclo_d(_1); } +// CHECK-LABEL: @vclz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vclz_b(v16i8 _1) { return __builtin_lsx_vclz_b(_1); } +// CHECK-LABEL: @vclz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vclz_h(v8i16 _1) { return __builtin_lsx_vclz_h(_1); } +// CHECK-LABEL: @vclz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vclz_w(v4i32 _1) { return __builtin_lsx_vclz_w(_1); } +// CHECK-LABEL: @vclz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vclz_d(v2i64 _1) { return __builtin_lsx_vclz_d(_1); } +// CHECK-LABEL: @vpickve2gr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int vpickve2gr_b(v16i8 _1) { return __builtin_lsx_vpickve2gr_b(_1, 1); } +// CHECK-LABEL: @vpickve2gr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 
@llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int vpickve2gr_h(v8i16 _1) { return __builtin_lsx_vpickve2gr_h(_1, 1); } +// CHECK-LABEL: @vpickve2gr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int vpickve2gr_w(v4i32 _1) { return __builtin_lsx_vpickve2gr_w(_1, 1); } +// CHECK-LABEL: @vpickve2gr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: ret i64 [[TMP1]] +// +long vpickve2gr_d(v2i64 _1) { return __builtin_lsx_vpickve2gr_d(_1, 1); } +// CHECK-LABEL: @vpickve2gr_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: ret i32 [[TMP1]] +// +unsigned int vpickve2gr_bu(v16i8 _1) { + return __builtin_lsx_vpickve2gr_bu(_1, 1); +} +// CHECK-LABEL: @vpickve2gr_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: ret i32 [[TMP1]] +// +unsigned int vpickve2gr_hu(v8i16 _1) { + return __builtin_lsx_vpickve2gr_hu(_1, 1); +} +// CHECK-LABEL: @vpickve2gr_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: ret i32 [[TMP1]] +// +unsigned int vpickve2gr_wu(v4i32 _1) { + return __builtin_lsx_vpickve2gr_wu(_1, 1); +} +// CHECK-LABEL: @vpickve2gr_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: ret i64 [[TMP1]] +// +unsigned long int vpickve2gr_du(v2i64 _1) { + return __builtin_lsx_vpickve2gr_du(_1, 1); +} +// CHECK-LABEL: @vinsgr2vr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[TMP0]], i32 1, i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vinsgr2vr_b(v16i8 _1) { + return __builtin_lsx_vinsgr2vr_b(_1, 1, 1); +} +// CHECK-LABEL: @vinsgr2vr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[TMP0]], i32 1, i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vinsgr2vr_h(v8i16 _1) { + return __builtin_lsx_vinsgr2vr_h(_1, 1, 1); +} +// CHECK-LABEL: @vinsgr2vr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[TMP0]], i32 1, i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vinsgr2vr_w(v4i32 
_1) {
+  return __builtin_lsx_vinsgr2vr_w(_1, 1, 1);
+}
+// CHECK-LABEL: @vinsgr2vr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[TMP0]], i64 1, i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vinsgr2vr_d(v2i64 _1) {
+  return __builtin_lsx_vinsgr2vr_d(_1, 1, 1);
+}
+// CHECK-LABEL: @vfadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfadd_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfadd_s(_1, _2);
+}
+// CHECK-LABEL: @vfadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfadd_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfadd_d(_1, _2);
+}
+// CHECK-LABEL: @vfsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfsub_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfsub_s(_1, _2);
+}
+// CHECK-LABEL: @vfsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfsub_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfsub_d(_1, _2);
+}
+// CHECK-LABEL: @vfmul_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfmul_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfmul_s(_1, _2);
+}
+// CHECK-LABEL: @vfmul_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfmul_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfmul_d(_1, _2);
+}
+// CHECK-LABEL: @vfdiv_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfdiv_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfdiv_s(_1, _2);
+}
+// CHECK-LABEL: @vfdiv_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfdiv_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfdiv_d(_1, _2);
+}
+// CHECK-LABEL: @vfcvt_h_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcvt_h_s(_1, _2);
+}
+// CHECK-LABEL: @vfcvt_s_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcvt_s_d(_1, _2);
+}
+// CHECK-LABEL: @vfmin_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfmin_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfmin_s(_1, _2);
+}
+// CHECK-LABEL: @vfmin_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfmin_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfmin_d(_1, _2);
+}
+// CHECK-LABEL: @vfmina_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfmina_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfmina_s(_1, _2);
+}
+// CHECK-LABEL: @vfmina_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfmina_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfmina_d(_1, _2);
+}
+// CHECK-LABEL: @vfmax_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfmax_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfmax_s(_1, _2);
+}
+// CHECK-LABEL: @vfmax_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfmax_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfmax_d(_1, _2);
+}
+// CHECK-LABEL: @vfmaxa_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfmaxa_s(_1, _2);
+}
+// CHECK-LABEL: @vfmaxa_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfmaxa_d(_1, _2);
+}
+// CHECK-LABEL: @vfclass_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vfclass_s(v4f32 _1) { return __builtin_lsx_vfclass_s(_1); }
+// CHECK-LABEL: @vfclass_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vfclass_d(v2f64 _1) { return __builtin_lsx_vfclass_d(_1); }
+// CHECK-LABEL: @vfsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vfsqrt_s(v4f32 _1) { return __builtin_lsx_vfsqrt_s(_1); }
+// CHECK-LABEL: @vfsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vfsqrt_d(v2f64 _1) { return __builtin_lsx_vfsqrt_d(_1); }
+// CHECK-LABEL: @vfrecip_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vfrecip_s(v4f32 _1) { return __builtin_lsx_vfrecip_s(_1); }
+// CHECK-LABEL: @vfrecip_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vfrecip_d(v2f64 _1) { return __builtin_lsx_vfrecip_d(_1); }
+// CHECK-LABEL: @vfrint_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vfrint_s(v4f32 _1) { return __builtin_lsx_vfrint_s(_1); }
+// CHECK-LABEL: @vfrint_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vfrint_d(v2f64 _1) { return __builtin_lsx_vfrint_d(_1); }
+// CHECK-LABEL: @vfrsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vfrsqrt_s(v4f32 _1) { return __builtin_lsx_vfrsqrt_s(_1); }
+// CHECK-LABEL: @vfrsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vfrsqrt_d(v2f64 _1) { return __builtin_lsx_vfrsqrt_d(_1); }
+// CHECK-LABEL: @vflogb_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vflogb_s(v4f32 _1) { return __builtin_lsx_vflogb_s(_1); }
+// CHECK-LABEL: @vflogb_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vflogb_d(v2f64 _1) { return __builtin_lsx_vflogb_d(_1); }
+// CHECK-LABEL: @vfcvth_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vfcvth_s_h(v8i16 _1) { return __builtin_lsx_vfcvth_s_h(_1); }
+// CHECK-LABEL: @vfcvth_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vfcvth_d_s(v4f32 _1) { return __builtin_lsx_vfcvth_d_s(_1); }
+// CHECK-LABEL: @vfcvtl_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vfcvtl_s_h(v8i16 _1) { return __builtin_lsx_vfcvtl_s_h(_1); }
+// CHECK-LABEL: @vfcvtl_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vfcvtl_d_s(v4f32 _1) { return __builtin_lsx_vfcvtl_d_s(_1); }
+// CHECK-LABEL: @vftint_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vftint_w_s(v4f32 _1) { return __builtin_lsx_vftint_w_s(_1); }
+// CHECK-LABEL: @vftint_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftint_l_d(v2f64 _1) { return __builtin_lsx_vftint_l_d(_1); }
+// CHECK-LABEL: @vftint_wu_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4u32 vftint_wu_s(v4f32 _1) { return __builtin_lsx_vftint_wu_s(_1); }
+// CHECK-LABEL: @vftint_lu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2u64 vftint_lu_d(v2f64 _1) { return __builtin_lsx_vftint_lu_d(_1); }
+// CHECK-LABEL: @vftintrz_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vftintrz_w_s(v4f32 _1) { return __builtin_lsx_vftintrz_w_s(_1); }
+// CHECK-LABEL: @vftintrz_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrz_l_d(v2f64 _1) { return __builtin_lsx_vftintrz_l_d(_1); }
+// CHECK-LABEL: @vftintrz_wu_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4u32 vftintrz_wu_s(v4f32 _1) { return __builtin_lsx_vftintrz_wu_s(_1); }
+// CHECK-LABEL: @vftintrz_lu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2u64 vftintrz_lu_d(v2f64 _1) { return __builtin_lsx_vftintrz_lu_d(_1); }
+// CHECK-LABEL: @vffint_s_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vffint_s_w(v4i32 _1) { return __builtin_lsx_vffint_s_w(_1); }
+// CHECK-LABEL: @vffint_d_l(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vffint_d_l(v2i64 _1) { return __builtin_lsx_vffint_d_l(_1); }
+// CHECK-LABEL: @vffint_s_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vffint_s_wu(v4u32 _1) { return __builtin_lsx_vffint_s_wu(_1); }
+// CHECK-LABEL: @vffint_d_lu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vffint_d_lu(v2u64 _1) { return __builtin_lsx_vffint_d_lu(_1); }
+// CHECK-LABEL: @vandn_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vandn_v(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vandn_v(_1, _2);
+}
+// CHECK-LABEL: @vneg_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vneg_b(v16i8 _1) { return __builtin_lsx_vneg_b(_1); }
+// CHECK-LABEL: @vneg_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vneg_h(v8i16 _1) { return __builtin_lsx_vneg_h(_1); }
+// CHECK-LABEL: @vneg_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vneg_w(v4i32 _1) { return __builtin_lsx_vneg_w(_1); }
+// CHECK-LABEL: @vneg_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vneg_d(v2i64 _1) { return __builtin_lsx_vneg_d(_1); }
+// CHECK-LABEL: @vmuh_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmuh_b(_1, _2); }
+// CHECK-LABEL: @vmuh_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmuh_h(_1, _2); }
+// CHECK-LABEL: @vmuh_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmuh_w(_1, _2); }
+// CHECK-LABEL: @vmuh_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmuh_d(_1, _2); }
+// CHECK-LABEL: @vmuh_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vmuh_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vmuh_bu(_1, _2);
+}
+// CHECK-LABEL: @vmuh_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vmuh_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vmuh_hu(_1, _2);
+}
+// CHECK-LABEL: @vmuh_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vmuh_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vmuh_wu(_1, _2);
+}
+// CHECK-LABEL: @vmuh_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2u64 vmuh_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vmuh_du(_1, _2);
+}
+// CHECK-LABEL: @vsllwil_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vsllwil_h_b(v16i8 _1) { return __builtin_lsx_vsllwil_h_b(_1, 1); }
+// CHECK-LABEL: @vsllwil_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vsllwil_w_h(v8i16 _1) { return __builtin_lsx_vsllwil_w_h(_1, 1); }
+// CHECK-LABEL: @vsllwil_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vsllwil_d_w(v4i32 _1) { return __builtin_lsx_vsllwil_d_w(_1, 1); }
+// CHECK-LABEL: @vsllwil_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8u16 vsllwil_hu_bu(v16u8 _1) {
+  return __builtin_lsx_vsllwil_hu_bu(_1, 1);
+}
+// CHECK-LABEL: @vsllwil_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4u32 vsllwil_wu_hu(v8u16 _1) {
+  return __builtin_lsx_vsllwil_wu_hu(_1, 1);
+}
+// CHECK-LABEL: @vsllwil_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2u64 vsllwil_du_wu(v4u32 _1) {
+  return __builtin_lsx_vsllwil_du_wu(_1, 1);
+}
+// CHECK-LABEL: @vsran_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsran_b_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsran_b_h(_1, _2);
+}
+// CHECK-LABEL: @vsran_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsran_h_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsran_h_w(_1, _2);
+}
+// CHECK-LABEL: @vsran_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsran_w_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsran_w_d(_1, _2);
+}
+// CHECK-LABEL: @vssran_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vssran_b_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vssran_b_h(_1, _2);
+}
+// CHECK-LABEL: @vssran_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vssran_h_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vssran_h_w(_1, _2);
+}
+// CHECK-LABEL: @vssran_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vssran_w_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vssran_w_d(_1, _2);
+}
+// CHECK-LABEL: @vssran_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vssran_bu_h(_1, _2);
+}
+// CHECK-LABEL: @vssran_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vssran_hu_w(_1, _2);
+}
+// CHECK-LABEL: @vssran_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vssran_wu_d(_1, _2);
+}
+// CHECK-LABEL: @vsrarn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsrarn_b_h(_1, _2);
+}
+// CHECK-LABEL: @vsrarn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsrarn_h_w(_1, _2);
+}
+// CHECK-LABEL: @vsrarn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsrarn_w_d(_1, _2);
+}
+// CHECK-LABEL: @vssrarn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrarn_b_h(_1, _2);
+}
+// CHECK-LABEL: @vssrarn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrarn_h_w(_1, _2);
+}
+// CHECK-LABEL: @vssrarn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrarn_w_d(_1, _2);
+}
+// CHECK-LABEL: @vssrarn_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vssrarn_bu_h(_1, _2);
+}
+// CHECK-LABEL: @vssrarn_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vssrarn_hu_w(_1, _2);
+}
+// CHECK-LABEL: @vssrarn_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vssrarn_wu_d(_1, _2);
+}
+// CHECK-LABEL: @vsrln_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsrln_b_h(_1, _2);
+}
+// CHECK-LABEL: @vsrln_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsrln_h_w(_1, _2);
+}
+// CHECK-LABEL: @vsrln_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsrln_w_d(_1, _2);
+}
+// CHECK-LABEL: @vssrln_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vssrln_bu_h(_1, _2);
+}
+// CHECK-LABEL: @vssrln_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vssrln_hu_w(_1, _2);
+}
+// CHECK-LABEL: @vssrln_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vssrln_wu_d(_1, _2);
+}
+// CHECK-LABEL: @vsrlrn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsrlrn_b_h(_1, _2);
+}
+// CHECK-LABEL: @vsrlrn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsrlrn_h_w(_1, _2);
+}
+// CHECK-LABEL: @vsrlrn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsrlrn_w_d(_1, _2);
+}
+// CHECK-LABEL: @vssrlrn_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vssrlrn_bu_h(_1, _2);
+}
+// CHECK-LABEL: @vssrlrn_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vssrlrn_hu_w(_1, _2);
+}
+// CHECK-LABEL: @vssrlrn_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vssrlrn_wu_d(_1, _2);
+}
+// CHECK-LABEL: @vfrstpi_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vfrstpi_b(_1, _2, 1);
+}
+// CHECK-LABEL: @vfrstpi_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vfrstpi_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vfrstp_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) {
+  return __builtin_lsx_vfrstp_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vfrstp_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) {
+  return __builtin_lsx_vfrstp_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vshuf4i_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vshuf4i_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vbsrl_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vbsrl_v(v16i8 _1) { return __builtin_lsx_vbsrl_v(_1, 1); }
+// CHECK-LABEL: @vbsll_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vbsll_v(v16i8 _1) { return __builtin_lsx_vbsll_v(_1, 1); }
+// CHECK-LABEL: @vextrins_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vextrins_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vextrins_b(_1, _2, 1);
+}
+// CHECK-LABEL: @vextrins_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vextrins_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vextrins_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vextrins_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vextrins_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vextrins_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vextrins_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vextrins_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vextrins_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vmskltz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vmskltz_b(v16i8 _1) { return __builtin_lsx_vmskltz_b(_1); }
+// CHECK-LABEL: @vmskltz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vmskltz_h(v8i16 _1) { return __builtin_lsx_vmskltz_h(_1); }
+// CHECK-LABEL: @vmskltz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vmskltz_w(v4i32 _1) { return __builtin_lsx_vmskltz_w(_1); }
+// CHECK-LABEL: @vmskltz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vmskltz_d(v2i64 _1) { return __builtin_lsx_vmskltz_d(_1); }
+// CHECK-LABEL: @vsigncov_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsigncov_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vsigncov_b(_1, _2);
+}
+// CHECK-LABEL: @vsigncov_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsigncov_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsigncov_h(_1, _2);
+}
+// CHECK-LABEL: @vsigncov_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsigncov_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsigncov_w(_1, _2);
+}
+// CHECK-LABEL: @vsigncov_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsigncov_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsigncov_d(_1, _2);
+}
+// CHECK-LABEL: @vfmadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) {
+  return __builtin_lsx_vfmadd_s(_1, _2, _3);
+}
+// CHECK-LABEL: @vfmadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) {
+  return __builtin_lsx_vfmadd_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vfmsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) {
+  return __builtin_lsx_vfmsub_s(_1, _2, _3);
+}
+// CHECK-LABEL: @vfmsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) {
+  return __builtin_lsx_vfmsub_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vfnmadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) {
+  return __builtin_lsx_vfnmadd_s(_1, _2, _3);
+}
+// CHECK-LABEL: @vfnmadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) {
+  return __builtin_lsx_vfnmadd_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vfnmsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) {
+  return __builtin_lsx_vfnmsub_s(_1, _2, _3);
+}
+// CHECK-LABEL: @vfnmsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) {
+  return __builtin_lsx_vfnmsub_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vftintrne_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vftintrne_w_s(v4f32 _1) { return __builtin_lsx_vftintrne_w_s(_1); }
+// CHECK-LABEL: @vftintrne_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrne_l_d(v2f64 _1) { return __builtin_lsx_vftintrne_l_d(_1); }
+// CHECK-LABEL: @vftintrp_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vftintrp_w_s(v4f32 _1) { return __builtin_lsx_vftintrp_w_s(_1); }
+// CHECK-LABEL: @vftintrp_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrp_l_d(v2f64 _1) { return __builtin_lsx_vftintrp_l_d(_1); }
+// CHECK-LABEL: @vftintrm_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vftintrm_w_s(v4f32 _1) { return __builtin_lsx_vftintrm_w_s(_1); }
+// CHECK-LABEL: @vftintrm_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrm_l_d(v2f64 _1) { return __builtin_lsx_vftintrm_l_d(_1); }
+// CHECK-LABEL: @vftint_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vftint_w_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vftint_w_d(_1, _2);
+}
+// CHECK-LABEL: @vffint_s_l(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vffint_s_l(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vffint_s_l(_1, _2);
+}
+// CHECK-LABEL: @vftintrz_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vftintrz_w_d(_1, _2);
+}
+// CHECK-LABEL: @vftintrp_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vftintrp_w_d(_1, _2);
+}
+// CHECK-LABEL: @vftintrm_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vftintrm_w_d(_1, _2);
+}
+// CHECK-LABEL: @vftintrne_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vftintrne_w_d(_1, _2);
+}
+// CHECK-LABEL: @vftintl_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintl_l_s(v4f32 _1) { return __builtin_lsx_vftintl_l_s(_1); }
+// CHECK-LABEL: @vftinth_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftinth_l_s(v4f32 _1) { return __builtin_lsx_vftinth_l_s(_1); }
+// CHECK-LABEL: @vffinth_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vffinth_d_w(v4i32 _1) { return __builtin_lsx_vffinth_d_w(_1); }
+// CHECK-LABEL: @vffintl_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vffintl_d_w(v4i32 _1) { return __builtin_lsx_vffintl_d_w(_1); }
+// CHECK-LABEL: @vftintrzl_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrzl_l_s(v4f32 _1) { return __builtin_lsx_vftintrzl_l_s(_1); }
+// CHECK-LABEL: @vftintrzh_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrzh_l_s(v4f32 _1) { return __builtin_lsx_vftintrzh_l_s(_1); }
+// CHECK-LABEL: @vftintrpl_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrpl_l_s(v4f32 _1) { return __builtin_lsx_vftintrpl_l_s(_1); }
+// CHECK-LABEL: @vftintrph_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrph_l_s(v4f32 _1) { return __builtin_lsx_vftintrph_l_s(_1); }
+// CHECK-LABEL: @vftintrml_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrml_l_s(v4f32 _1) { return __builtin_lsx_vftintrml_l_s(_1); }
+// CHECK-LABEL: @vftintrmh_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrmh_l_s(v4f32 _1) { return __builtin_lsx_vftintrmh_l_s(_1); }
+// CHECK-LABEL: @vftintrnel_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrnel_l_s(v4f32 _1) {
+  return __builtin_lsx_vftintrnel_l_s(_1);
+}
+// CHECK-LABEL: @vftintrneh_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrneh_l_s(v4f32 _1) {
+  return __builtin_lsx_vftintrneh_l_s(_1);
+}
+// CHECK-LABEL: @vfrintrne_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vfrintrne_s(v4f32 _1) { return __builtin_lsx_vfrintrne_s(_1); }
+// CHECK-LABEL: @vfrintrne_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vfrintrne_d(v2f64 _1) { return __builtin_lsx_vfrintrne_d(_1); }
+// CHECK-LABEL: @vfrintrz_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vfrintrz_s(v4f32 _1) { return __builtin_lsx_vfrintrz_s(_1); }
+// CHECK-LABEL: @vfrintrz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vfrintrz_d(v2f64 _1) { return __builtin_lsx_vfrintrz_d(_1); }
+// CHECK-LABEL: @vfrintrp_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vfrintrp_s(v4f32 _1) { return __builtin_lsx_vfrintrp_s(_1); }
+// CHECK-LABEL: @vfrintrp_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vfrintrp_d(v2f64 _1) { return __builtin_lsx_vfrintrp_d(_1); }
+// CHECK-LABEL: @vfrintrm_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vfrintrm_s(v4f32 _1) { return __builtin_lsx_vfrintrm_s(_1); }
+// CHECK-LABEL: @vfrintrm_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vfrintrm_d(v2f64 _1) { return __builtin_lsx_vfrintrm_d(_1); }
+// CHECK-LABEL: @vstelm_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1, i32 1)
+// CHECK-NEXT: ret void
+//
+void vstelm_b(v16i8 _1, void *_2) {
+  return __builtin_lsx_vstelm_b(_1, _2, 1, 1);
+}
+// CHECK-LABEL: @vstelm_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[TMP0]], ptr [[_2:%.*]], i32 2, i32 1)
+// CHECK-NEXT: ret void
+//
+void vstelm_h(v8i16 _1, void *_2) {
+  return
__builtin_lsx_vstelm_h(_1, _2, 2, 1); +} +// CHECK-LABEL: @vstelm_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[TMP0]], ptr [[_2:%.*]], i32 4, i32 1) +// CHECK-NEXT: ret void +// +void vstelm_w(v4i32 _1, void *_2) { + return __builtin_lsx_vstelm_w(_1, _2, 4, 1); +} +// CHECK-LABEL: @vstelm_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[TMP0]], ptr [[_2:%.*]], i32 8, i32 1) +// CHECK-NEXT: ret void +// +void vstelm_d(v2i64 _1, void *_2) { + return __builtin_lsx_vstelm_d(_1, _2, 8, 1); +} +// CHECK-LABEL: @vaddwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vaddwev_d_w(_1, _2); +} +// CHECK-LABEL: @vaddwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vaddwev_w_h(_1, _2); +} +// CHECK-LABEL: @vaddwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vaddwev_h_b(_1, _2); +} +// CHECK-LABEL: @vaddwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vaddwod_d_w(_1, _2); +} +// CHECK-LABEL: @vaddwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vaddwod_w_h(_1, _2); +} +// CHECK-LABEL: @vaddwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vaddwod_h_b(_1, _2); +} +// CHECK-LABEL: @vaddwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vaddwev_d_wu(_1, _2); +} +// CHECK-LABEL: @vaddwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vaddwev_w_hu(_1, _2); +} +// CHECK-LABEL: @vaddwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vaddwev_h_bu(_1, _2); +} +// CHECK-LABEL: @vaddwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vaddwod_d_wu(_1, _2); +} +// CHECK-LABEL: @vaddwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vaddwod_w_hu(_1, _2); +} +// CHECK-LABEL: @vaddwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vaddwod_h_bu(_1, _2); +} +// CHECK-LABEL: @vaddwev_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vaddwev_d_wu_w(_1, _2); +} +// CHECK-LABEL: @vaddwev_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vaddwev_w_hu_h(_1, _2); +} +// CHECK-LABEL: @vaddwev_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vaddwev_h_bu_b(_1, _2); +} +// CHECK-LABEL: @vaddwod_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vaddwod_d_wu_w(_1, _2); +} +// CHECK-LABEL: @vaddwod_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vaddwod_w_hu_h(_1, _2); +} +// CHECK-LABEL: @vaddwod_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vaddwod_h_bu_b(_1, _2); +} +// CHECK-LABEL: @vsubwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) 
{ + return __builtin_lsx_vsubwev_d_w(_1, _2); +} +// CHECK-LABEL: @vsubwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsubwev_w_h(_1, _2); +} +// CHECK-LABEL: @vsubwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsubwev_h_b(_1, _2); +} +// CHECK-LABEL: @vsubwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsubwod_d_w(_1, _2); +} +// CHECK-LABEL: @vsubwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsubwod_w_h(_1, _2); +} +// CHECK-LABEL: @vsubwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsubwod_h_b(_1, _2); +} +// CHECK-LABEL: @vsubwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vsubwev_d_wu(_1, _2); +} +// CHECK-LABEL: @vsubwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] 
to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vsubwev_w_hu(_1, _2); +} +// CHECK-LABEL: @vsubwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vsubwev_h_bu(_1, _2); +} +// CHECK-LABEL: @vsubwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vsubwod_d_wu(_1, _2); +} +// CHECK-LABEL: @vsubwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vsubwod_w_hu(_1, _2); +} +// CHECK-LABEL: @vsubwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vsubwod_h_bu(_1, _2); +} +// CHECK-LABEL: @vaddwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vaddwev_q_d(_1, _2); +} +// CHECK-LABEL: @vaddwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vaddwod_q_d(_1, _2); +} +// CHECK-LABEL: @vaddwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vaddwev_q_du(_1, _2); +} +// CHECK-LABEL: @vaddwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vaddwod_q_du(_1, _2); +} +// CHECK-LABEL: @vsubwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsubwev_q_d(_1, _2); +} +// CHECK-LABEL: @vsubwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsubwod_q_d(_1, _2); +} +// CHECK-LABEL: @vsubwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vsubwev_q_du(_1, _2); +} +// CHECK-LABEL: @vsubwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vsubwod_q_du(_1, _2); +} +// CHECK-LABEL: @vaddwev_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vaddwev_q_du_d(_1, _2); +} +// CHECK-LABEL: @vaddwod_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: 
[[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vaddwod_q_du_d(_1, _2); +} +// CHECK-LABEL: @vmulwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vmulwev_d_w(_1, _2); +} +// CHECK-LABEL: @vmulwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vmulwev_w_h(_1, _2); +} +// CHECK-LABEL: @vmulwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vmulwev_h_b(_1, _2); +} +// CHECK-LABEL: @vmulwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vmulwod_d_w(_1, _2); +} +// CHECK-LABEL: @vmulwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vmulwod_w_h(_1, _2); +} +// CHECK-LABEL: @vmulwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vmulwod_h_b(_1, _2); +} +// CHECK-LABEL: @vmulwev_d_wu( +// CHECK-NEXT: entry: 
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmulwev_d_wu(_1, _2); +} +// CHECK-LABEL: @vmulwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmulwev_w_hu(_1, _2); +} +// CHECK-LABEL: @vmulwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmulwev_h_bu(_1, _2); +} +// CHECK-LABEL: @vmulwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmulwod_d_wu(_1, _2); +} +// CHECK-LABEL: @vmulwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmulwod_w_hu(_1, _2); +} +// CHECK-LABEL: @vmulwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmulwod_h_bu(_1, _2); +} +// CHECK-LABEL: @vmulwev_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwev_d_wu_w(v4u32 _1, 
v4i32 _2) { + return __builtin_lsx_vmulwev_d_wu_w(_1, _2); +} +// CHECK-LABEL: @vmulwev_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vmulwev_w_hu_h(_1, _2); +} +// CHECK-LABEL: @vmulwev_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vmulwev_h_bu_b(_1, _2); +} +// CHECK-LABEL: @vmulwod_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vmulwod_d_wu_w(_1, _2); +} +// CHECK-LABEL: @vmulwod_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vmulwod_w_hu_h(_1, _2); +} +// CHECK-LABEL: @vmulwod_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vmulwod_h_bu_b(_1, _2); +} +// CHECK-LABEL: @vmulwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vmulwev_q_d(_1, _2); +} +// CHECK-LABEL: @vmulwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[TMP0]], <2 x i64> 
[[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vmulwod_q_d(_1, _2); +} +// CHECK-LABEL: @vmulwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmulwev_q_du(_1, _2); +} +// CHECK-LABEL: @vmulwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmulwod_q_du(_1, _2); +} +// CHECK-LABEL: @vmulwev_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vmulwev_q_du_d(_1, _2); +} +// CHECK-LABEL: @vmulwod_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vmulwod_q_du_d(_1, _2); +} +// CHECK-LABEL: @vhaddw_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vhaddw_q_d(_1, _2); +} +// CHECK-LABEL: @vhaddw_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vhaddw_qu_du(_1, _2); +} +// CHECK-LABEL: @vhsubw_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vhsubw_q_d(_1, _2); +} +// CHECK-LABEL: @vhsubw_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vhsubw_qu_du(_1, _2); +} +// CHECK-LABEL: @vmaddwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwev_d_w(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwev_w_h(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vmaddwev_h_b(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { + return __builtin_lsx_vmaddwev_d_wu(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 
[[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { + return __builtin_lsx_vmaddwev_w_hu(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { + return __builtin_lsx_vmaddwev_h_bu(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwod_d_w(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwod_w_h(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vmaddwod_h_b(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2u64 vmaddwod_d_wu(v2u64 _1, 
v4u32 _2, v4u32 _3) { + return __builtin_lsx_vmaddwod_d_wu(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { + return __builtin_lsx_vmaddwod_w_hu(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { + return __builtin_lsx_vmaddwod_h_bu(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwev_d_wu_w(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwev_w_hu_h(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { + return __builtin_lsx_vmaddwev_h_bu_b(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 
x i32> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwod_d_wu_w(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwod_w_hu_h(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { + return __builtin_lsx_vmaddwod_h_bu_b(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vmaddwev_q_d(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vmaddwod_q_d(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { + return __builtin_lsx_vmaddwev_q_du(_1, _2, _3); 
+}
+// CHECK-LABEL: @vmaddwod_q_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
+  return __builtin_lsx_vmaddwod_q_du(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_q_du_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
+  return __builtin_lsx_vmaddwev_q_du_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_q_du_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
+  return __builtin_lsx_vmaddwod_q_du_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vrotr_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vrotr_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vrotr_b(_1, _2);
+}
+// CHECK-LABEL: @vrotr_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vrotr_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vrotr_h(_1, _2);
+}
+// CHECK-LABEL: @vrotr_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vrotr_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vrotr_w(_1, _2);
+}
+// CHECK-LABEL: @vrotr_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vrotr_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vrotr_d(_1, _2);
+}
+// CHECK-LABEL: @vadd_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_q(_1, _2); }
+// CHECK-LABEL: @vsub_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_q(_1, _2); }
+// CHECK-LABEL: @vldrepl_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1)
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
+// CHECK-NEXT:    ret i128 [[TMP1]]
+//
+v16i8 vldrepl_b(void *_1) { return __builtin_lsx_vldrepl_b(_1, 1); }
+// CHECK-LABEL: @vldrepl_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2)
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
+// CHECK-NEXT:    ret i128 [[TMP1]]
+//
+v8i16 vldrepl_h(void *_1) { return __builtin_lsx_vldrepl_h(_1, 2); }
+// CHECK-LABEL: @vldrepl_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4)
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
+// CHECK-NEXT:    ret i128 [[TMP1]]
+//
+v4i32 vldrepl_w(void *_1) { return __builtin_lsx_vldrepl_w(_1, 4); }
+// CHECK-LABEL: @vldrepl_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8)
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
+// CHECK-NEXT:    ret i128 [[TMP1]]
+//
+v2i64 vldrepl_d(void *_1) { return __builtin_lsx_vldrepl_d(_1, 8); }
+// CHECK-LABEL: @vmskgez_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vmskgez_b(v16i8 _1) { return __builtin_lsx_vmskgez_b(_1); }
+// CHECK-LABEL: @vmsknz_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vmsknz_b(v16i8 _1) { return __builtin_lsx_vmsknz_b(_1); }
+// CHECK-LABEL: @vexth_h_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vexth_h_b(v16i8 _1) { return __builtin_lsx_vexth_h_b(_1); }
+// CHECK-LABEL: @vexth_w_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vexth_w_h(v8i16 _1) { return __builtin_lsx_vexth_w_h(_1); }
+// CHECK-LABEL: @vexth_d_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vexth_d_w(v4i32 _1) { return __builtin_lsx_vexth_d_w(_1); }
+// CHECK-LABEL: @vexth_q_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vexth_q_d(v2i64 _1) { return __builtin_lsx_vexth_q_d(_1); }
+// CHECK-LABEL: @vexth_hu_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8u16 vexth_hu_bu(v16u8 _1) { return __builtin_lsx_vexth_hu_bu(_1); }
+// CHECK-LABEL: @vexth_wu_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4u32 vexth_wu_hu(v8u16 _1) { return __builtin_lsx_vexth_wu_hu(_1); }
+// CHECK-LABEL: @vexth_du_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2u64 vexth_du_wu(v4u32 _1) { return __builtin_lsx_vexth_du_wu(_1); }
+// CHECK-LABEL: @vexth_qu_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2u64 vexth_qu_du(v2u64 _1) { return __builtin_lsx_vexth_qu_du(_1); }
+// CHECK-LABEL: @vrotri_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vrotri_b(v16i8 _1) { return __builtin_lsx_vrotri_b(_1, 1); }
+// CHECK-LABEL: @vrotri_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vrotri_h(v8i16 _1) { return __builtin_lsx_vrotri_h(_1, 1); }
+// CHECK-LABEL: @vrotri_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vrotri_w(v4i32 _1) { return __builtin_lsx_vrotri_w(_1, 1); }
+// CHECK-LABEL: @vrotri_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vrotri_d(v2i64 _1) { return __builtin_lsx_vrotri_d(_1, 1); }
+// CHECK-LABEL: @vextl_q_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vextl_q_d(v2i64 _1) { return __builtin_lsx_vextl_q_d(_1); }
+// CHECK-LABEL: @vsrlni_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vsrlni_b_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrlni_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsrlni_h_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrlni_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsrlni_w_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrlni_d_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsrlni_d_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrlrni_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vsrlrni_b_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrlrni_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsrlrni_h_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrlrni_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsrlrni_w_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrlrni_d_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsrlrni_d_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlni_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vssrlni_b_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlni_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrlni_h_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlni_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrlni_w_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlni_d_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrlni_d_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlni_bu_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) {
+  return __builtin_lsx_vssrlni_bu_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlni_hu_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrlni_hu_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlni_wu_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrlni_wu_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlni_du_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrlni_du_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlrni_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vssrlrni_b_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlrni_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrlrni_h_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlrni_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrlrni_w_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlrni_d_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrlrni_d_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlrni_bu_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) {
+  return __builtin_lsx_vssrlrni_bu_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlrni_hu_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrlrni_hu_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlrni_wu_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrlrni_wu_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlrni_du_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrlrni_du_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrani_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vsrani_b_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrani_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsrani_h_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrani_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsrani_w_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrani_d_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsrani_d_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrarni_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vsrarni_b_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrarni_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsrarni_h_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrarni_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsrarni_w_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrarni_d_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsrarni_d_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrani_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vssrani_b_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrani_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrani_h_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrani_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrani_w_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrani_d_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrani_d_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrani_bu_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) {
+  return __builtin_lsx_vssrani_bu_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrani_hu_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrani_hu_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrani_wu_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrani_wu_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrani_du_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrani_du_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrarni_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vssrarni_b_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrarni_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrarni_h_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrarni_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrarni_w_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrarni_d_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrarni_d_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrarni_bu_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) {
+  return __builtin_lsx_vssrarni_bu_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrarni_hu_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrarni_hu_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrarni_wu_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrarni_wu_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrarni_du_q(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrarni_du_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vpermi_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vpermi_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vpermi_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vld(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1)
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
+// CHECK-NEXT:    ret i128 [[TMP1]]
+//
+v16i8 vld(void *_1) { return __builtin_lsx_vld(_1, 1); }
+// CHECK-LABEL: @vst(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1)
+// CHECK-NEXT:    ret void
+//
+void vst(v16i8 _1, void *_2) { return __builtin_lsx_vst(_1, _2, 1); }
+// CHECK-LABEL: @vssrlrn_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrlrn_b_h(_1, _2);
+}
+// CHECK-LABEL: @vssrlrn_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrlrn_h_w(_1, _2);
+}
+// CHECK-LABEL: @vssrlrn_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrlrn_w_d(_1, _2);
+}
+// CHECK-LABEL: @vssrln_b_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrln_b_h(_1, _2);
+}
+// CHECK-LABEL: @vssrln_h_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrln_h_w(_1, _2);
+}
+// CHECK-LABEL: @vssrln_w_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrln_w_d(_1, _2);
+}
+// CHECK-LABEL: @vorn_v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __builtin_lsx_vorn_v(_1, _2); }
+// CHECK-LABEL: @vldi(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1)
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
+// CHECK-NEXT:    ret i128 [[TMP1]]
+//
+v2i64 vldi() { return __builtin_lsx_vldi(1); }
+// CHECK-LABEL: @vshuf_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) {
+  return __builtin_lsx_vshuf_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vldx(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1)
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
+// CHECK-NEXT:    ret i128 [[TMP1]]
+//
+v16i8 vldx(void *_1) { return __builtin_lsx_vldx(_1, 1); }
+// CHECK-LABEL: @vstx(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i64 1)
+// CHECK-NEXT:    ret void
+//
+void vstx(v16i8 _1, void *_2) { return __builtin_lsx_vstx(_1, _2, 1); }
+// CHECK-LABEL: @vextl_qu_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2u64 vextl_qu_du(v2u64 _1) { return __builtin_lsx_vextl_qu_du(_1); }
+// CHECK-LABEL: @bnz_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT:    ret i32 [[TMP1]]
+//
+int bnz_b(v16u8 _1) { return __builtin_lsx_bnz_b(_1); }
+// CHECK-LABEL: @bnz_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT:    ret i32 [[TMP1]]
+//
+int bnz_d(v2u64 _1) { return __builtin_lsx_bnz_d(_1); }
+// CHECK-LABEL: @bnz_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT:    ret i32 [[TMP1]]
+//
+int bnz_h(v8u16 _1) { return __builtin_lsx_bnz_h(_1); }
+// CHECK-LABEL: @bnz_v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[TMP0]])
+// CHECK-NEXT:    ret i32 [[TMP1]]
+//
+int bnz_v(v16u8 _1) { return __builtin_lsx_bnz_v(_1); }
+// CHECK-LABEL: @bnz_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT:    ret i32 [[TMP1]]
+//
+int bnz_w(v4u32 _1) { return __builtin_lsx_bnz_w(_1); }
+// CHECK-LABEL: @bz_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT:    ret i32 [[TMP1]]
+//
+int bz_b(v16u8 _1) { return __builtin_lsx_bz_b(_1); }
+// CHECK-LABEL: @bz_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT:    ret i32 [[TMP1]]
+//
+int bz_d(v2u64 _1) { return __builtin_lsx_bz_d(_1); }
+// CHECK-LABEL: @bz_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT:    ret i32 [[TMP1]]
+//
+int bz_h(v8u16 _1) { return __builtin_lsx_bz_h(_1); }
+// CHECK-LABEL: @bz_v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[TMP0]])
+// CHECK-NEXT:    ret i32 [[TMP1]]
+//
+int bz_v(v16u8 _1) { return __builtin_lsx_bz_v(_1); }
+// CHECK-LABEL: @bz_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT:    ret i32 [[TMP1]]
+//
+int bz_w(v4u32 _1) { return __builtin_lsx_bz_w(_1); }
+// CHECK-LABEL: @vfcmp_caf_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_caf_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_caf_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_caf_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_ceq_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_ceq_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_ceq_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_ceq_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_cle_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_cle_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_cle_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_cle_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_clt_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_clt_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_clt_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_clt_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_cne_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_cne_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_cne_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_cne_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_cor_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_cor_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_cor_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_cor_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_cueq_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_cueq_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_cueq_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_cueq_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_cule_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_cule_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_cule_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_cule_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_cult_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_cult_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_cult_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_cult_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_cun_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_cun_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_cune_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_cune_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_cune_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_cune_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_cun_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_cun_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_saf_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_saf_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_saf_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_saf_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_seq_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_seq_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_seq_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_seq_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_sle_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_sle_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_sle_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_sle_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_slt_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_slt_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_slt_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_slt_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_sne_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_sne_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_sne_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_sne_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_sor_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_sor_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_sor_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_sor_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_sueq_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_sueq_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_sueq_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) {
+  return __builtin_lsx_vfcmp_sueq_s(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_sule_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vfcmp_sule_d(_1, _2);
+}
+// CHECK-LABEL: @vfcmp_sule_s(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[TMP0]],
<4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sule_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_sult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sult_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_sult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sult_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_sun_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sun_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_sune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sune_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_sune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sune_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_sun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sun_s(_1, _2); +} +// CHECK-LABEL: @vrepli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) +// 
CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
+// CHECK-NEXT:    ret i128 [[TMP1]]
+//
+v16i8 vrepli_b() { return __builtin_lsx_vrepli_b(1); }
+// CHECK-LABEL: @vrepli_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1)
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
+// CHECK-NEXT:    ret i128 [[TMP1]]
+//
+v2i64 vrepli_d() { return __builtin_lsx_vrepli_d(1); }
+// CHECK-LABEL: @vrepli_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1)
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
+// CHECK-NEXT:    ret i128 [[TMP1]]
+//
+v8i16 vrepli_h() { return __builtin_lsx_vrepli_h(1); }
+// CHECK-LABEL: @vrepli_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1)
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
+// CHECK-NEXT:    ret i128 [[TMP1]]
+//
+v4i32 vrepli_w() { return __builtin_lsx_vrepli_w(1); }
diff --git a/clang/test/Driver/loongarch-mlasx-error.c b/clang/test/Driver/loongarch-mlasx-error.c
new file mode 100644
index 0000000000000000000000000000000000000000..e66f277f7c292f1e64c9e85a0701cfe9d2056974
--- /dev/null
+++ b/clang/test/Driver/loongarch-mlasx-error.c
@@ -0,0 +1,15 @@
+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -msingle-float 2>&1 | \
+// RUN:   FileCheck --check-prefix=ERROR_LASX_FPU64 %s
+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -msoft-float 2>&1 | \
+// RUN:   FileCheck --check-prefix=ERROR_LASX_FPU64 %s
+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=32 2>&1 | \
+// RUN:   FileCheck --check-prefix=ERROR_LASX_FPU64 %s
+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=0 2>&1 | \
+// RUN:   FileCheck --check-prefix=ERROR_LASX_FPU64 %s
+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=none 2>&1 | \
+// RUN:   FileCheck --check-prefix=ERROR_LASX_FPU64 %s
+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mno-lsx 2>&1 | \
+// RUN:   FileCheck --check-prefix=ERROR_LASX_FPU128 %s
+
+// ERROR_LASX_FPU64: error: wrong fpu width; LASX depends on 64-bit FPU.
+// ERROR_LASX_FPU128: error: invalid option combination; LASX depends on LSX.
diff --git a/clang/test/Driver/loongarch-mlasx.c b/clang/test/Driver/loongarch-mlasx.c
new file mode 100644
index 0000000000000000000000000000000000000000..0b934f125c9e462b5d7176367deb7ee90acd8b87
--- /dev/null
+++ b/clang/test/Driver/loongarch-mlasx.c
@@ -0,0 +1,37 @@
+/// Test -m[no-]lasx options.
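+/// Per the RUN lines below: the last of -mlasx/-mno-lasx wins, and enabling
+/// LASX also enables LSX (CC1-LASX expects both "+lsx" and "+lasx").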
+
+// RUN: %clang --target=loongarch64 -mlasx -fsyntax-only %s -### 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=CC1-LASX
+// RUN: %clang --target=loongarch64 -mno-lasx -fsyntax-only %s -### 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=CC1-NOLASX
+// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -fsyntax-only %s -### 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=CC1-NOLASX
+// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -fsyntax-only %s -### 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=CC1-LASX
+// RUN: %clang --target=loongarch64 -mlsx -mlasx -fsyntax-only %s -### 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=CC1-LASX
+// RUN: %clang --target=loongarch64 -mlasx -mlsx -fsyntax-only %s -### 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=CC1-LASX
+
+// RUN: %clang --target=loongarch64 -mlasx -S -emit-llvm %s -o - | \
+// RUN:   FileCheck %s --check-prefix=IR-LASX
+// RUN: %clang --target=loongarch64 -mno-lasx -S -emit-llvm %s -o - | \
+// RUN:   FileCheck %s --check-prefix=IR-NOLASX
+// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -S -emit-llvm %s -o - | \
+// RUN:   FileCheck %s --check-prefix=IR-NOLASX
+// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -S -emit-llvm %s -o - | \
+// RUN:   FileCheck %s --check-prefix=IR-LASX
+// RUN: %clang --target=loongarch64 -mlsx -mlasx -S -emit-llvm %s -o - | \
+// RUN:   FileCheck %s --check-prefix=IR-LASX
+// RUN: %clang --target=loongarch64 -mlasx -mlsx -S -emit-llvm %s -o - | \
+// RUN:   FileCheck %s --check-prefix=IR-LASX
+
+// CC1-LASX: "-target-feature" "+lsx" "-target-feature" "+lasx"
+// CC1-NOLASX: "-target-feature" "-lasx"
+
+// IR-LASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lasx{{(,.*)?}}"
+// IR-NOLASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lasx{{(,.*)?}}"
+
+int foo(void){
+  return 3;
+}
diff --git a/clang/test/Driver/loongarch-mlsx-error.c b/clang/test/Driver/loongarch-mlsx-error.c
new file mode 100644
index 0000000000000000000000000000000000000000..bd6b8e2718bf6086d756f673aa5b347dac3ffeba
--- /dev/null
+++ b/clang/test/Driver/loongarch-mlsx-error.c
@@ -0,0 +1,12 @@
+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -msingle-float 2>&1 | \
+// RUN:   FileCheck --check-prefix=ERROR_LSX_FPU64 %s
+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -msoft-float 2>&1 | \
+// RUN:   FileCheck --check-prefix=ERROR_LSX_FPU64 %s
+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=32 2>&1 | \
+// RUN:   FileCheck --check-prefix=ERROR_LSX_FPU64 %s
+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=0 2>&1 | \
+// RUN:   FileCheck --check-prefix=ERROR_LSX_FPU64 %s
+// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=none 2>&1 | \
+// RUN:   FileCheck --check-prefix=ERROR_LSX_FPU64 %s
+
+// ERROR_LSX_FPU64: error: wrong fpu width; LSX depends on 64-bit FPU.
diff --git a/clang/test/Driver/loongarch-mlsx.c b/clang/test/Driver/loongarch-mlsx.c
new file mode 100644
index 0000000000000000000000000000000000000000..7d4307b078e1a06fb6a453ff685f6d598dbce970
--- /dev/null
+++ b/clang/test/Driver/loongarch-mlsx.c
@@ -0,0 +1,41 @@
+/// Test -m[no-]lsx options.
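+/// Per the RUN lines below: the last of -mlsx/-mno-lsx wins, and -mno-lasx
+/// on its own does not disable LSX (-mlsx -mno-lasx still yields "+lsx").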
+ +// RUN: %clang --target=loongarch64 -mlsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LSX +// RUN: %clang --target=loongarch64 -mno-lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NOLSX +// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NOLSX +// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LSX +// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LSX +// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LSX +// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NOLSX + +// RUN: %clang --target=loongarch64 -mlsx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LSX +// RUN: %clang --target=loongarch64 -mno-lsx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NOLSX +// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NOLSX +// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LSX +// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LSX +// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LSX +// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NOLSX + +// CC1-LSX: "-target-feature" "+lsx" +// CC1-NOLSX: "-target-feature" "-lsx" + +// IR-LSX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lsx{{(,.*)?}}" +// IR-NOLSX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lsx{{(,.*)?}}" + +int foo(void){ + return 3; +} diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c index 4ef42a921ec0339cf465f7e5a5b057f898b285e7..e235a728302153a62617eae9c1eb17e87347fe34 100644 --- a/clang/test/Preprocessor/init-loongarch.c +++ b/clang/test/Preprocessor/init-loongarch.c @@ -807,3 +807,38 @@ // ARCH-TUNE: #define __loongarch_arch "[[ARCH]]" // ARCH-TUNE: #define __loongarch_tune "[[TUNE]]" + +// RUN: %clang --target=loongarch64 -mlsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s +// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s +// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s +// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s +// MLSX-NOT: #define __loongarch_asx +// MLSX: #define __loongarch_sx 1 + +// RUN: %clang --target=loongarch64 -mlasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s +// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s +// RUN: %clang --target=loongarch64 -mlsx -mlasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s +// RUN: %clang --target=loongarch64 -mlasx -mlsx -x c -E -dM %s -o - \ +// RUN: | FileCheck 
--match-full-lines --check-prefix=MLASX %s +// MLASX: #define __loongarch_asx 1 +// MLASX: #define __loongarch_sx 1 + +// RUN: %clang --target=loongarch64 -mno-lsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +// RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +// RUN: %clang --target=loongarch64 -mno-lasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +// MNO-LSX-NOT: #define __loongarch_asx +// MNO-LSX-NOT: #define __loongarch_sx diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td index 5edce3c529e1217514b67f6ef3f1d4a9d8a8fc61..685deaec7709bd7896a65fedcd121b6a3d410a45 100644 --- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td +++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td @@ -51,74 +51,1122 @@ defm int_loongarch_masked_cmpxchg : MaskedAtomicRMWFiveOpIntrinsics; //===----------------------------------------------------------------------===// // LoongArch BASE -def int_loongarch_break : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; -def int_loongarch_cacop_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], - [ImmArg>, ImmArg>]>; -def int_loongarch_cacop_w : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [ImmArg>, ImmArg>]>; -def int_loongarch_dbar : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; -def int_loongarch_ibar : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; -def int_loongarch_movfcsr2gr : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], - [ImmArg>]>; -def int_loongarch_movgr2fcsr : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], - [ImmArg>]>; -def int_loongarch_syscall : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; - -def int_loongarch_crc_w_b_w : Intrinsic<[llvm_i32_ty], - [llvm_i32_ty, llvm_i32_ty]>; -def int_loongarch_crc_w_h_w : Intrinsic<[llvm_i32_ty], - [llvm_i32_ty, llvm_i32_ty]>; -def int_loongarch_crc_w_w_w : Intrinsic<[llvm_i32_ty], - [llvm_i32_ty, llvm_i32_ty]>; -def int_loongarch_crc_w_d_w : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty]>; - -def int_loongarch_crcc_w_b_w : Intrinsic<[llvm_i32_ty], - [llvm_i32_ty, llvm_i32_ty]>; -def int_loongarch_crcc_w_h_w : Intrinsic<[llvm_i32_ty], - [llvm_i32_ty, llvm_i32_ty]>; -def int_loongarch_crcc_w_w_w : Intrinsic<[llvm_i32_ty], - [llvm_i32_ty, llvm_i32_ty]>; -def int_loongarch_crcc_w_d_w : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty]>; - -def int_loongarch_csrrd_w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], - [ImmArg>]>; -def int_loongarch_csrrd_d : Intrinsic<[llvm_i64_ty], [llvm_i32_ty], - [ImmArg>]>; -def int_loongarch_csrwr_w : Intrinsic<[llvm_i32_ty], - [llvm_i32_ty, llvm_i32_ty], - [ImmArg>]>; -def int_loongarch_csrwr_d : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty], - [ImmArg>]>; -def int_loongarch_csrxchg_w : Intrinsic<[llvm_i32_ty], - [llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty], - [ImmArg>]>; -def int_loongarch_csrxchg_d : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i64_ty, - llvm_i32_ty], - [ImmArg>]>; - -def int_loongarch_iocsrrd_b : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; -def int_loongarch_iocsrrd_h : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; -def int_loongarch_iocsrrd_w : Intrinsic<[llvm_i32_ty], 
[llvm_i32_ty]>;
-def int_loongarch_iocsrrd_d : Intrinsic<[llvm_i64_ty], [llvm_i32_ty]>;
-
-def int_loongarch_iocsrwr_b : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>;
-def int_loongarch_iocsrwr_h : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>;
-def int_loongarch_iocsrwr_w : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>;
-def int_loongarch_iocsrwr_d : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty]>;
-
-def int_loongarch_cpucfg : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>;
-
-def int_loongarch_asrtle_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty]>;
-def int_loongarch_asrtgt_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty]>;
-
-def int_loongarch_lddir_d : Intrinsic<[llvm_i64_ty],
-                                      [llvm_i64_ty, llvm_i64_ty],
-                                      [ImmArg<ArgIndex<1>>]>;
-def int_loongarch_ldpte_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty],
-                                      [ImmArg<ArgIndex<1>>]>;
+class BaseInt<list<LLVMType> ret_types, list<LLVMType> param_types,
+              list<IntrinsicProperty> intr_properties = []>
+  : Intrinsic<ret_types, param_types, intr_properties>,
+    ClangBuiltin<!subst("int_loongarch", "__builtin_loongarch", NAME)>;
+
+def int_loongarch_break : BaseInt<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
+def int_loongarch_cacop_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty],
+                                    [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>]>;
+def int_loongarch_cacop_w : BaseInt<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+                                    [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>]>;
+def int_loongarch_dbar : BaseInt<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
+
+def int_loongarch_ibar : BaseInt<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
+def int_loongarch_movfcsr2gr : BaseInt<[llvm_i32_ty], [llvm_i32_ty],
+                                       [ImmArg<ArgIndex<0>>]>;
+def int_loongarch_movgr2fcsr : BaseInt<[], [llvm_i32_ty, llvm_i32_ty],
+                                       [ImmArg<ArgIndex<0>>]>;
+def int_loongarch_syscall : BaseInt<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
+
+def int_loongarch_crc_w_b_w : BaseInt<[llvm_i32_ty],
+                                      [llvm_i32_ty, llvm_i32_ty]>;
+def int_loongarch_crc_w_h_w : BaseInt<[llvm_i32_ty],
+                                      [llvm_i32_ty, llvm_i32_ty]>;
+def int_loongarch_crc_w_w_w : BaseInt<[llvm_i32_ty],
+                                      [llvm_i32_ty, llvm_i32_ty]>;
+def int_loongarch_crc_w_d_w : BaseInt<[llvm_i32_ty],
+                                      [llvm_i64_ty, llvm_i32_ty]>;
+
+def int_loongarch_crcc_w_b_w : BaseInt<[llvm_i32_ty],
+                                       [llvm_i32_ty, llvm_i32_ty]>;
+def int_loongarch_crcc_w_h_w : BaseInt<[llvm_i32_ty],
+                                       [llvm_i32_ty, llvm_i32_ty]>;
+def int_loongarch_crcc_w_w_w : BaseInt<[llvm_i32_ty],
+                                       [llvm_i32_ty, llvm_i32_ty]>;
+def int_loongarch_crcc_w_d_w : BaseInt<[llvm_i32_ty],
+                                       [llvm_i64_ty, llvm_i32_ty]>;
+
+def int_loongarch_csrrd_w : BaseInt<[llvm_i32_ty], [llvm_i32_ty],
+                                    [ImmArg<ArgIndex<0>>]>;
+def int_loongarch_csrrd_d : BaseInt<[llvm_i64_ty], [llvm_i32_ty],
+                                    [ImmArg<ArgIndex<0>>]>;
+def int_loongarch_csrwr_w : BaseInt<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+                                    [ImmArg<ArgIndex<1>>]>;
+def int_loongarch_csrwr_d : BaseInt<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
+                                    [ImmArg<ArgIndex<1>>]>;
+def int_loongarch_csrxchg_w : BaseInt<[llvm_i32_ty],
+                                      [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+                                      [ImmArg<ArgIndex<2>>]>;
+def int_loongarch_csrxchg_d : BaseInt<[llvm_i64_ty],
+                                      [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
+                                      [ImmArg<ArgIndex<2>>]>;
+
+def int_loongarch_iocsrrd_b : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>;
+def int_loongarch_iocsrrd_h : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>;
+def int_loongarch_iocsrrd_w : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>;
+def int_loongarch_iocsrrd_d : BaseInt<[llvm_i64_ty], [llvm_i32_ty]>;
+
+def int_loongarch_iocsrwr_b : BaseInt<[], [llvm_i32_ty, llvm_i32_ty]>;
+def int_loongarch_iocsrwr_h : BaseInt<[], [llvm_i32_ty, llvm_i32_ty]>;
+def int_loongarch_iocsrwr_w : BaseInt<[], [llvm_i32_ty, llvm_i32_ty]>;
+def int_loongarch_iocsrwr_d : BaseInt<[], [llvm_i64_ty, llvm_i32_ty]>;
+
+def int_loongarch_cpucfg : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>;
+
+def int_loongarch_asrtle_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty]>;
+def int_loongarch_asrtgt_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty]>;
+
+def int_loongarch_lddir_d : BaseInt<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+                                    [ImmArg<ArgIndex<1>>]>;
+def int_loongarch_ldpte_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty],
+                                    [ImmArg<ArgIndex<1>>]>;
+} // TargetPrefix = "loongarch"
+
+/// Vector intrinsic
+
+class VecInt<list<LLVMType> ret_types, list<LLVMType> param_types,
+             list<IntrinsicProperty> intr_properties = []>
+  : Intrinsic<ret_types, param_types, intr_properties>,
+    ClangBuiltin<!subst("int_loongarch", "__builtin", NAME)>;
+
+//===----------------------------------------------------------------------===//
+// LSX
+
+let TargetPrefix = "loongarch" in {
+
+foreach inst = ["vadd_b", "vsub_b",
+                "vsadd_b", "vsadd_bu", "vssub_b", "vssub_bu",
+                "vavg_b", "vavg_bu", "vavgr_b", "vavgr_bu",
+                "vabsd_b", "vabsd_bu", "vadda_b",
+                "vmax_b", "vmax_bu", "vmin_b", "vmin_bu",
+                "vmul_b", "vmuh_b", "vmuh_bu",
+                "vdiv_b", "vdiv_bu", "vmod_b", "vmod_bu", "vsigncov_b",
+                "vand_v", "vor_v", "vxor_v", "vnor_v", "vandn_v", "vorn_v",
+                "vsll_b", "vsrl_b", "vsra_b", "vrotr_b", "vsrlr_b", "vsrar_b",
+                "vbitclr_b", "vbitset_b", "vbitrev_b",
+                "vseq_b", "vsle_b", "vsle_bu", "vslt_b", "vslt_bu",
+                "vpackev_b", "vpackod_b", "vpickev_b", "vpickod_b",
+                "vilvl_b", "vilvh_b"] in
+  def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty],
+                                       [llvm_v16i8_ty, llvm_v16i8_ty],
+                                       [IntrNoMem]>;
+
+foreach inst = ["vadd_h", "vsub_h",
+                "vsadd_h", "vsadd_hu", "vssub_h", "vssub_hu",
+                "vavg_h", "vavg_hu", "vavgr_h", "vavgr_hu",
+                "vabsd_h", "vabsd_hu", "vadda_h",
+                "vmax_h", "vmax_hu", "vmin_h", "vmin_hu",
+                "vmul_h", "vmuh_h", "vmuh_hu",
+                "vdiv_h", "vdiv_hu", "vmod_h", "vmod_hu", "vsigncov_h",
+                "vsll_h", "vsrl_h", "vsra_h", "vrotr_h", "vsrlr_h", "vsrar_h",
+                "vbitclr_h", "vbitset_h", "vbitrev_h",
+                "vseq_h", "vsle_h", "vsle_hu", "vslt_h", "vslt_hu",
+                "vpackev_h", "vpackod_h", "vpickev_h", "vpickod_h",
+                "vilvl_h", "vilvh_h"] in
+  def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty],
+                                       [llvm_v8i16_ty, llvm_v8i16_ty],
+                                       [IntrNoMem]>;
+
+foreach inst = ["vadd_w", "vsub_w",
+                "vsadd_w", "vsadd_wu", "vssub_w", "vssub_wu",
+                "vavg_w", "vavg_wu", "vavgr_w", "vavgr_wu",
+                "vabsd_w", "vabsd_wu", "vadda_w",
+                "vmax_w", "vmax_wu", "vmin_w", "vmin_wu",
+                "vmul_w", "vmuh_w", "vmuh_wu",
+                "vdiv_w", "vdiv_wu", "vmod_w", "vmod_wu", "vsigncov_w",
+                "vsll_w", "vsrl_w", "vsra_w", "vrotr_w", "vsrlr_w", "vsrar_w",
+                "vbitclr_w", "vbitset_w", "vbitrev_w",
+                "vseq_w", "vsle_w", "vsle_wu", "vslt_w", "vslt_wu",
+                "vpackev_w", "vpackod_w", "vpickev_w", "vpickod_w",
+                "vilvl_w", "vilvh_w"] in
+  def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty],
+                                       [llvm_v4i32_ty, llvm_v4i32_ty],
+                                       [IntrNoMem]>;
+
+foreach inst = ["vadd_d", "vadd_q", "vsub_d", "vsub_q",
+                "vsadd_d", "vsadd_du", "vssub_d", "vssub_du",
+                "vhaddw_q_d", "vhaddw_qu_du", "vhsubw_q_d", "vhsubw_qu_du",
+                "vaddwev_q_d", "vaddwod_q_d", "vsubwev_q_d", "vsubwod_q_d",
+                "vaddwev_q_du", "vaddwod_q_du", "vsubwev_q_du", "vsubwod_q_du",
+                "vaddwev_q_du_d", "vaddwod_q_du_d",
+                "vavg_d", "vavg_du", "vavgr_d", "vavgr_du",
+                "vabsd_d", "vabsd_du", "vadda_d",
+                "vmax_d", "vmax_du", "vmin_d", "vmin_du",
+                "vmul_d", "vmuh_d", "vmuh_du",
+                "vmulwev_q_d", "vmulwod_q_d", "vmulwev_q_du", "vmulwod_q_du",
+                "vmulwev_q_du_d", "vmulwod_q_du_d",
+                "vdiv_d", "vdiv_du", "vmod_d", "vmod_du", "vsigncov_d",
+                "vsll_d", "vsrl_d", "vsra_d", "vrotr_d", "vsrlr_d", "vsrar_d",
+                "vbitclr_d", "vbitset_d", "vbitrev_d",
+                "vseq_d", "vsle_d", "vsle_du", "vslt_d", "vslt_du",
+                "vpackev_d", "vpackod_d", "vpickev_d", "vpickod_d",
+                "vilvl_d", "vilvh_d"] in
+  def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty],
+                                       [llvm_v2i64_ty, llvm_v2i64_ty],
+                                       [IntrNoMem]>;
+
+foreach inst =
["vaddi_bu", "vsubi_bu", + "vmaxi_b", "vmaxi_bu", "vmini_b", "vmini_bu", + "vsat_b", "vsat_bu", + "vandi_b", "vori_b", "vxori_b", "vnori_b", + "vslli_b", "vsrli_b", "vsrai_b", "vrotri_b", + "vsrlri_b", "vsrari_b", + "vbitclri_b", "vbitseti_b", "vbitrevi_b", + "vseqi_b", "vslei_b", "vslei_bu", "vslti_b", "vslti_bu", + "vreplvei_b", "vbsll_v", "vbsrl_v", "vshuf4i_b"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vaddi_hu", "vsubi_hu", + "vmaxi_h", "vmaxi_hu", "vmini_h", "vmini_hu", + "vsat_h", "vsat_hu", + "vslli_h", "vsrli_h", "vsrai_h", "vrotri_h", + "vsrlri_h", "vsrari_h", + "vbitclri_h", "vbitseti_h", "vbitrevi_h", + "vseqi_h", "vslei_h", "vslei_hu", "vslti_h", "vslti_hu", + "vreplvei_h", "vshuf4i_h"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vaddi_wu", "vsubi_wu", + "vmaxi_w", "vmaxi_wu", "vmini_w", "vmini_wu", + "vsat_w", "vsat_wu", + "vslli_w", "vsrli_w", "vsrai_w", "vrotri_w", + "vsrlri_w", "vsrari_w", + "vbitclri_w", "vbitseti_w", "vbitrevi_w", + "vseqi_w", "vslei_w", "vslei_wu", "vslti_w", "vslti_wu", + "vreplvei_w", "vshuf4i_w"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vaddi_du", "vsubi_du", + "vmaxi_d", "vmaxi_du", "vmini_d", "vmini_du", + "vsat_d", "vsat_du", + "vslli_d", "vsrli_d", "vsrai_d", "vrotri_d", + "vsrlri_d", "vsrari_d", + "vbitclri_d", "vbitseti_d", "vbitrevi_d", + "vseqi_d", "vslei_d", "vslei_du", "vslti_d", "vslti_du", + "vreplvei_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +foreach inst = ["vhaddw_h_b", "vhaddw_hu_bu", "vhsubw_h_b", "vhsubw_hu_bu", + "vaddwev_h_b", "vaddwod_h_b", "vsubwev_h_b", "vsubwod_h_b", + "vaddwev_h_bu", "vaddwod_h_bu", "vsubwev_h_bu", "vsubwod_h_bu", + "vaddwev_h_bu_b", "vaddwod_h_bu_b", + "vmulwev_h_b", "vmulwod_h_b", "vmulwev_h_bu", "vmulwod_h_bu", + "vmulwev_h_bu_b", "vmulwod_h_bu_b"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], + [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + +foreach inst = ["vhaddw_w_h", "vhaddw_wu_hu", "vhsubw_w_h", "vhsubw_wu_hu", + "vaddwev_w_h", "vaddwod_w_h", "vsubwev_w_h", "vsubwod_w_h", + "vaddwev_w_hu", "vaddwod_w_hu", "vsubwev_w_hu", "vsubwod_w_hu", + "vaddwev_w_hu_h", "vaddwod_w_hu_h", + "vmulwev_w_h", "vmulwod_w_h", "vmulwev_w_hu", "vmulwod_w_hu", + "vmulwev_w_hu_h", "vmulwod_w_hu_h"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], + [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + +foreach inst = ["vhaddw_d_w", "vhaddw_du_wu", "vhsubw_d_w", "vhsubw_du_wu", + "vaddwev_d_w", "vaddwod_d_w", "vsubwev_d_w", "vsubwod_d_w", + "vaddwev_d_wu", "vaddwod_d_wu", "vsubwev_d_wu", "vsubwod_d_wu", + "vaddwev_d_wu_w", "vaddwod_d_wu_w", + "vmulwev_d_w", "vmulwod_d_w", "vmulwev_d_wu", "vmulwod_d_wu", + "vmulwev_d_wu_w", "vmulwod_d_wu_w"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], + [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + +foreach inst = ["vsrln_b_h", "vsran_b_h", "vsrlrn_b_h", "vsrarn_b_h", + "vssrln_b_h", "vssran_b_h", "vssrln_bu_h", "vssran_bu_h", + "vssrlrn_b_h", "vssrarn_b_h", "vssrlrn_bu_h", "vssrarn_bu_h"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], + [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + +foreach inst = ["vsrln_h_w", "vsran_h_w", "vsrlrn_h_w", "vsrarn_h_w", + "vssrln_h_w", "vssran_h_w", 
"vssrln_hu_w", "vssran_hu_w", + "vssrlrn_h_w", "vssrarn_h_w", "vssrlrn_hu_w", "vssrarn_hu_w"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], + [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + +foreach inst = ["vsrln_w_d", "vsran_w_d", "vsrlrn_w_d", "vsrarn_w_d", + "vssrln_w_d", "vssran_w_d", "vssrln_wu_d", "vssran_wu_d", + "vssrlrn_w_d", "vssrarn_w_d", "vssrlrn_wu_d", "vssrarn_wu_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], + [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + +foreach inst = ["vmadd_b", "vmsub_b", "vfrstp_b", "vbitsel_v", "vshuf_b"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; +foreach inst = ["vmadd_h", "vmsub_h", "vfrstp_h", "vshuf_h"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; +foreach inst = ["vmadd_w", "vmsub_w", "vshuf_w"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; +foreach inst = ["vmadd_d", "vmsub_d", "vshuf_d"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + +foreach inst = ["vsrlni_b_h", "vsrani_b_h", "vsrlrni_b_h", "vsrarni_b_h", + "vssrlni_b_h", "vssrani_b_h", "vssrlni_bu_h", "vssrani_bu_h", + "vssrlrni_b_h", "vssrarni_b_h", "vssrlrni_bu_h", "vssrarni_bu_h", + "vfrstpi_b", "vbitseli_b", "vextrins_b"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vsrlni_h_w", "vsrani_h_w", "vsrlrni_h_w", "vsrarni_h_w", + "vssrlni_h_w", "vssrani_h_w", "vssrlni_hu_w", "vssrani_hu_w", + "vssrlrni_h_w", "vssrarni_h_w", "vssrlrni_hu_w", "vssrarni_hu_w", + "vfrstpi_h", "vextrins_h"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vsrlni_w_d", "vsrani_w_d", "vsrlrni_w_d", "vsrarni_w_d", + "vssrlni_w_d", "vssrani_w_d", "vssrlni_wu_d", "vssrani_wu_d", + "vssrlrni_w_d", "vssrarni_w_d", "vssrlrni_wu_d", "vssrarni_wu_d", + "vpermi_w", "vextrins_w"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vsrlni_d_q", "vsrani_d_q", "vsrlrni_d_q", "vsrarni_d_q", + "vssrlni_d_q", "vssrani_d_q", "vssrlni_du_q", "vssrani_du_q", + "vssrlrni_d_q", "vssrarni_d_q", "vssrlrni_du_q", "vssrarni_du_q", + "vshuf4i_d", "vextrins_d"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +foreach inst = ["vmaddwev_h_b", "vmaddwod_h_b", "vmaddwev_h_bu", + "vmaddwod_h_bu", "vmaddwev_h_bu_b", "vmaddwod_h_bu_b"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; +foreach inst = ["vmaddwev_w_h", "vmaddwod_w_h", "vmaddwev_w_hu", + "vmaddwod_w_hu", "vmaddwev_w_hu_h", "vmaddwod_w_hu_h"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; +foreach inst = ["vmaddwev_d_w", "vmaddwod_d_w", "vmaddwev_d_wu", + "vmaddwod_d_wu", "vmaddwev_d_wu_w", "vmaddwod_d_wu_w"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; +foreach inst = ["vmaddwev_q_d", "vmaddwod_q_d", 
"vmaddwev_q_du", + "vmaddwod_q_du", "vmaddwev_q_du_d", "vmaddwod_q_du_d"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + +foreach inst = ["vsllwil_h_b", "vsllwil_hu_bu"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], + [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vsllwil_w_h", "vsllwil_wu_hu"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], + [llvm_v8i16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vsllwil_d_w", "vsllwil_du_wu"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], + [llvm_v4i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +foreach inst = ["vneg_b", "vmskltz_b", "vmskgez_b", "vmsknz_b", + "vclo_b", "vclz_b", "vpcnt_b"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], [llvm_v16i8_ty], + [IntrNoMem]>; +foreach inst = ["vneg_h", "vmskltz_h", "vclo_h", "vclz_h", "vpcnt_h"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], [llvm_v8i16_ty], + [IntrNoMem]>; +foreach inst = ["vneg_w", "vmskltz_w", "vclo_w", "vclz_w", "vpcnt_w"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], [llvm_v4i32_ty], + [IntrNoMem]>; +foreach inst = ["vneg_d", "vexth_q_d", "vexth_qu_du", "vmskltz_d", + "vextl_q_d", "vextl_qu_du", "vclo_d", "vclz_d", "vpcnt_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v2i64_ty], + [IntrNoMem]>; + +foreach inst = ["vexth_h_b", "vexth_hu_bu"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], [llvm_v16i8_ty], + [IntrNoMem]>; +foreach inst = ["vexth_w_h", "vexth_wu_hu"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], [llvm_v8i16_ty], + [IntrNoMem]>; +foreach inst = ["vexth_d_w", "vexth_du_wu"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v4i32_ty], + [IntrNoMem]>; + +def int_loongarch_lsx_vldi : VecInt<[llvm_v2i64_ty], [llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lsx_vrepli_b : VecInt<[llvm_v16i8_ty], [llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lsx_vrepli_h : VecInt<[llvm_v8i16_ty], [llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lsx_vrepli_w : VecInt<[llvm_v4i32_ty], [llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lsx_vrepli_d : VecInt<[llvm_v2i64_ty], [llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +def int_loongarch_lsx_vreplgr2vr_b : VecInt<[llvm_v16i8_ty], [llvm_i32_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vreplgr2vr_h : VecInt<[llvm_v8i16_ty], [llvm_i32_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vreplgr2vr_w : VecInt<[llvm_v4i32_ty], [llvm_i32_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vreplgr2vr_d : VecInt<[llvm_v2i64_ty], [llvm_i64_ty], + [IntrNoMem]>; + +def int_loongarch_lsx_vinsgr2vr_b + : VecInt<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lsx_vinsgr2vr_h + : VecInt<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lsx_vinsgr2vr_w + : VecInt<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lsx_vinsgr2vr_d + : VecInt<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +def int_loongarch_lsx_vreplve_b + : VecInt<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vreplve_h + : VecInt<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vreplve_w + : VecInt<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; 
+def int_loongarch_lsx_vreplve_d + : VecInt<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +foreach inst = ["vpickve2gr_b", "vpickve2gr_bu" ] in + def int_loongarch_lsx_#inst : VecInt<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vpickve2gr_h", "vpickve2gr_hu" ] in + def int_loongarch_lsx_#inst : VecInt<[llvm_i32_ty], + [llvm_v8i16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vpickve2gr_w", "vpickve2gr_wu" ] in + def int_loongarch_lsx_#inst : VecInt<[llvm_i32_ty], + [llvm_v4i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vpickve2gr_d", "vpickve2gr_du" ] in + def int_loongarch_lsx_#inst : VecInt<[llvm_i64_ty], + [llvm_v2i64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +def int_loongarch_lsx_bz_b : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], + [IntrNoMem]>; +def int_loongarch_lsx_bz_h : VecInt<[llvm_i32_ty], [llvm_v8i16_ty], + [IntrNoMem]>; +def int_loongarch_lsx_bz_w : VecInt<[llvm_i32_ty], [llvm_v4i32_ty], + [IntrNoMem]>; +def int_loongarch_lsx_bz_d : VecInt<[llvm_i32_ty], [llvm_v2i64_ty], + [IntrNoMem]>; +def int_loongarch_lsx_bz_v : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], + [IntrNoMem]>; + +def int_loongarch_lsx_bnz_v : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], + [IntrNoMem]>; +def int_loongarch_lsx_bnz_b : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], + [IntrNoMem]>; +def int_loongarch_lsx_bnz_h : VecInt<[llvm_i32_ty], [llvm_v8i16_ty], + [IntrNoMem]>; +def int_loongarch_lsx_bnz_w : VecInt<[llvm_i32_ty], [llvm_v4i32_ty], + [IntrNoMem]>; +def int_loongarch_lsx_bnz_d : VecInt<[llvm_i32_ty], [llvm_v2i64_ty], + [IntrNoMem]>; + +// LSX Float + +foreach inst = ["vfadd_s", "vfsub_s", "vfmul_s", "vfdiv_s", + "vfmax_s", "vfmin_s", "vfmaxa_s", "vfmina_s"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +foreach inst = ["vfadd_d", "vfsub_d", "vfmul_d", "vfdiv_d", + "vfmax_d", "vfmin_d", "vfmaxa_d", "vfmina_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + +foreach inst = ["vfmadd_s", "vfmsub_s", "vfnmadd_s", "vfnmsub_s"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +foreach inst = ["vfmadd_d", "vfmsub_d", "vfnmadd_d", "vfnmsub_d"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + +foreach inst = ["vflogb_s", "vfsqrt_s", "vfrecip_s", "vfrsqrt_s", "vfrint_s", + "vfrintrne_s", "vfrintrz_s", "vfrintrp_s", "vfrintrm_s"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +foreach inst = ["vflogb_d", "vfsqrt_d", "vfrecip_d", "vfrsqrt_d", "vfrint_d", + "vfrintrne_d", "vfrintrz_d", "vfrintrp_d", "vfrintrm_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v2f64_ty], + [IntrNoMem]>; + +foreach inst = ["vfcvtl_s_h", "vfcvth_s_h"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v8i16_ty], + [IntrNoMem]>; +foreach inst = ["vfcvtl_d_s", "vfcvth_d_s"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v4f32_ty], + [IntrNoMem]>; + +foreach inst = ["vftintrne_w_s", "vftintrz_w_s", "vftintrp_w_s", "vftintrm_w_s", + "vftint_w_s", "vftintrz_wu_s", "vftint_wu_s", "vfclass_s"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +foreach inst = ["vftintrne_l_d", "vftintrz_l_d", "vftintrp_l_d", "vftintrm_l_d", + "vftint_l_d", 
"vftintrz_lu_d", "vftint_lu_d", "vfclass_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v2f64_ty], + [IntrNoMem]>; + +foreach inst = ["vftintrnel_l_s", "vftintrneh_l_s", "vftintrzl_l_s", + "vftintrzh_l_s", "vftintrpl_l_s", "vftintrph_l_s", + "vftintrml_l_s", "vftintrmh_l_s", "vftintl_l_s", + "vftinth_l_s"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v4f32_ty], + [IntrNoMem]>; + +foreach inst = ["vffint_s_w", "vffint_s_wu"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v4i32_ty], + [IntrNoMem]>; +foreach inst = ["vffint_d_l", "vffint_d_lu"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v2i64_ty], + [IntrNoMem]>; + +foreach inst = ["vffintl_d_w", "vffinth_d_w"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v4i32_ty], + [IntrNoMem]>; + +foreach inst = ["vffint_s_l"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], + [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; +foreach inst = ["vftintrne_w_d", "vftintrz_w_d", "vftintrp_w_d", "vftintrm_w_d", + "vftint_w_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], + [llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + +foreach inst = ["vfcvt_h_s"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], + [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +foreach inst = ["vfcvt_s_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], + [llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + +foreach inst = ["vfcmp_caf_s", "vfcmp_cun_s", "vfcmp_ceq_s", "vfcmp_cueq_s", + "vfcmp_clt_s", "vfcmp_cult_s", "vfcmp_cle_s", "vfcmp_cule_s", + "vfcmp_cne_s", "vfcmp_cor_s", "vfcmp_cune_s", + "vfcmp_saf_s", "vfcmp_sun_s", "vfcmp_seq_s", "vfcmp_sueq_s", + "vfcmp_slt_s", "vfcmp_sult_s", "vfcmp_sle_s", "vfcmp_sule_s", + "vfcmp_sne_s", "vfcmp_sor_s", "vfcmp_sune_s"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +foreach inst = ["vfcmp_caf_d", "vfcmp_cun_d", "vfcmp_ceq_d", "vfcmp_cueq_d", + "vfcmp_clt_d", "vfcmp_cult_d", "vfcmp_cle_d", "vfcmp_cule_d", + "vfcmp_cne_d", "vfcmp_cor_d", "vfcmp_cune_d", + "vfcmp_saf_d", "vfcmp_sun_d", "vfcmp_seq_d", "vfcmp_sueq_d", + "vfcmp_slt_d", "vfcmp_sult_d", "vfcmp_sle_d", "vfcmp_sule_d", + "vfcmp_sne_d", "vfcmp_sor_d", "vfcmp_sune_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + +// LSX load/store +def int_loongarch_lsx_vld + : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly, ImmArg>]>; +def int_loongarch_lsx_vldx + : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i64_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_loongarch_lsx_vldrepl_b + : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly, ImmArg>]>; +def int_loongarch_lsx_vldrepl_h + : VecInt<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly, ImmArg>]>; +def int_loongarch_lsx_vldrepl_w + : VecInt<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly, ImmArg>]>; +def int_loongarch_lsx_vldrepl_d + : VecInt<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly, ImmArg>]>; + +def int_loongarch_lsx_vst + : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly, ImmArg>]>; +def int_loongarch_lsx_vstx + : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i64_ty], + [IntrWriteMem, IntrArgMemOnly]>; +def int_loongarch_lsx_vstelm_b + : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty, 
llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; +def int_loongarch_lsx_vstelm_h + : VecInt<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; +def int_loongarch_lsx_vstelm_w + : VecInt<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; +def int_loongarch_lsx_vstelm_d + : VecInt<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; + +} // TargetPrefix = "loongarch" + +//===----------------------------------------------------------------------===// +// LASX + +let TargetPrefix = "loongarch" in { +foreach inst = ["xvadd_b", "xvsub_b", + "xvsadd_b", "xvsadd_bu", "xvssub_b", "xvssub_bu", + "xvavg_b", "xvavg_bu", "xvavgr_b", "xvavgr_bu", + "xvabsd_b", "xvabsd_bu", "xvadda_b", + "xvmax_b", "xvmax_bu", "xvmin_b", "xvmin_bu", + "xvmul_b", "xvmuh_b", "xvmuh_bu", + "xvdiv_b", "xvdiv_bu", "xvmod_b", "xvmod_bu", "xvsigncov_b", + "xvand_v", "xvor_v", "xvxor_v", "xvnor_v", "xvandn_v", "xvorn_v", + "xvsll_b", "xvsrl_b", "xvsra_b", "xvrotr_b", "xvsrlr_b", "xvsrar_b", + "xvbitclr_b", "xvbitset_b", "xvbitrev_b", + "xvseq_b", "xvsle_b", "xvsle_bu", "xvslt_b", "xvslt_bu", + "xvpackev_b", "xvpackod_b", "xvpickev_b", "xvpickod_b", + "xvilvl_b", "xvilvh_b"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; + +foreach inst = ["xvadd_h", "xvsub_h", + "xvsadd_h", "xvsadd_hu", "xvssub_h", "xvssub_hu", + "xvavg_h", "xvavg_hu", "xvavgr_h", "xvavgr_hu", + "xvabsd_h", "xvabsd_hu", "xvadda_h", + "xvmax_h", "xvmax_hu", "xvmin_h", "xvmin_hu", + "xvmul_h", "xvmuh_h", "xvmuh_hu", + "xvdiv_h", "xvdiv_hu", "xvmod_h", "xvmod_hu", "xvsigncov_h", + "xvsll_h", "xvsrl_h", "xvsra_h", "xvrotr_h", "xvsrlr_h", "xvsrar_h", + "xvbitclr_h", "xvbitset_h", "xvbitrev_h", + "xvseq_h", "xvsle_h", "xvsle_hu", "xvslt_h", "xvslt_hu", + "xvpackev_h", "xvpackod_h", "xvpickev_h", "xvpickod_h", + "xvilvl_h", "xvilvh_h"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; + +foreach inst = ["xvadd_w", "xvsub_w", + "xvsadd_w", "xvsadd_wu", "xvssub_w", "xvssub_wu", + "xvavg_w", "xvavg_wu", "xvavgr_w", "xvavgr_wu", + "xvabsd_w", "xvabsd_wu", "xvadda_w", + "xvmax_w", "xvmax_wu", "xvmin_w", "xvmin_wu", + "xvmul_w", "xvmuh_w", "xvmuh_wu", + "xvdiv_w", "xvdiv_wu", "xvmod_w", "xvmod_wu", "xvsigncov_w", + "xvsll_w", "xvsrl_w", "xvsra_w", "xvrotr_w", "xvsrlr_w", "xvsrar_w", + "xvbitclr_w", "xvbitset_w", "xvbitrev_w", + "xvseq_w", "xvsle_w", "xvsle_wu", "xvslt_w", "xvslt_wu", + "xvpackev_w", "xvpackod_w", "xvpickev_w", "xvpickod_w", + "xvilvl_w", "xvilvh_w", "xvperm_w"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; + +foreach inst = ["xvadd_d", "xvadd_q", "xvsub_d", "xvsub_q", + "xvsadd_d", "xvsadd_du", "xvssub_d", "xvssub_du", + "xvhaddw_q_d", "xvhaddw_qu_du", "xvhsubw_q_d", "xvhsubw_qu_du", + "xvaddwev_q_d", "xvaddwod_q_d", "xvsubwev_q_d", "xvsubwod_q_d", + "xvaddwev_q_du", "xvaddwod_q_du", "xvsubwev_q_du", "xvsubwod_q_du", + "xvaddwev_q_du_d", "xvaddwod_q_du_d", + "xvavg_d", "xvavg_du", "xvavgr_d", "xvavgr_du", + "xvabsd_d", "xvabsd_du", "xvadda_d", + "xvmax_d", "xvmax_du", "xvmin_d", "xvmin_du", + "xvmul_d", "xvmuh_d", "xvmuh_du", + "xvmulwev_q_d", "xvmulwod_q_d", "xvmulwev_q_du", "xvmulwod_q_du", + "xvmulwev_q_du_d", "xvmulwod_q_du_d", + "xvdiv_d", "xvdiv_du", "xvmod_d", 
"xvmod_du", "xvsigncov_d", + "xvsll_d", "xvsrl_d", "xvsra_d", "xvrotr_d", "xvsrlr_d", "xvsrar_d", + "xvbitclr_d", "xvbitset_d", "xvbitrev_d", + "xvseq_d", "xvsle_d", "xvsle_du", "xvslt_d", "xvslt_du", + "xvpackev_d", "xvpackod_d", "xvpickev_d", "xvpickod_d", + "xvilvl_d", "xvilvh_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + +foreach inst = ["xvaddi_bu", "xvsubi_bu", + "xvmaxi_b", "xvmaxi_bu", "xvmini_b", "xvmini_bu", + "xvsat_b", "xvsat_bu", + "xvandi_b", "xvori_b", "xvxori_b", "xvnori_b", + "xvslli_b", "xvsrli_b", "xvsrai_b", "xvrotri_b", + "xvsrlri_b", "xvsrari_b", + "xvbitclri_b", "xvbitseti_b", "xvbitrevi_b", + "xvseqi_b", "xvslei_b", "xvslei_bu", "xvslti_b", "xvslti_bu", + "xvrepl128vei_b", "xvbsll_v", "xvbsrl_v", "xvshuf4i_b"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["xvaddi_hu", "xvsubi_hu", + "xvmaxi_h", "xvmaxi_hu", "xvmini_h", "xvmini_hu", + "xvsat_h", "xvsat_hu", + "xvslli_h", "xvsrli_h", "xvsrai_h", "xvrotri_h", + "xvsrlri_h", "xvsrari_h", + "xvbitclri_h", "xvbitseti_h", "xvbitrevi_h", + "xvseqi_h", "xvslei_h", "xvslei_hu", "xvslti_h", "xvslti_hu", + "xvrepl128vei_h", "xvshuf4i_h"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["xvaddi_wu", "xvsubi_wu", + "xvmaxi_w", "xvmaxi_wu", "xvmini_w", "xvmini_wu", + "xvsat_w", "xvsat_wu", + "xvslli_w", "xvsrli_w", "xvsrai_w", "xvrotri_w", + "xvsrlri_w", "xvsrari_w", + "xvbitclri_w", "xvbitseti_w", "xvbitrevi_w", + "xvseqi_w", "xvslei_w", "xvslei_wu", "xvslti_w", "xvslti_wu", + "xvrepl128vei_w", "xvshuf4i_w", "xvpickve_w"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["xvaddi_du", "xvsubi_du", + "xvmaxi_d", "xvmaxi_du", "xvmini_d", "xvmini_du", + "xvsat_d", "xvsat_du", + "xvslli_d", "xvsrli_d", "xvsrai_d", "xvrotri_d", + "xvsrlri_d", "xvsrari_d", + "xvbitclri_d", "xvbitseti_d", "xvbitrevi_d", + "xvseqi_d", "xvslei_d", "xvslei_du", "xvslti_d", "xvslti_du", + "xvrepl128vei_d", "xvpermi_d", "xvpickve_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +foreach inst = ["xvhaddw_h_b", "xvhaddw_hu_bu", "xvhsubw_h_b", "xvhsubw_hu_bu", + "xvaddwev_h_b", "xvaddwod_h_b", "xvsubwev_h_b", "xvsubwod_h_b", + "xvaddwev_h_bu", "xvaddwod_h_bu", "xvsubwev_h_bu", "xvsubwod_h_bu", + "xvaddwev_h_bu_b", "xvaddwod_h_bu_b", + "xvmulwev_h_b", "xvmulwod_h_b", "xvmulwev_h_bu", "xvmulwod_h_bu", + "xvmulwev_h_bu_b", "xvmulwod_h_bu_b"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], + [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; + +foreach inst = ["xvhaddw_w_h", "xvhaddw_wu_hu", "xvhsubw_w_h", "xvhsubw_wu_hu", + "xvaddwev_w_h", "xvaddwod_w_h", "xvsubwev_w_h", "xvsubwod_w_h", + "xvaddwev_w_hu", "xvaddwod_w_hu", "xvsubwev_w_hu", "xvsubwod_w_hu", + "xvaddwev_w_hu_h", "xvaddwod_w_hu_h", + "xvmulwev_w_h", "xvmulwod_w_h", "xvmulwev_w_hu", "xvmulwod_w_hu", + "xvmulwev_w_hu_h", "xvmulwod_w_hu_h"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], + [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; + +foreach inst = ["xvhaddw_d_w", "xvhaddw_du_wu", "xvhsubw_d_w", "xvhsubw_du_wu", + "xvaddwev_d_w", "xvaddwod_d_w", "xvsubwev_d_w", "xvsubwod_d_w", + "xvaddwev_d_wu", "xvaddwod_d_wu", "xvsubwev_d_wu", "xvsubwod_d_wu", + "xvaddwev_d_wu_w", 
"xvaddwod_d_wu_w", + "xvmulwev_d_w", "xvmulwod_d_w", "xvmulwev_d_wu", "xvmulwod_d_wu", + "xvmulwev_d_wu_w", "xvmulwod_d_wu_w"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], + [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; + +foreach inst = ["xvsrln_b_h", "xvsran_b_h", "xvsrlrn_b_h", "xvsrarn_b_h", + "xvssrln_b_h", "xvssran_b_h", "xvssrln_bu_h", "xvssran_bu_h", + "xvssrlrn_b_h", "xvssrarn_b_h", "xvssrlrn_bu_h", "xvssrarn_bu_h"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], + [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; + +foreach inst = ["xvsrln_h_w", "xvsran_h_w", "xvsrlrn_h_w", "xvsrarn_h_w", + "xvssrln_h_w", "xvssran_h_w", "xvssrln_hu_w", "xvssran_hu_w", + "xvssrlrn_h_w", "xvssrarn_h_w", "xvssrlrn_hu_w", "xvssrarn_hu_w"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], + [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; + +foreach inst = ["xvsrln_w_d", "xvsran_w_d", "xvsrlrn_w_d", "xvsrarn_w_d", + "xvssrln_w_d", "xvssran_w_d", "xvssrln_wu_d", "xvssran_wu_d", + "xvssrlrn_w_d", "xvssrarn_w_d", "xvssrlrn_wu_d", "xvssrarn_wu_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], + [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + +foreach inst = ["xvmadd_b", "xvmsub_b", "xvfrstp_b", "xvbitsel_v", "xvshuf_b"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +foreach inst = ["xvmadd_h", "xvmsub_h", "xvfrstp_h", "xvshuf_h"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +foreach inst = ["xvmadd_w", "xvmsub_w", "xvshuf_w"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +foreach inst = ["xvmadd_d", "xvmsub_d", "xvshuf_d"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + +foreach inst = ["xvsrlni_b_h", "xvsrani_b_h", "xvsrlrni_b_h", "xvsrarni_b_h", + "xvssrlni_b_h", "xvssrani_b_h", "xvssrlni_bu_h", "xvssrani_bu_h", + "xvssrlrni_b_h", "xvssrarni_b_h", "xvssrlrni_bu_h", "xvssrarni_bu_h", + "xvfrstpi_b", "xvbitseli_b", "xvextrins_b", "xvpermi_q"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["xvsrlni_h_w", "xvsrani_h_w", "xvsrlrni_h_w", "xvsrarni_h_w", + "xvssrlni_h_w", "xvssrani_h_w", "xvssrlni_hu_w", "xvssrani_hu_w", + "xvssrlrni_h_w", "xvssrarni_h_w", "xvssrlrni_hu_w", "xvssrarni_hu_w", + "xvfrstpi_h", "xvextrins_h"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["xvsrlni_w_d", "xvsrani_w_d", "xvsrlrni_w_d", "xvsrarni_w_d", + "xvssrlni_w_d", "xvssrani_w_d", "xvssrlni_wu_d", "xvssrani_wu_d", + "xvssrlrni_w_d", "xvssrarni_w_d", "xvssrlrni_wu_d", "xvssrarni_wu_d", + "xvpermi_w", "xvextrins_w", "xvinsve0_w"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["xvsrlni_d_q", "xvsrani_d_q", "xvsrlrni_d_q", "xvsrarni_d_q", + "xvssrlni_d_q", "xvssrani_d_q", "xvssrlni_du_q", "xvssrani_du_q", + "xvssrlrni_d_q", "xvssrarni_d_q", "xvssrlrni_du_q", "xvssrarni_du_q", + "xvshuf4i_d", "xvextrins_d", "xvinsve0_d"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], + [IntrNoMem, 
ImmArg>]>; + +foreach inst = ["xvmaddwev_h_b", "xvmaddwod_h_b", "xvmaddwev_h_bu", + "xvmaddwod_h_bu", "xvmaddwev_h_bu_b", "xvmaddwod_h_bu_b"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +foreach inst = ["xvmaddwev_w_h", "xvmaddwod_w_h", "xvmaddwev_w_hu", + "xvmaddwod_w_hu", "xvmaddwev_w_hu_h", "xvmaddwod_w_hu_h"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +foreach inst = ["xvmaddwev_d_w", "xvmaddwod_d_w", "xvmaddwev_d_wu", + "xvmaddwod_d_wu", "xvmaddwev_d_wu_w", "xvmaddwod_d_wu_w"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +foreach inst = ["xvmaddwev_q_d", "xvmaddwod_q_d", "xvmaddwev_q_du", + "xvmaddwod_q_du", "xvmaddwev_q_du_d", "xvmaddwod_q_du_d"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + +foreach inst = ["xvsllwil_h_b", "xvsllwil_hu_bu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], + [llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["xvsllwil_w_h", "xvsllwil_wu_hu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], + [llvm_v16i16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["xvsllwil_d_w", "xvsllwil_du_wu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], + [llvm_v8i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +foreach inst = ["xvneg_b", "xvmskltz_b", "xvmskgez_b", "xvmsknz_b", + "xvclo_b", "xvclz_b", "xvpcnt_b", + "xvreplve0_b", "xvreplve0_q"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], [llvm_v32i8_ty], + [IntrNoMem]>; +foreach inst = ["xvneg_h", "xvmskltz_h", "xvclo_h", "xvclz_h", "xvpcnt_h", + "xvreplve0_h"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], [llvm_v16i16_ty], + [IntrNoMem]>; +foreach inst = ["xvneg_w", "xvmskltz_w", "xvclo_w", "xvclz_w", "xvpcnt_w", + "xvreplve0_w"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v8i32_ty], + [IntrNoMem]>; +foreach inst = ["xvneg_d", "xvexth_q_d", "xvexth_qu_du", "xvmskltz_d", + "xvextl_q_d", "xvextl_qu_du", "xvclo_d", "xvclz_d", "xvpcnt_d", + "xvreplve0_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty], + [IntrNoMem]>; + +foreach inst = ["xvexth_h_b", "xvexth_hu_bu", "vext2xv_h_b", "vext2xv_hu_bu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], [llvm_v32i8_ty], + [IntrNoMem]>; +foreach inst = ["xvexth_w_h", "xvexth_wu_hu", "vext2xv_w_h", "vext2xv_wu_hu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v16i16_ty], + [IntrNoMem]>; +foreach inst = ["xvexth_d_w", "xvexth_du_wu", "vext2xv_d_w", "vext2xv_du_wu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v8i32_ty], + [IntrNoMem]>; + +foreach inst = ["vext2xv_w_b", "vext2xv_wu_bu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v32i8_ty], + [IntrNoMem]>; +foreach inst = ["vext2xv_d_h", "vext2xv_du_hu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v16i16_ty], + [IntrNoMem]>; + +foreach inst = ["vext2xv_d_b", "vext2xv_du_bu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v32i8_ty], + [IntrNoMem]>; + +def int_loongarch_lasx_xvldi : VecInt<[llvm_v4i64_ty], [llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lasx_xvrepli_b : VecInt<[llvm_v32i8_ty], [llvm_i32_ty], + [IntrNoMem, ImmArg>]>; 
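The remaining xvrepli_{h,w,d} variants follow. As a hedged illustration (not part of the patch) of the immediate-splat form just defined, here is a 256-bit counterpart of the vrepli_* tests shown earlier; the v4i64 typedef is an assumption mirroring the 128-bit test types, and the code needs -mlasx:

```c
/* Sketch only: splat the constant 1 across a 256-bit LASX vector. */
typedef long long v4i64 __attribute__((vector_size(32), aligned(32)));

v4i64 lasx_splat_one(void) {
  return __builtin_lasx_xvrepli_d(1);  /* operand is an ImmArg immediate */
}
```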
+def int_loongarch_lasx_xvrepli_b : VecInt<[llvm_v32i8_ty], [llvm_i32_ty],
+                                          [IntrNoMem, ImmArg<ArgIndex<0>>]>;
+def int_loongarch_lasx_xvrepli_h : VecInt<[llvm_v16i16_ty], [llvm_i32_ty],
+                                          [IntrNoMem, ImmArg<ArgIndex<0>>]>;
+def int_loongarch_lasx_xvrepli_w : VecInt<[llvm_v8i32_ty], [llvm_i32_ty],
+                                          [IntrNoMem, ImmArg<ArgIndex<0>>]>;
+def int_loongarch_lasx_xvrepli_d : VecInt<[llvm_v4i64_ty], [llvm_i32_ty],
+                                          [IntrNoMem, ImmArg<ArgIndex<0>>]>;
+
+def int_loongarch_lasx_xvreplgr2vr_b : VecInt<[llvm_v32i8_ty], [llvm_i32_ty],
+                                              [IntrNoMem]>;
+def int_loongarch_lasx_xvreplgr2vr_h : VecInt<[llvm_v16i16_ty], [llvm_i32_ty],
+                                              [IntrNoMem]>;
+def int_loongarch_lasx_xvreplgr2vr_w : VecInt<[llvm_v8i32_ty], [llvm_i32_ty],
+                                              [IntrNoMem]>;
+def int_loongarch_lasx_xvreplgr2vr_d : VecInt<[llvm_v4i64_ty], [llvm_i64_ty],
+                                              [IntrNoMem]>;
+
+def int_loongarch_lasx_xvinsgr2vr_w
+    : VecInt<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty, llvm_i32_ty],
+             [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+def int_loongarch_lasx_xvinsgr2vr_d
+    : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty, llvm_i32_ty],
+             [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+
+def int_loongarch_lasx_xvreplve_b
+    : VecInt<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvreplve_h
+    : VecInt<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvreplve_w
+    : VecInt<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvreplve_d
+    : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+foreach inst = ["xvpickve2gr_w", "xvpickve2gr_wu" ] in
+  def int_loongarch_lasx_#inst : VecInt<[llvm_i32_ty],
+                                        [llvm_v8i32_ty, llvm_i32_ty],
+                                        [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+foreach inst = ["xvpickve2gr_d", "xvpickve2gr_du" ] in
+  def int_loongarch_lasx_#inst : VecInt<[llvm_i64_ty],
+                                        [llvm_v4i64_ty, llvm_i32_ty],
+                                        [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
+def int_loongarch_lasx_xbz_b : VecInt<[llvm_i32_ty], [llvm_v32i8_ty],
+                                      [IntrNoMem]>;
+def int_loongarch_lasx_xbz_h : VecInt<[llvm_i32_ty], [llvm_v16i16_ty],
+                                      [IntrNoMem]>;
+def int_loongarch_lasx_xbz_w : VecInt<[llvm_i32_ty], [llvm_v8i32_ty],
+                                      [IntrNoMem]>;
+def int_loongarch_lasx_xbz_d : VecInt<[llvm_i32_ty], [llvm_v4i64_ty],
+                                      [IntrNoMem]>;
+def int_loongarch_lasx_xbz_v : VecInt<[llvm_i32_ty], [llvm_v32i8_ty],
+                                      [IntrNoMem]>;
+
+def int_loongarch_lasx_xbnz_v : VecInt<[llvm_i32_ty], [llvm_v32i8_ty],
+                                       [IntrNoMem]>;
+def int_loongarch_lasx_xbnz_b : VecInt<[llvm_i32_ty], [llvm_v32i8_ty],
+                                       [IntrNoMem]>;
+def int_loongarch_lasx_xbnz_h : VecInt<[llvm_i32_ty], [llvm_v16i16_ty],
+                                       [IntrNoMem]>;
+def int_loongarch_lasx_xbnz_w : VecInt<[llvm_i32_ty], [llvm_v8i32_ty],
+                                       [IntrNoMem]>;
+def int_loongarch_lasx_xbnz_d : VecInt<[llvm_i32_ty], [llvm_v4i64_ty],
+                                       [IntrNoMem]>;
+
+// LASX Float
+
+foreach inst = ["xvfadd_s", "xvfsub_s", "xvfmul_s", "xvfdiv_s",
+                "xvfmax_s", "xvfmin_s", "xvfmaxa_s", "xvfmina_s"] in
+  def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty],
+                                        [llvm_v8f32_ty, llvm_v8f32_ty],
+                                        [IntrNoMem]>;
+foreach inst = ["xvfadd_d", "xvfsub_d", "xvfmul_d", "xvfdiv_d",
+                "xvfmax_d", "xvfmin_d", "xvfmaxa_d", "xvfmina_d"] in
+  def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty],
+                                        [llvm_v4f64_ty, llvm_v4f64_ty],
+                                        [IntrNoMem]>;
+
+foreach inst = ["xvfmadd_s", "xvfmsub_s", "xvfnmadd_s", "xvfnmsub_s"] in
+  def int_loongarch_lasx_#inst
+      : VecInt<[llvm_v8f32_ty],
+               [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+               [IntrNoMem]>;
+foreach inst = ["xvfmadd_d", "xvfmsub_d", "xvfnmadd_d", "xvfnmsub_d"] in
+  def int_loongarch_lasx_#inst
+      : VecInt<[llvm_v4f64_ty],
+               [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+               [IntrNoMem]>;
+
+foreach inst = ["xvflogb_s", "xvfsqrt_s", "xvfrecip_s", "xvfrsqrt_s",
"xvfrint_s", + "xvfrintrne_s", "xvfrintrz_s", "xvfrintrp_s", "xvfrintrm_s"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty], + [IntrNoMem]>; +foreach inst = ["xvflogb_d", "xvfsqrt_d", "xvfrecip_d", "xvfrsqrt_d", "xvfrint_d", + "xvfrintrne_d", "xvfrintrz_d", "xvfrintrp_d", "xvfrintrm_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; + +foreach inst = ["xvfcvtl_s_h", "xvfcvth_s_h"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v16i16_ty], + [IntrNoMem]>; +foreach inst = ["xvfcvtl_d_s", "xvfcvth_d_s"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v8f32_ty], + [IntrNoMem]>; + +foreach inst = ["xvftintrne_w_s", "xvftintrz_w_s", "xvftintrp_w_s", "xvftintrm_w_s", + "xvftint_w_s", "xvftintrz_wu_s", "xvftint_wu_s", "xvfclass_s"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v8f32_ty], + [IntrNoMem]>; +foreach inst = ["xvftintrne_l_d", "xvftintrz_l_d", "xvftintrp_l_d", "xvftintrm_l_d", + "xvftint_l_d", "xvftintrz_lu_d", "xvftint_lu_d", "xvfclass_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; + +foreach inst = ["xvftintrnel_l_s", "xvftintrneh_l_s", "xvftintrzl_l_s", + "xvftintrzh_l_s", "xvftintrpl_l_s", "xvftintrph_l_s", + "xvftintrml_l_s", "xvftintrmh_l_s", "xvftintl_l_s", + "xvftinth_l_s"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v8f32_ty], + [IntrNoMem]>; + +foreach inst = ["xvffint_s_w", "xvffint_s_wu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v8i32_ty], + [IntrNoMem]>; +foreach inst = ["xvffint_d_l", "xvffint_d_lu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v4i64_ty], + [IntrNoMem]>; + +foreach inst = ["xvffintl_d_w", "xvffinth_d_w"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v8i32_ty], + [IntrNoMem]>; + +foreach inst = ["xvffint_s_l"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], + [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; +foreach inst = ["xvftintrne_w_d", "xvftintrz_w_d", "xvftintrp_w_d", "xvftintrm_w_d", + "xvftint_w_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], + [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + +foreach inst = ["xvfcvt_h_s"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], + [llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; +foreach inst = ["xvfcvt_s_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], + [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + +foreach inst = ["xvfcmp_caf_s", "xvfcmp_cun_s", "xvfcmp_ceq_s", "xvfcmp_cueq_s", + "xvfcmp_clt_s", "xvfcmp_cult_s", "xvfcmp_cle_s", "xvfcmp_cule_s", + "xvfcmp_cne_s", "xvfcmp_cor_s", "xvfcmp_cune_s", + "xvfcmp_saf_s", "xvfcmp_sun_s", "xvfcmp_seq_s", "xvfcmp_sueq_s", + "xvfcmp_slt_s", "xvfcmp_sult_s", "xvfcmp_sle_s", "xvfcmp_sule_s", + "xvfcmp_sne_s", "xvfcmp_sor_s", "xvfcmp_sune_s"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; +foreach inst = ["xvfcmp_caf_d", "xvfcmp_cun_d", "xvfcmp_ceq_d", "xvfcmp_cueq_d", + "xvfcmp_clt_d", "xvfcmp_cult_d", "xvfcmp_cle_d", "xvfcmp_cule_d", + "xvfcmp_cne_d", "xvfcmp_cor_d", "xvfcmp_cune_d", + "xvfcmp_saf_d", "xvfcmp_sun_d", "xvfcmp_seq_d", "xvfcmp_sueq_d", + "xvfcmp_slt_d", "xvfcmp_sult_d", "xvfcmp_sle_d", "xvfcmp_sule_d", + "xvfcmp_sne_d", "xvfcmp_sor_d", "xvfcmp_sune_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + +def 
int_loongarch_lasx_xvpickve_w_f
+    : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty],
+             [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+def int_loongarch_lasx_xvpickve_d_f
+    : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty],
+             [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
+// LASX load/store
+def int_loongarch_lasx_xvld
+    : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty],
+             [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+def int_loongarch_lasx_xvldx
+    : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i64_ty],
+             [IntrReadMem, IntrArgMemOnly]>;
+def int_loongarch_lasx_xvldrepl_b
+    : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty],
+             [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+def int_loongarch_lasx_xvldrepl_h
+    : VecInt<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_i32_ty],
+             [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+def int_loongarch_lasx_xvldrepl_w
+    : VecInt<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+             [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+def int_loongarch_lasx_xvldrepl_d
+    : VecInt<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty],
+             [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
+
+def int_loongarch_lasx_xvst
+    : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty],
+             [IntrWriteMem, IntrArgMemOnly, ImmArg<ArgIndex<2>>]>;
+def int_loongarch_lasx_xvstx
+    : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i64_ty],
+             [IntrWriteMem, IntrArgMemOnly]>;
+def int_loongarch_lasx_xvstelm_b
+    : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+             [IntrWriteMem, IntrArgMemOnly, ImmArg<ArgIndex<2>>,
+              ImmArg<ArgIndex<3>>]>;
+def int_loongarch_lasx_xvstelm_h
+    : VecInt<[], [llvm_v16i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+             [IntrWriteMem, IntrArgMemOnly, ImmArg<ArgIndex<2>>,
+              ImmArg<ArgIndex<3>>]>;
+def int_loongarch_lasx_xvstelm_w
+    : VecInt<[], [llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+             [IntrWriteMem, IntrArgMemOnly, ImmArg<ArgIndex<2>>,
+              ImmArg<ArgIndex<3>>]>;
+def int_loongarch_lasx_xvstelm_d
+    : VecInt<[], [llvm_v4i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+             [IntrWriteMem, IntrArgMemOnly, ImmArg<ArgIndex<2>>,
+              ImmArg<ArgIndex<3>>]>;
 } // TargetPrefix = "loongarch"
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 27219e89dc5f75bc3292617537a539333fd19f1d..435800d9e5f9fe3cc68d70bc6c824d4b35493724 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -37,6 +37,7 @@
 #include "llvm/IR/IntrinsicsBPF.h"
 #include "llvm/IR/IntrinsicsDirectX.h"
 #include "llvm/IR/IntrinsicsHexagon.h"
+#include "llvm/IR/IntrinsicsLoongArch.h"
 #include "llvm/IR/IntrinsicsMips.h"
 #include "llvm/IR/IntrinsicsNVPTX.h"
 #include "llvm/IR/IntrinsicsPowerPC.h"
diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td
index 75b65fe69f26291b79d848191928339727b554ae..2a4c991a43b09cfb52844f58e6dcb760e2b2abae 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.td
+++ b/llvm/lib/Target/LoongArch/LoongArch.td
@@ -105,6 +105,10 @@ def FeatureUAL
 def FeatureRelax
     : SubtargetFeature<"relax", "HasLinkerRelax", "true",
                        "Enable Linker relaxation">;
+// Experimental auto vectorization
+def FeatureAutoVec
+    : SubtargetFeature<"auto-vec", "HasExpAutoVec", "true",
+                       "Experimental auto vectorization">;
 
 //===----------------------------------------------------------------------===//
 // Registers, instruction descriptions ...
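FeatureAutoVec by itself only introduces the HasExpAutoVec predicate; the code that consults it (presumably the target's TargetTransformInfo, following the usual subtarget-feature pattern) is not visible in this hunk. Since SubtargetFeature<"auto-vec", ...> expands to an ordinary target attribute, the experimental path would be toggled like any other feature, e.g. (assumed invocation):

  llc -mtriple=loongarch64 -mattr=+lasx,+auto-vec input.ll

with the expectation that leaving +auto-vec off keeps code generation unchanged.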
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index ae7167cb5ce7211b977edf4d122d8d68494b7781..01b2f720f902cee5a44e928936421ae097688e24 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -15,6 +15,7 @@
 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
 #include "MCTargetDesc/LoongArchMatInt.h"
 #include "llvm/Support/KnownBits.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
@@ -75,7 +76,64 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
     ReplaceNode(Node, CurDAG->getMachineNode(ADDIOp, DL, VT, TFI, Imm));
     return;
   }
-  // TODO: Add selection nodes needed later.
+  case ISD::BITCAST: {
+    if (VT.is128BitVector() || VT.is256BitVector()) {
+      ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
+      CurDAG->RemoveDeadNode(Node);
+      return;
+    }
+    break;
+  }
+  case ISD::BUILD_VECTOR: {
+    // Select appropriate [x]vrepli.[bhwd] instructions for constant splats of
+    // 128/256-bit when LSX/LASX is enabled.
+    BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Node);
+    APInt SplatValue, SplatUndef;
+    unsigned SplatBitSize;
+    bool HasAnyUndefs;
+    unsigned Op;
+    EVT ViaVecTy;
+    bool Is128Vec = BVN->getValueType(0).is128BitVector();
+    bool Is256Vec = BVN->getValueType(0).is256BitVector();
+
+    if (!Subtarget->hasExtLSX() || (!Is128Vec && !Is256Vec))
+      break;
+    if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+                              HasAnyUndefs, 8))
+      break;
+
+    switch (SplatBitSize) {
+    default:
+      break;
+    case 8:
+      Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
+      ViaVecTy = Is256Vec ? MVT::v32i8 : MVT::v16i8;
+      break;
+    case 16:
+      Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
+      ViaVecTy = Is256Vec ? MVT::v16i16 : MVT::v8i16;
+      break;
+    case 32:
+      Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
+      ViaVecTy = Is256Vec ? MVT::v8i32 : MVT::v4i32;
+      break;
+    case 64:
+      Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
+      ViaVecTy = Is256Vec ? MVT::v4i64 : MVT::v2i64;
+      break;
+    }
+
+    SDNode *Res;
+    // If we have a signed 10 bit integer, we can splat it directly.
+    if (SplatValue.isSignedIntN(10)) {
+      SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL,
+                                              ViaVecTy.getVectorElementType());
+      Res = CurDAG->getMachineNode(Op, DL, ViaVecTy, Imm);
+      ReplaceNode(Node, Res);
+      return;
+    }
+    break;
+  }
   }
 
   // Select the default instruction.
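The BUILD_VECTOR case above intentionally handles only constant splats whose value fits a signed 10-bit immediate, matching the [x]vrepli.[bhwd] encodings; anything else falls through to the generic patterns and the custom lowerBUILD_VECTOR added later in this patch. A rough source-level illustration of a splat this path should catch, assuming clang's -mlasx and GCC-style vector extensions:

  typedef short v16i16 __attribute__((vector_size(32)));

  /* All sixteen lanes hold the constant 7: the DAG sees a splat BUILD_VECTOR
     and this case should select a single xvrepli.h (via PseudoXVREPLI_H). */
  v16i16 splat7(void) {
    return (v16i16){7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7};
  }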
@@ -262,6 +320,96 @@ bool LoongArchDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
   return false;
 }
 
+bool LoongArchDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm,
+                                         unsigned MinSizeInBits) const {
+  if (!Subtarget->hasExtLSX())
+    return false;
+
+  BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N);
+
+  if (!Node)
+    return false;
+
+  APInt SplatValue, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+
+  if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
+                             MinSizeInBits, /*IsBigEndian=*/false))
+    return false;
+
+  Imm = SplatValue;
+
+  return true;
+}
+
+template <unsigned ImmBitSize, bool IsSigned>
+bool LoongArchDAGToDAGISel::selectVSplatImm(SDValue N, SDValue &SplatVal) {
+  APInt ImmValue;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
+      ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+    if (IsSigned && ImmValue.isSignedIntN(ImmBitSize)) {
+      SplatVal = CurDAG->getTargetConstant(ImmValue.getSExtValue(), SDLoc(N),
+                                           Subtarget->getGRLenVT());
+      return true;
+    }
+    if (!IsSigned && ImmValue.isIntN(ImmBitSize)) {
+      SplatVal = CurDAG->getTargetConstant(ImmValue.getZExtValue(), SDLoc(N),
+                                           Subtarget->getGRLenVT());
+      return true;
+    }
+  }
+
+  return false;
+}
+
+bool LoongArchDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N,
+                                                    SDValue &SplatImm) const {
+  APInt ImmValue;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
+      ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+    int32_t Log2 = (~ImmValue).exactLogBase2();
+
+    if (Log2 != -1) {
+      SplatImm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy);
+      return true;
+    }
+  }
+
+  return false;
+}
+
+bool LoongArchDAGToDAGISel::selectVSplatUimmPow2(SDValue N,
+                                                 SDValue &SplatImm) const {
+  APInt ImmValue;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
+      ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+    int32_t Log2 = ImmValue.exactLogBase2();
+
+    if (Log2 != -1) {
+      SplatImm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy);
+      return true;
+    }
+  }
+
+  return false;
+}
+
 // This pass converts a legalized DAG into a LoongArch-specific DAG, ready
 // for instruction scheduling.
 FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM) {
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
index 3099407aea3ee595acedb7e4a3d55c09562446ea..5e3d6ccc3755c06f3cf5f582f0b3299b30bc8cb6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
@@ -56,6 +56,14 @@ public:
   bool selectSExti32(SDValue N, SDValue &Val);
   bool selectZExti32(SDValue N, SDValue &Val);
 
+  bool selectVSplat(SDNode *N, APInt &Imm, unsigned MinSizeInBits) const;
+
+  template <unsigned ImmBitSize, bool IsSigned = false>
+  bool selectVSplatImm(SDValue N, SDValue &SplatVal);
+
+  bool selectVSplatUimmInvPow2(SDValue N, SDValue &SplatImm) const;
+  bool selectVSplatUimmPow2(SDValue N, SDValue &SplatImm) const;
+
   // Include the pieces autogenerated from the target description.
#include "LoongArchGenDAGISel.inc" }; diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index db5961fc501a0d838413863cf9b60691d2d13a3a..f7eacd56c542f127737c2e5f8c457ebb7f35903f 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -47,53 +47,79 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, : TargetLowering(TM), Subtarget(STI) { MVT GRLenVT = Subtarget.getGRLenVT(); + // Set up the register classes. + addRegisterClass(GRLenVT, &LoongArch::GPRRegClass); if (Subtarget.hasBasicF()) addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass); if (Subtarget.hasBasicD()) addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass); + + static const MVT::SimpleValueType LSXVTs[] = { + MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}; + static const MVT::SimpleValueType LASXVTs[] = { + MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64}; + if (Subtarget.hasExtLSX()) - for (auto VT : {MVT::v4f32, MVT::v2f64, MVT::v16i8, MVT::v8i16, MVT::v4i32, - MVT::v2i64}) + for (MVT VT : LSXVTs) addRegisterClass(VT, &LoongArch::LSX128RegClass); + if (Subtarget.hasExtLASX()) - for (auto VT : {MVT::v8f32, MVT::v4f64, MVT::v32i8, MVT::v16i16, MVT::v8i32, - MVT::v4i64}) + for (MVT VT : LASXVTs) addRegisterClass(VT, &LoongArch::LASX256RegClass); + // Set operations for LA32 and LA64. + setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT, MVT::i1, Promote); - // TODO: add necessary setOperationAction calls later. setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom); setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom); setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom); setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom); setOperationAction(ISD::ROTL, GRLenVT, Expand); setOperationAction(ISD::CTPOP, GRLenVT, Expand); - setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); - setOperationAction(ISD::TRAP, MVT::Other, Legal); - setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool, - ISD::JumpTable}, + ISD::JumpTable, ISD::GlobalTLSAddress}, GRLenVT, Custom); - setOperationAction(ISD::GlobalTLSAddress, GRLenVT, Custom); - - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - - setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); - if (Subtarget.is64Bit()) - setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); + setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand); setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand); setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand); + setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); + setOperationAction(ISD::TRAP, MVT::Other, Legal); + + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + + // Expand bitreverse.i16 with native-width bitrev and shift for now, before + // we get to know which of sll and revb.2h is faster. 
+ setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); + setOperationAction(ISD::BITREVERSE, GRLenVT, Legal); + + // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and + // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16 + // and i32 could still be byte-swapped relatively cheaply. + setOperationAction(ISD::BSWAP, MVT::i16, Custom); + + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, GRLenVT, Expand); + setOperationAction(ISD::SELECT_CC, GRLenVT, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand); + + setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom); + setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand); + + // Set operations for LA64 only. + if (Subtarget.is64Bit()) { setOperationAction(ISD::SHL, MVT::i32, Custom); setOperationAction(ISD::SRA, MVT::i32, Custom); @@ -104,48 +130,39 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ROTL, MVT::i32, Custom); setOperationAction(ISD::CTTZ, MVT::i32, Custom); setOperationAction(ISD::CTLZ, MVT::i32, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom); setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom); - if (Subtarget.hasBasicF() && !Subtarget.hasBasicD()) - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - if (Subtarget.hasBasicF()) - setOperationAction(ISD::FRINT, MVT::f32, Legal); - if (Subtarget.hasBasicD()) - setOperationAction(ISD::FRINT, MVT::f64, Legal); - } + setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom); - // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and - // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16 - // and i32 could still be byte-swapped relatively cheaply. - setOperationAction(ISD::BSWAP, MVT::i16, Custom); - if (Subtarget.is64Bit()) { + setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); setOperationAction(ISD::BSWAP, MVT::i32, Custom); } - // Expand bitreverse.i16 with native-width bitrev and shift for now, before - // we get to know which of sll and revb.2h is faster. - setOperationAction(ISD::BITREVERSE, MVT::i8, Custom); - if (Subtarget.is64Bit()) { - setOperationAction(ISD::BITREVERSE, MVT::i32, Custom); - setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); - } else { - setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); + // Set operations for LA32 only. + + if (!Subtarget.is64Bit()) { setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom); setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); + + // Set libcalls. 
+ setLibcallName(RTLIB::MUL_I128, nullptr); } static const ISD::CondCode FPCCToExpand[] = { ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, ISD::SETGE, ISD::SETNE, ISD::SETGT}; + // Set operations for 'F' feature. + if (Subtarget.hasBasicF()) { setCondCodeAction(FPCCToExpand, MVT::f32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); setOperationAction(ISD::BR_CC, MVT::f32, Expand); setOperationAction(ISD::FMA, MVT::f32, Legal); @@ -158,14 +175,30 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); + + if (Subtarget.is64Bit()) + setOperationAction(ISD::FRINT, MVT::f32, Legal); + + if (!Subtarget.hasBasicD()) { + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + if (Subtarget.is64Bit()) { + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); + } + } } + + // Set operations for 'D' feature. + if (Subtarget.hasBasicD()) { + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); setCondCodeAction(FPCCToExpand, MVT::f64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); setOperationAction(ISD::BR_CC, MVT::f64, Expand); setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal); setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setOperationAction(ISD::FMA, MVT::f64, Legal); setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); @@ -174,32 +207,135 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); - setTruncStoreAction(MVT::f64, MVT::f32, Expand); + + if (Subtarget.is64Bit()) + setOperationAction(ISD::FRINT, MVT::f64, Legal); } - setOperationAction(ISD::BR_JT, MVT::Other, Expand); + // Set operations for 'LSX' feature. - setOperationAction(ISD::BR_CC, GRLenVT, Expand); - setOperationAction(ISD::SELECT_CC, GRLenVT, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand); - if (!Subtarget.is64Bit()) - setLibcallName(RTLIB::MUL_I128, nullptr); + if (Subtarget.hasExtLSX()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { + // Expand all truncating stores and extending loads. + for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { + setTruncStoreAction(VT, InnerVT, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); + } + // By default everything must be expanded. Then we will selectively turn + // on ones that can be effectively codegen'd. 
+ for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) + setOperationAction(Op, VT, Expand); + } - setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom); - setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand); - if ((Subtarget.is64Bit() && Subtarget.hasBasicF() && - !Subtarget.hasBasicD())) { - setOperationAction(ISD::SINT_TO_FP, GRLenVT, Custom); - setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom); + for (MVT VT : LSXVTs) { + setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal); + setOperationAction(ISD::BITCAST, VT, Legal); + setOperationAction(ISD::UNDEF, VT, Legal); + + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + + setOperationAction(ISD::SETCC, VT, Legal); + setOperationAction(ISD::VSELECT, VT, Legal); + } + for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); + setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, + Legal); + setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, + VT, Legal); + setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); + setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); + setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); + setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); + setCondCodeAction( + {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, + Expand); + } + for (MVT VT : {MVT::v4i32, MVT::v2i64}) { + setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal); + setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal); + } + for (MVT VT : {MVT::v4f32, MVT::v2f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); + setOperationAction(ISD::FMA, VT, Legal); + setOperationAction(ISD::FSQRT, VT, Legal); + setOperationAction(ISD::FNEG, VT, Legal); + setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, + ISD::SETUGE, ISD::SETUGT}, + VT, Expand); + } } + // Set operations for 'LASX' feature. 
+ + if (Subtarget.hasExtLASX()) { + for (MVT VT : LASXVTs) { + setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal); + setOperationAction(ISD::BITCAST, VT, Legal); + setOperationAction(ISD::UNDEF, VT, Legal); + + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + + setOperationAction(ISD::SETCC, VT, Legal); + setOperationAction(ISD::VSELECT, VT, Legal); + } + for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal); + setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, + Legal); + setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, + VT, Legal); + setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal); + setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal); + setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal); + setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal); + setCondCodeAction( + {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, + Expand); + } + for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) { + setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal); + setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal); + } + for (MVT VT : {MVT::v8f32, MVT::v4f64}) { + setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal); + setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal); + setOperationAction(ISD::FMA, VT, Legal); + setOperationAction(ISD::FSQRT, VT, Legal); + setOperationAction(ISD::FNEG, VT, Legal); + setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, + ISD::SETUGE, ISD::SETUGT}, + VT, Expand); + } + } + + // Set DAG combine for LA32 and LA64. + + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::SRL); + + // Set DAG combine for 'LSX' feature. + + if (Subtarget.hasExtLSX()) + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); + // Compute derived properties from the register classes. computeRegisterProperties(Subtarget.getRegisterInfo()); setStackPointerRegisterToSaveRestore(LoongArch::R3); setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen()); @@ -211,10 +347,6 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment()); setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment()); - - setTargetDAGCombine(ISD::AND); - setTargetDAGCombine(ISD::OR); - setTargetDAGCombine(ISD::SRL); } bool LoongArchTargetLowering::isOffsetFoldingLegal( @@ -269,7 +401,139 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, return lowerRETURNADDR(Op, DAG); case ISD::WRITE_REGISTER: return lowerWRITE_REGISTER(Op, DAG); + case ISD::INSERT_VECTOR_ELT: + return lowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: + return lowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::BUILD_VECTOR: + return lowerBUILD_VECTOR(Op, DAG); + case ISD::VECTOR_SHUFFLE: + return lowerVECTOR_SHUFFLE(Op, DAG); + } + return SDValue(); +} + +SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, + SelectionDAG &DAG) const { + // TODO: custom shuffle. 
+  return SDValue();
+}
+
+static bool isConstantOrUndef(const SDValue Op) {
+  if (Op->isUndef())
+    return true;
+  if (isa<ConstantSDNode>(Op))
+    return true;
+  if (isa<ConstantFPSDNode>(Op))
+    return true;
+  return false;
+}
+
+static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
+  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
+    if (isConstantOrUndef(Op->getOperand(i)))
+      return true;
+  return false;
+}
+
+SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
+  EVT ResTy = Op->getValueType(0);
+  SDLoc DL(Op);
+  APInt SplatValue, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  bool Is128Vec = ResTy.is128BitVector();
+  bool Is256Vec = ResTy.is256BitVector();
+
+  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
+      (!Subtarget.hasExtLASX() || !Is256Vec))
+    return SDValue();
+
+  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
+                            /*MinSplatBits=*/8) &&
+      SplatBitSize <= 64) {
+    // We can only cope with 8, 16, 32, or 64-bit elements.
+    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
+        SplatBitSize != 64)
+      return SDValue();
+
+    EVT ViaVecTy;
+
+    switch (SplatBitSize) {
+    default:
+      return SDValue();
+    case 8:
+      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
+      break;
+    case 16:
+      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
+      break;
+    case 32:
+      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
+      break;
+    case 64:
+      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
+      break;
+    }
+
+    // SelectionDAG::getConstant will promote SplatValue appropriately.
+    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
+
+    // Bitcast to the type we originally wanted.
+    if (ViaVecTy != ResTy)
+      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
+
+    return Result;
+  }
+
+  if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
+    return Op;
+
+  if (!isConstantOrUndefBUILD_VECTOR(Node)) {
+    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
+    // The resulting code is the same length as the expansion, but it doesn't
+    // use memory operations.
+    EVT ResTy = Node->getValueType(0);
+
+    assert(ResTy.isVector());
+
+    unsigned NumElts = ResTy.getVectorNumElements();
+    SDValue Vector = DAG.getUNDEF(ResTy);
+    for (unsigned i = 0; i < NumElts; ++i) {
+      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
+                           Node->getOperand(i),
+                           DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
+    }
+    return Vector;
+  }
+
+  return SDValue();
+}
+
+SDValue
+LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  EVT VecTy = Op->getOperand(0)->getValueType(0);
+  SDValue Idx = Op->getOperand(1);
+  EVT EltTy = VecTy.getVectorElementType();
+  unsigned NumElts = VecTy.getVectorNumElements();
+
+  if (isa<ConstantSDNode>(Idx) &&
+      (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
+       EltTy == MVT::f64 ||
+       cast<ConstantSDNode>(Idx)->getZExtValue() < NumElts / 2))
+    return Op;
+
+  return SDValue();
+}
+
+SDValue
+LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  if (isa<ConstantSDNode>(Op->getOperand(2)))
+    return Op;
   return SDValue();
 }
 
@@ -652,9 +916,24 @@ LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
   return Addr;
 }
 
+template <unsigned N>
+static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
+                                    SelectionDAG &DAG, bool IsSigned = false) {
+  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
+  // Check the ImmArg.
+  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
+      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
+    DAG.getContext()->emitError(Op->getOperationName(0) +
+                                ": argument out of range.");
+    return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
+  }
+  return SDValue();
+}
+
 SDValue
 LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                  SelectionDAG &DAG) const {
+  SDLoc DL(Op);
   switch (Op.getConstantOperandVal(0)) {
   default:
     return SDValue(); // Don't custom lower most intrinsics.
@@ -662,6 +941,271 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
     EVT PtrVT = getPointerTy(DAG.getDataLayout());
     return DAG.getRegister(LoongArch::R2, PtrVT);
   }
+  case Intrinsic::loongarch_lsx_vpickve2gr_d:
+  case Intrinsic::loongarch_lsx_vpickve2gr_du:
+  case Intrinsic::loongarch_lsx_vreplvei_d:
+  case Intrinsic::loongarch_lasx_xvrepl128vei_d:
+    return checkIntrinsicImmArg<1>(Op, 2, DAG);
+  case Intrinsic::loongarch_lsx_vreplvei_w:
+  case Intrinsic::loongarch_lasx_xvrepl128vei_w:
+  case Intrinsic::loongarch_lasx_xvpickve2gr_d:
+  case Intrinsic::loongarch_lasx_xvpickve2gr_du:
+  case Intrinsic::loongarch_lasx_xvpickve_d:
+  case Intrinsic::loongarch_lasx_xvpickve_d_f:
+    return checkIntrinsicImmArg<2>(Op, 2, DAG);
+  case Intrinsic::loongarch_lasx_xvinsve0_d:
+    return checkIntrinsicImmArg<2>(Op, 3, DAG);
+  case Intrinsic::loongarch_lsx_vsat_b:
+  case Intrinsic::loongarch_lsx_vsat_bu:
+  case Intrinsic::loongarch_lsx_vrotri_b:
+  case Intrinsic::loongarch_lsx_vsllwil_h_b:
+  case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
+  case Intrinsic::loongarch_lsx_vsrlri_b:
+  case Intrinsic::loongarch_lsx_vsrari_b:
+  case Intrinsic::loongarch_lsx_vreplvei_h:
+  case Intrinsic::loongarch_lasx_xvsat_b:
+  case Intrinsic::loongarch_lasx_xvsat_bu:
+  case Intrinsic::loongarch_lasx_xvrotri_b:
+  case Intrinsic::loongarch_lasx_xvsllwil_h_b:
+  case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
+  case Intrinsic::loongarch_lasx_xvsrlri_b:
+  case Intrinsic::loongarch_lasx_xvsrari_b:
+  case Intrinsic::loongarch_lasx_xvrepl128vei_h:
+  case Intrinsic::loongarch_lasx_xvpickve_w:
+  case Intrinsic::loongarch_lasx_xvpickve_w_f:
+    return checkIntrinsicImmArg<3>(Op, 2, DAG);
+  case Intrinsic::loongarch_lasx_xvinsve0_w:
+    return checkIntrinsicImmArg<3>(Op, 3, DAG);
+  case Intrinsic::loongarch_lsx_vsat_h:
+  case Intrinsic::loongarch_lsx_vsat_hu:
+  case Intrinsic::loongarch_lsx_vrotri_h:
+  case Intrinsic::loongarch_lsx_vsllwil_w_h:
+  case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
+  case Intrinsic::loongarch_lsx_vsrlri_h:
+  case Intrinsic::loongarch_lsx_vsrari_h:
+  case Intrinsic::loongarch_lsx_vreplvei_b:
+  case Intrinsic::loongarch_lasx_xvsat_h:
+  case Intrinsic::loongarch_lasx_xvsat_hu:
+  case Intrinsic::loongarch_lasx_xvrotri_h:
+  case Intrinsic::loongarch_lasx_xvsllwil_w_h:
+  case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
+  case Intrinsic::loongarch_lasx_xvsrlri_h:
+  case Intrinsic::loongarch_lasx_xvsrari_h:
+  case Intrinsic::loongarch_lasx_xvrepl128vei_b:
+    return checkIntrinsicImmArg<4>(Op, 2, DAG);
+  case Intrinsic::loongarch_lsx_vsrlni_b_h:
+  case Intrinsic::loongarch_lsx_vsrani_b_h:
+  case Intrinsic::loongarch_lsx_vsrlrni_b_h:
+  case Intrinsic::loongarch_lsx_vsrarni_b_h:
+  case Intrinsic::loongarch_lsx_vssrlni_b_h:
+  case Intrinsic::loongarch_lsx_vssrani_b_h:
+  case Intrinsic::loongarch_lsx_vssrlni_bu_h:
+  case Intrinsic::loongarch_lsx_vssrani_bu_h:
+  case Intrinsic::loongarch_lsx_vssrlrni_b_h:
+  case Intrinsic::loongarch_lsx_vssrarni_b_h:
+  case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
+  case
Intrinsic::loongarch_lsx_vssrarni_bu_h: + case Intrinsic::loongarch_lasx_xvsrlni_b_h: + case Intrinsic::loongarch_lasx_xvsrani_b_h: + case Intrinsic::loongarch_lasx_xvsrlrni_b_h: + case Intrinsic::loongarch_lasx_xvsrarni_b_h: + case Intrinsic::loongarch_lasx_xvssrlni_b_h: + case Intrinsic::loongarch_lasx_xvssrani_b_h: + case Intrinsic::loongarch_lasx_xvssrlni_bu_h: + case Intrinsic::loongarch_lasx_xvssrani_bu_h: + case Intrinsic::loongarch_lasx_xvssrlrni_b_h: + case Intrinsic::loongarch_lasx_xvssrarni_b_h: + case Intrinsic::loongarch_lasx_xvssrlrni_bu_h: + case Intrinsic::loongarch_lasx_xvssrarni_bu_h: + return checkIntrinsicImmArg<4>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vsat_w: + case Intrinsic::loongarch_lsx_vsat_wu: + case Intrinsic::loongarch_lsx_vrotri_w: + case Intrinsic::loongarch_lsx_vsllwil_d_w: + case Intrinsic::loongarch_lsx_vsllwil_du_wu: + case Intrinsic::loongarch_lsx_vsrlri_w: + case Intrinsic::loongarch_lsx_vsrari_w: + case Intrinsic::loongarch_lsx_vslei_bu: + case Intrinsic::loongarch_lsx_vslei_hu: + case Intrinsic::loongarch_lsx_vslei_wu: + case Intrinsic::loongarch_lsx_vslei_du: + case Intrinsic::loongarch_lsx_vslti_bu: + case Intrinsic::loongarch_lsx_vslti_hu: + case Intrinsic::loongarch_lsx_vslti_wu: + case Intrinsic::loongarch_lsx_vslti_du: + case Intrinsic::loongarch_lsx_vbsll_v: + case Intrinsic::loongarch_lsx_vbsrl_v: + case Intrinsic::loongarch_lasx_xvsat_w: + case Intrinsic::loongarch_lasx_xvsat_wu: + case Intrinsic::loongarch_lasx_xvrotri_w: + case Intrinsic::loongarch_lasx_xvsllwil_d_w: + case Intrinsic::loongarch_lasx_xvsllwil_du_wu: + case Intrinsic::loongarch_lasx_xvsrlri_w: + case Intrinsic::loongarch_lasx_xvsrari_w: + case Intrinsic::loongarch_lasx_xvslei_bu: + case Intrinsic::loongarch_lasx_xvslei_hu: + case Intrinsic::loongarch_lasx_xvslei_wu: + case Intrinsic::loongarch_lasx_xvslei_du: + case Intrinsic::loongarch_lasx_xvslti_bu: + case Intrinsic::loongarch_lasx_xvslti_hu: + case Intrinsic::loongarch_lasx_xvslti_wu: + case Intrinsic::loongarch_lasx_xvslti_du: + case Intrinsic::loongarch_lasx_xvbsll_v: + case Intrinsic::loongarch_lasx_xvbsrl_v: + return checkIntrinsicImmArg<5>(Op, 2, DAG); + case Intrinsic::loongarch_lsx_vseqi_b: + case Intrinsic::loongarch_lsx_vseqi_h: + case Intrinsic::loongarch_lsx_vseqi_w: + case Intrinsic::loongarch_lsx_vseqi_d: + case Intrinsic::loongarch_lsx_vslei_b: + case Intrinsic::loongarch_lsx_vslei_h: + case Intrinsic::loongarch_lsx_vslei_w: + case Intrinsic::loongarch_lsx_vslei_d: + case Intrinsic::loongarch_lsx_vslti_b: + case Intrinsic::loongarch_lsx_vslti_h: + case Intrinsic::loongarch_lsx_vslti_w: + case Intrinsic::loongarch_lsx_vslti_d: + case Intrinsic::loongarch_lasx_xvseqi_b: + case Intrinsic::loongarch_lasx_xvseqi_h: + case Intrinsic::loongarch_lasx_xvseqi_w: + case Intrinsic::loongarch_lasx_xvseqi_d: + case Intrinsic::loongarch_lasx_xvslei_b: + case Intrinsic::loongarch_lasx_xvslei_h: + case Intrinsic::loongarch_lasx_xvslei_w: + case Intrinsic::loongarch_lasx_xvslei_d: + case Intrinsic::loongarch_lasx_xvslti_b: + case Intrinsic::loongarch_lasx_xvslti_h: + case Intrinsic::loongarch_lasx_xvslti_w: + case Intrinsic::loongarch_lasx_xvslti_d: + return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true); + case Intrinsic::loongarch_lsx_vsrlni_h_w: + case Intrinsic::loongarch_lsx_vsrani_h_w: + case Intrinsic::loongarch_lsx_vsrlrni_h_w: + case Intrinsic::loongarch_lsx_vsrarni_h_w: + case Intrinsic::loongarch_lsx_vssrlni_h_w: + case Intrinsic::loongarch_lsx_vssrani_h_w: + case 
Intrinsic::loongarch_lsx_vssrlni_hu_w: + case Intrinsic::loongarch_lsx_vssrani_hu_w: + case Intrinsic::loongarch_lsx_vssrlrni_h_w: + case Intrinsic::loongarch_lsx_vssrarni_h_w: + case Intrinsic::loongarch_lsx_vssrlrni_hu_w: + case Intrinsic::loongarch_lsx_vssrarni_hu_w: + case Intrinsic::loongarch_lsx_vfrstpi_b: + case Intrinsic::loongarch_lsx_vfrstpi_h: + case Intrinsic::loongarch_lasx_xvsrlni_h_w: + case Intrinsic::loongarch_lasx_xvsrani_h_w: + case Intrinsic::loongarch_lasx_xvsrlrni_h_w: + case Intrinsic::loongarch_lasx_xvsrarni_h_w: + case Intrinsic::loongarch_lasx_xvssrlni_h_w: + case Intrinsic::loongarch_lasx_xvssrani_h_w: + case Intrinsic::loongarch_lasx_xvssrlni_hu_w: + case Intrinsic::loongarch_lasx_xvssrani_hu_w: + case Intrinsic::loongarch_lasx_xvssrlrni_h_w: + case Intrinsic::loongarch_lasx_xvssrarni_h_w: + case Intrinsic::loongarch_lasx_xvssrlrni_hu_w: + case Intrinsic::loongarch_lasx_xvssrarni_hu_w: + case Intrinsic::loongarch_lasx_xvfrstpi_b: + case Intrinsic::loongarch_lasx_xvfrstpi_h: + return checkIntrinsicImmArg<5>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vsat_d: + case Intrinsic::loongarch_lsx_vsat_du: + case Intrinsic::loongarch_lsx_vrotri_d: + case Intrinsic::loongarch_lsx_vsrlri_d: + case Intrinsic::loongarch_lsx_vsrari_d: + case Intrinsic::loongarch_lasx_xvsat_d: + case Intrinsic::loongarch_lasx_xvsat_du: + case Intrinsic::loongarch_lasx_xvrotri_d: + case Intrinsic::loongarch_lasx_xvsrlri_d: + case Intrinsic::loongarch_lasx_xvsrari_d: + return checkIntrinsicImmArg<6>(Op, 2, DAG); + case Intrinsic::loongarch_lsx_vsrlni_w_d: + case Intrinsic::loongarch_lsx_vsrani_w_d: + case Intrinsic::loongarch_lsx_vsrlrni_w_d: + case Intrinsic::loongarch_lsx_vsrarni_w_d: + case Intrinsic::loongarch_lsx_vssrlni_w_d: + case Intrinsic::loongarch_lsx_vssrani_w_d: + case Intrinsic::loongarch_lsx_vssrlni_wu_d: + case Intrinsic::loongarch_lsx_vssrani_wu_d: + case Intrinsic::loongarch_lsx_vssrlrni_w_d: + case Intrinsic::loongarch_lsx_vssrarni_w_d: + case Intrinsic::loongarch_lsx_vssrlrni_wu_d: + case Intrinsic::loongarch_lsx_vssrarni_wu_d: + case Intrinsic::loongarch_lasx_xvsrlni_w_d: + case Intrinsic::loongarch_lasx_xvsrani_w_d: + case Intrinsic::loongarch_lasx_xvsrlrni_w_d: + case Intrinsic::loongarch_lasx_xvsrarni_w_d: + case Intrinsic::loongarch_lasx_xvssrlni_w_d: + case Intrinsic::loongarch_lasx_xvssrani_w_d: + case Intrinsic::loongarch_lasx_xvssrlni_wu_d: + case Intrinsic::loongarch_lasx_xvssrani_wu_d: + case Intrinsic::loongarch_lasx_xvssrlrni_w_d: + case Intrinsic::loongarch_lasx_xvssrarni_w_d: + case Intrinsic::loongarch_lasx_xvssrlrni_wu_d: + case Intrinsic::loongarch_lasx_xvssrarni_wu_d: + return checkIntrinsicImmArg<6>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vsrlni_d_q: + case Intrinsic::loongarch_lsx_vsrani_d_q: + case Intrinsic::loongarch_lsx_vsrlrni_d_q: + case Intrinsic::loongarch_lsx_vsrarni_d_q: + case Intrinsic::loongarch_lsx_vssrlni_d_q: + case Intrinsic::loongarch_lsx_vssrani_d_q: + case Intrinsic::loongarch_lsx_vssrlni_du_q: + case Intrinsic::loongarch_lsx_vssrani_du_q: + case Intrinsic::loongarch_lsx_vssrlrni_d_q: + case Intrinsic::loongarch_lsx_vssrarni_d_q: + case Intrinsic::loongarch_lsx_vssrlrni_du_q: + case Intrinsic::loongarch_lsx_vssrarni_du_q: + case Intrinsic::loongarch_lasx_xvsrlni_d_q: + case Intrinsic::loongarch_lasx_xvsrani_d_q: + case Intrinsic::loongarch_lasx_xvsrlrni_d_q: + case Intrinsic::loongarch_lasx_xvsrarni_d_q: + case Intrinsic::loongarch_lasx_xvssrlni_d_q: + case Intrinsic::loongarch_lasx_xvssrani_d_q: + case 
Intrinsic::loongarch_lasx_xvssrlni_du_q:
+  case Intrinsic::loongarch_lasx_xvssrani_du_q:
+  case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
+  case Intrinsic::loongarch_lasx_xvssrarni_d_q:
+  case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
+  case Intrinsic::loongarch_lasx_xvssrarni_du_q:
+    return checkIntrinsicImmArg<7>(Op, 3, DAG);
+  case Intrinsic::loongarch_lsx_vnori_b:
+  case Intrinsic::loongarch_lsx_vshuf4i_b:
+  case Intrinsic::loongarch_lsx_vshuf4i_h:
+  case Intrinsic::loongarch_lsx_vshuf4i_w:
+  case Intrinsic::loongarch_lasx_xvnori_b:
+  case Intrinsic::loongarch_lasx_xvshuf4i_b:
+  case Intrinsic::loongarch_lasx_xvshuf4i_h:
+  case Intrinsic::loongarch_lasx_xvshuf4i_w:
+  case Intrinsic::loongarch_lasx_xvpermi_d:
+    return checkIntrinsicImmArg<8>(Op, 2, DAG);
+  case Intrinsic::loongarch_lsx_vshuf4i_d:
+  case Intrinsic::loongarch_lsx_vpermi_w:
+  case Intrinsic::loongarch_lsx_vbitseli_b:
+  case Intrinsic::loongarch_lsx_vextrins_b:
+  case Intrinsic::loongarch_lsx_vextrins_h:
+  case Intrinsic::loongarch_lsx_vextrins_w:
+  case Intrinsic::loongarch_lsx_vextrins_d:
+  case Intrinsic::loongarch_lasx_xvshuf4i_d:
+  case Intrinsic::loongarch_lasx_xvpermi_w:
+  case Intrinsic::loongarch_lasx_xvpermi_q:
+  case Intrinsic::loongarch_lasx_xvbitseli_b:
+  case Intrinsic::loongarch_lasx_xvextrins_b:
+  case Intrinsic::loongarch_lasx_xvextrins_h:
+  case Intrinsic::loongarch_lasx_xvextrins_w:
+  case Intrinsic::loongarch_lasx_xvextrins_d:
+    return checkIntrinsicImmArg<8>(Op, 3, DAG);
+  case Intrinsic::loongarch_lsx_vrepli_b:
+  case Intrinsic::loongarch_lsx_vrepli_h:
+  case Intrinsic::loongarch_lsx_vrepli_w:
+  case Intrinsic::loongarch_lsx_vrepli_d:
+  case Intrinsic::loongarch_lasx_xvrepli_b:
+  case Intrinsic::loongarch_lasx_xvrepli_h:
+  case Intrinsic::loongarch_lasx_xvrepli_w:
+  case Intrinsic::loongarch_lasx_xvrepli_d:
+    return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
+  case Intrinsic::loongarch_lsx_vldi:
+  case Intrinsic::loongarch_lasx_xvldi:
+    return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
   }
 }
 
@@ -757,6 +1301,34 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
               : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
   }
+  case Intrinsic::loongarch_lsx_vld:
+  case Intrinsic::loongarch_lsx_vldrepl_b:
+  case Intrinsic::loongarch_lasx_xvld:
+  case Intrinsic::loongarch_lasx_xvldrepl_b:
+    return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
+               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
+               : SDValue();
+  case Intrinsic::loongarch_lsx_vldrepl_h:
+  case Intrinsic::loongarch_lasx_xvldrepl_h:
+    return !isShiftedInt<11, 1>(
+               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
+               ? emitIntrinsicWithChainErrorMessage(
+                     Op, "argument out of range or not a multiple of 2", DAG)
+               : SDValue();
+  case Intrinsic::loongarch_lsx_vldrepl_w:
+  case Intrinsic::loongarch_lasx_xvldrepl_w:
+    return !isShiftedInt<10, 2>(
+               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
+               ? emitIntrinsicWithChainErrorMessage(
+                     Op, "argument out of range or not a multiple of 4", DAG)
+               : SDValue();
+  case Intrinsic::loongarch_lsx_vldrepl_d:
+  case Intrinsic::loongarch_lasx_xvldrepl_d:
+    return !isShiftedInt<9, 3>(
+               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
+               ? emitIntrinsicWithChainErrorMessage(
+                     Op, "argument out of range or not a multiple of 8", DAG)
+               : SDValue();
   }
 }
 
@@ -875,6 +1447,63 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
               : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
                                 : Op;
   }
+  case Intrinsic::loongarch_lsx_vst:
+  case Intrinsic::loongarch_lasx_xvst:
+    return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
+               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
+               : SDValue();
+  case Intrinsic::loongarch_lasx_xvstelm_b:
+    return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
+            !isUInt<5>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
+               : SDValue();
+  case Intrinsic::loongarch_lsx_vstelm_b:
+    return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
+            !isUInt<4>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
+               : SDValue();
+  case Intrinsic::loongarch_lasx_xvstelm_h:
+    return (!isShiftedInt<8, 1>(
+                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
+            !isUInt<4>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+               ? emitIntrinsicErrorMessage(
+                     Op, "argument out of range or not a multiple of 2", DAG)
+               : SDValue();
+  case Intrinsic::loongarch_lsx_vstelm_h:
+    return (!isShiftedInt<8, 1>(
+                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
+            !isUInt<3>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+               ? emitIntrinsicErrorMessage(
+                     Op, "argument out of range or not a multiple of 2", DAG)
+               : SDValue();
+  case Intrinsic::loongarch_lasx_xvstelm_w:
+    return (!isShiftedInt<8, 2>(
+                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
+            !isUInt<3>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+               ? emitIntrinsicErrorMessage(
+                     Op, "argument out of range or not a multiple of 4", DAG)
+               : SDValue();
+  case Intrinsic::loongarch_lsx_vstelm_w:
+    return (!isShiftedInt<8, 2>(
+                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
+            !isUInt<2>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+               ? emitIntrinsicErrorMessage(
+                     Op, "argument out of range or not a multiple of 4", DAG)
+               : SDValue();
+  case Intrinsic::loongarch_lasx_xvstelm_d:
+    return (!isShiftedInt<8, 3>(
+                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
+            !isUInt<2>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+               ? emitIntrinsicErrorMessage(
+                     Op, "argument out of range or not a multiple of 8", DAG)
+               : SDValue();
+  case Intrinsic::loongarch_lsx_vstelm_d:
+    return (!isShiftedInt<8, 3>(
+                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
+            !isUInt<1>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
+               ? emitIntrinsicErrorMessage(
+                     Op, "argument out of range or not a multiple of 8", DAG)
+               : SDValue();
   }
 }
 
@@ -1026,16 +1655,122 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
 }
 
-// Helper function that emits error message for intrinsics with chain and return
-// a UNDEF and the chain as the results.
-static void emitErrorAndReplaceIntrinsicWithChainResults(
+// Helper function that emits error message for intrinsics with/without chain
+// and returns a UNDEF and, for chained intrinsics, the chain as the results.
+static void emitErrorAndReplaceIntrinsicResults(
     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
-    StringRef ErrorMsg) {
+    StringRef ErrorMsg, bool WithChain = true) {
   DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
   Results.push_back(DAG.getUNDEF(N->getValueType(0)));
+  if (!WithChain)
+    return;
   Results.push_back(N->getOperand(0));
 }
 
+template <unsigned N>
+static void
+replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
+                         SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
+                         unsigned ResOp) {
+  const StringRef ErrorMsgOOR = "argument out of range";
+  unsigned Imm = cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue();
+  if (!isUInt<N>(Imm)) {
+    emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
+                                        /*WithChain=*/false);
+    return;
+  }
+  SDLoc DL(Node);
+  SDValue Vec = Node->getOperand(1);
+
+  SDValue PickElt =
+      DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
+                  DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
+                  DAG.getValueType(Vec.getValueType().getVectorElementType()));
+  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
+                                PickElt.getValue(0)));
+}
+
+static void replaceVecCondBranchResults(SDNode *N,
+                                        SmallVectorImpl<SDValue> &Results,
+                                        SelectionDAG &DAG,
+                                        const LoongArchSubtarget &Subtarget,
+                                        unsigned ResOp) {
+  SDLoc DL(N);
+  SDValue Vec = N->getOperand(1);
+
+  SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
+  Results.push_back(
+      DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
+}
+
+static void
+replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                                 SelectionDAG &DAG,
+                                 const LoongArchSubtarget &Subtarget) {
+  switch (N->getConstantOperandVal(0)) {
+  default:
+    llvm_unreachable("Unexpected Intrinsic.");
+  case Intrinsic::loongarch_lsx_vpickve2gr_b:
+    replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
+                                LoongArchISD::VPICK_SEXT_ELT);
+    break;
+  case Intrinsic::loongarch_lsx_vpickve2gr_h:
+  case Intrinsic::loongarch_lasx_xvpickve2gr_w:
+    replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
+                                LoongArchISD::VPICK_SEXT_ELT);
+    break;
+  case Intrinsic::loongarch_lsx_vpickve2gr_w:
+    replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
+                                LoongArchISD::VPICK_SEXT_ELT);
+    break;
+  case Intrinsic::loongarch_lsx_vpickve2gr_bu:
+    replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
+                                LoongArchISD::VPICK_ZEXT_ELT);
+    break;
+  case Intrinsic::loongarch_lsx_vpickve2gr_hu:
+  case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
+    replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
+                                LoongArchISD::VPICK_ZEXT_ELT);
+    break;
+  case Intrinsic::loongarch_lsx_vpickve2gr_wu:
+    replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
+                                LoongArchISD::VPICK_ZEXT_ELT);
+    break;
+  case Intrinsic::loongarch_lsx_bz_b:
+  case Intrinsic::loongarch_lsx_bz_h:
+  case Intrinsic::loongarch_lsx_bz_w:
+  case Intrinsic::loongarch_lsx_bz_d:
+  case Intrinsic::loongarch_lasx_xbz_b:
+  case Intrinsic::loongarch_lasx_xbz_h:
+  case Intrinsic::loongarch_lasx_xbz_w:
+  case Intrinsic::loongarch_lasx_xbz_d:
+    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
+                                LoongArchISD::VALL_ZERO);
+    break;
+  case Intrinsic::loongarch_lsx_bz_v:
+  case Intrinsic::loongarch_lasx_xbz_v:
+    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
+                                LoongArchISD::VANY_ZERO);
+    break;
+  case Intrinsic::loongarch_lsx_bnz_b:
+  case Intrinsic::loongarch_lsx_bnz_h:
+  case Intrinsic::loongarch_lsx_bnz_w:
+  case Intrinsic::loongarch_lsx_bnz_d:
+  case Intrinsic::loongarch_lasx_xbnz_b:
+  case Intrinsic::loongarch_lasx_xbnz_h:
+  case Intrinsic::loongarch_lasx_xbnz_w:
+  case Intrinsic::loongarch_lasx_xbnz_d:
+    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
+                                LoongArchISD::VALL_NONZERO);
+    break;
+  case Intrinsic::loongarch_lsx_bnz_v:
+  case Intrinsic::loongarch_lasx_xbnz_v:
+    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
+                                LoongArchISD::VANY_NONZERO);
+    break;
+  }
+}
+
 void LoongArchTargetLowering::ReplaceNodeResults(
     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
   SDLoc DL(N);
@@ -1168,14 +1903,12 @@
       llvm_unreachable("Unexpected Intrinsic.");
     case Intrinsic::loongarch_movfcsr2gr: {
       if (!Subtarget.hasBasicF()) {
-        emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
-                                                     ErrorMsgReqF);
+        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
        return;
      }
      unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
      if (!isUInt<2>(Imm)) {
-        emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
-                                                     ErrorMsgOOR);
+        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
        return;
      }
      SDValue MOVFCSR2GRResults = DAG.getNode(
@@ -1211,7 +1944,7 @@
         {Chain, Op2,                                                          \
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});      \
     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));  \
-    Results.push_back(NODE.getValue(1));                                       \
+    Results.push_back(NODE.getValue(1));                                      \
     break;                                                                    \
   }
     CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
@@ -1220,8 +1953,7 @@
 #define CSR_CASE(ID)                                                          \
   case Intrinsic::loongarch_##ID: {                                           \
     if (!Subtarget.is64Bit())                                                 \
-      emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,           \
-                                                   ErrorMsgReqLA64);          \
+      emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);  \
     break;                                                                    \
   }
     CSR_CASE(csrrd_d);
@@ -1232,8 +1964,7 @@
     case Intrinsic::loongarch_csrrd_w: {
       unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
       if (!isUInt<14>(Imm)) {
-        emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
-                                                     ErrorMsgOOR);
+        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
         return;
       }
       SDValue CSRRDResults =
@@ -1247,8 +1978,7 @@
     case Intrinsic::loongarch_csrwr_w: {
       unsigned Imm = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
       if (!isUInt<14>(Imm)) {
-        emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
-                                                     ErrorMsgOOR);
+        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
         return;
       }
       SDValue CSRWRResults =
@@ -1263,8 +1993,7 @@
     case Intrinsic::loongarch_csrxchg_w: {
       unsigned Imm = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
       if (!isUInt<14>(Imm)) {
-        emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
-                                                     ErrorMsgOOR);
+        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
         return;
       }
       SDValue CSRXCHGResults = DAG.getNode(
@@ -1302,8 +2031,7 @@
     }
     case Intrinsic::loongarch_lddir_d: {
       if (!Subtarget.is64Bit()) {
-        emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
-                                                     ErrorMsgReqLA64);
+        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
         return;
       }
       break;
@@ -1322,6 +2050,10 @@
     Results.push_back(N->getOperand(0));
     break;
   }
+  case ISD::INTRINSIC_WO_CHAIN: {
+    replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
+    break;
+  }
   }
 }
 
@@ -1685,6 +2417,608 @@ static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
+template <unsigned N>
+static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
+                                       SelectionDAG &DAG,
+                                       const LoongArchSubtarget &Subtarget,
+                                       bool IsSigned = false) {
+  SDLoc DL(Node);
+  auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
+  // Check the ImmArg.
+  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
+      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
+    DAG.getContext()->emitError(Node->getOperationName(0) +
+                                ": argument out of range.");
+    return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
+  }
+  return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
+}
+
+template <unsigned N>
+static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
+                                   SelectionDAG &DAG, bool IsSigned = false) {
+  SDLoc DL(Node);
+  EVT ResTy = Node->getValueType(0);
+  auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
+
+  // Check the ImmArg.
+  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
+      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
+    DAG.getContext()->emitError(Node->getOperationName(0) +
+                                ": argument out of range.");
+    return DAG.getNode(ISD::UNDEF, DL, ResTy);
+  }
+  return DAG.getConstant(
+      APInt(ResTy.getScalarType().getSizeInBits(),
+            IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
+      DL, ResTy);
+}
+
+static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
+  SDLoc DL(Node);
+  EVT ResTy = Node->getValueType(0);
+  SDValue Vec = Node->getOperand(2);
+  SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
+  return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
+}
+
+static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
+  SDLoc DL(Node);
+  EVT ResTy = Node->getValueType(0);
+  SDValue One = DAG.getConstant(1, DL, ResTy);
+  SDValue Bit =
+      DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
+
+  return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
+                     DAG.getNOT(DL, Bit, ResTy));
+}
+
+template <unsigned N>
+static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
+  SDLoc DL(Node);
+  EVT ResTy = Node->getValueType(0);
+  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
+  // Check the unsigned ImmArg.
+  if (!isUInt<N>(CImm->getZExtValue())) {
+    DAG.getContext()->emitError(Node->getOperationName(0) +
+                                ": argument out of range.");
+    return DAG.getNode(ISD::UNDEF, DL, ResTy);
+  }
+
+  APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
+  SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
+
+  return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
+}
+
+template <unsigned N>
+static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
+  SDLoc DL(Node);
+  EVT ResTy = Node->getValueType(0);
+  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
+  // Check the unsigned ImmArg.
+  if (!isUInt<N>(CImm->getZExtValue())) {
+    DAG.getContext()->emitError(Node->getOperationName(0) +
+                                ": argument out of range.");
+    return DAG.getNode(ISD::UNDEF, DL, ResTy);
+  }
+
+  APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
+  SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
+  return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
+}
+
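// lowerVectorBitClearImm and lowerVectorBitSetImm above, and
// lowerVectorBitRevImm below, share one shape: materialize the per-element
// single-bit constant (1 << imm), then combine it with AND-of-complement,
// OR, or XOR respectively. A scalar model for one 32-bit lane (function
// names are illustrative; the real code builds whole-vector ISD nodes):

#include <cstdint>

static uint32_t bitclri_w(uint32_t X, unsigned Imm5) {
  return X & ~(UINT32_C(1) << Imm5); // vbitclri.w: clear bit Imm5
}
static uint32_t bitseti_w(uint32_t X, unsigned Imm5) {
  return X | (UINT32_C(1) << Imm5); // vbitseti.w: set bit Imm5
}
static uint32_t bitrevi_w(uint32_t X, unsigned Imm5) {
  return X ^ (UINT32_C(1) << Imm5); // vbitrevi.w: flip bit Imm5
}
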
+template <unsigned N>
+static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
+  SDLoc DL(Node);
+  EVT ResTy = Node->getValueType(0);
+  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
+  // Check the unsigned ImmArg.
+  if (!isUInt<N>(CImm->getZExtValue())) {
+    DAG.getContext()->emitError(Node->getOperationName(0) +
+                                ": argument out of range.");
+    return DAG.getNode(ISD::UNDEF, DL, ResTy);
+  }
+
+  APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
+  SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
+  return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
+}
+
+static SDValue
+performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const LoongArchSubtarget &Subtarget) {
+  SDLoc DL(N);
+  switch (N->getConstantOperandVal(0)) {
+  default:
+    break;
+  case Intrinsic::loongarch_lsx_vadd_b:
+  case Intrinsic::loongarch_lsx_vadd_h:
+  case Intrinsic::loongarch_lsx_vadd_w:
+  case Intrinsic::loongarch_lsx_vadd_d:
+  case Intrinsic::loongarch_lasx_xvadd_b:
+  case Intrinsic::loongarch_lasx_xvadd_h:
+  case Intrinsic::loongarch_lasx_xvadd_w:
+  case Intrinsic::loongarch_lasx_xvadd_d:
+    return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
+                       N->getOperand(2));
+  case Intrinsic::loongarch_lsx_vaddi_bu:
+  case Intrinsic::loongarch_lsx_vaddi_hu:
+  case Intrinsic::loongarch_lsx_vaddi_wu:
+  case Intrinsic::loongarch_lsx_vaddi_du:
+  case Intrinsic::loongarch_lasx_xvaddi_bu:
+  case Intrinsic::loongarch_lasx_xvaddi_hu:
+  case Intrinsic::loongarch_lasx_xvaddi_wu:
+  case Intrinsic::loongarch_lasx_xvaddi_du:
+    return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
+                       lowerVectorSplatImm<5>(N, 2, DAG));
+  case Intrinsic::loongarch_lsx_vsub_b:
+  case Intrinsic::loongarch_lsx_vsub_h:
+  case Intrinsic::loongarch_lsx_vsub_w:
+  case Intrinsic::loongarch_lsx_vsub_d:
+  case Intrinsic::loongarch_lasx_xvsub_b:
+  case Intrinsic::loongarch_lasx_xvsub_h:
+  case Intrinsic::loongarch_lasx_xvsub_w:
+  case Intrinsic::loongarch_lasx_xvsub_d:
+    return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
+                       N->getOperand(2));
+  case Intrinsic::loongarch_lsx_vsubi_bu:
+  case Intrinsic::loongarch_lsx_vsubi_hu:
+  case Intrinsic::loongarch_lsx_vsubi_wu:
+  case Intrinsic::loongarch_lsx_vsubi_du:
+  case Intrinsic::loongarch_lasx_xvsubi_bu:
+  case Intrinsic::loongarch_lasx_xvsubi_hu:
+  case Intrinsic::loongarch_lasx_xvsubi_wu:
+  case Intrinsic::loongarch_lasx_xvsubi_du:
+    return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
+                       lowerVectorSplatImm<5>(N, 2, DAG));
+  case Intrinsic::loongarch_lsx_vneg_b:
+  case Intrinsic::loongarch_lsx_vneg_h:
+  case Intrinsic::loongarch_lsx_vneg_w:
+  case Intrinsic::loongarch_lsx_vneg_d:
+  case Intrinsic::loongarch_lasx_xvneg_b:
+  case Intrinsic::loongarch_lasx_xvneg_h:
+  case Intrinsic::loongarch_lasx_xvneg_w:
+  case Intrinsic::loongarch_lasx_xvneg_d:
+    return DAG.getNode(
+        ISD::SUB, DL, N->getValueType(0),
+        DAG.getConstant(
+            APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
+                  /*isSigned=*/true),
+            SDLoc(N), N->getValueType(0)),
+        N->getOperand(1));
+  case Intrinsic::loongarch_lsx_vmax_b:
+  case Intrinsic::loongarch_lsx_vmax_h:
+  case Intrinsic::loongarch_lsx_vmax_w:
+  case Intrinsic::loongarch_lsx_vmax_d:
+  case Intrinsic::loongarch_lasx_xvmax_b:
+  case Intrinsic::loongarch_lasx_xvmax_h:
+  case Intrinsic::loongarch_lasx_xvmax_w:
+  case Intrinsic::loongarch_lasx_xvmax_d:
+    return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
+                       N->getOperand(2));
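// For the *i (immediate) variants in this switch, lowerVectorSplatImm<5>
// splats the 5-bit immediate across all lanes, sign- or zero-extending it to
// the element width, so the generic SMAX/UMAX/SMIN/UMIN combines apply
// unchanged. A per-lane scalar model for the byte forms (names are
// illustrative, not part of the patch):

#include <algorithm>
#include <cstdint>

static int8_t vmaxi_b_lane(int8_t X, int Simm5) {        // Simm5 in [-16, 15]
  return std::max(X, static_cast<int8_t>(Simm5));        // sign-extended splat
}
static uint8_t vmaxi_bu_lane(uint8_t X, unsigned Uimm5) { // Uimm5 in [0, 31]
  return std::max(X, static_cast<uint8_t>(Uimm5));       // zero-extended splat
}
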
+  case Intrinsic::loongarch_lsx_vmax_bu:
+  case Intrinsic::loongarch_lsx_vmax_hu:
+  case Intrinsic::loongarch_lsx_vmax_wu:
+  case Intrinsic::loongarch_lsx_vmax_du:
+  case Intrinsic::loongarch_lasx_xvmax_bu:
+  case Intrinsic::loongarch_lasx_xvmax_hu:
+  case Intrinsic::loongarch_lasx_xvmax_wu:
+  case Intrinsic::loongarch_lasx_xvmax_du:
+    return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
+                       N->getOperand(2));
+  case Intrinsic::loongarch_lsx_vmaxi_b:
+  case Intrinsic::loongarch_lsx_vmaxi_h:
+  case Intrinsic::loongarch_lsx_vmaxi_w:
+  case Intrinsic::loongarch_lsx_vmaxi_d:
+  case Intrinsic::loongarch_lasx_xvmaxi_b:
+  case Intrinsic::loongarch_lasx_xvmaxi_h:
+  case Intrinsic::loongarch_lasx_xvmaxi_w:
+  case Intrinsic::loongarch_lasx_xvmaxi_d:
+    return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
+                       lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
+  case Intrinsic::loongarch_lsx_vmaxi_bu:
+  case Intrinsic::loongarch_lsx_vmaxi_hu:
+  case Intrinsic::loongarch_lsx_vmaxi_wu:
+  case Intrinsic::loongarch_lsx_vmaxi_du:
+  case Intrinsic::loongarch_lasx_xvmaxi_bu:
+  case Intrinsic::loongarch_lasx_xvmaxi_hu:
+  case Intrinsic::loongarch_lasx_xvmaxi_wu:
+  case Intrinsic::loongarch_lasx_xvmaxi_du:
+    return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
+                       lowerVectorSplatImm<5>(N, 2, DAG));
+  case Intrinsic::loongarch_lsx_vmin_b:
+  case Intrinsic::loongarch_lsx_vmin_h:
+  case Intrinsic::loongarch_lsx_vmin_w:
+  case Intrinsic::loongarch_lsx_vmin_d:
+  case Intrinsic::loongarch_lasx_xvmin_b:
+  case Intrinsic::loongarch_lasx_xvmin_h:
+  case Intrinsic::loongarch_lasx_xvmin_w:
+  case Intrinsic::loongarch_lasx_xvmin_d:
+    return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
+                       N->getOperand(2));
+  case Intrinsic::loongarch_lsx_vmin_bu:
+  case Intrinsic::loongarch_lsx_vmin_hu:
+  case Intrinsic::loongarch_lsx_vmin_wu:
+  case Intrinsic::loongarch_lsx_vmin_du:
+  case Intrinsic::loongarch_lasx_xvmin_bu:
+  case Intrinsic::loongarch_lasx_xvmin_hu:
+  case Intrinsic::loongarch_lasx_xvmin_wu:
+  case Intrinsic::loongarch_lasx_xvmin_du:
+    return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
+                       N->getOperand(2));
+  case Intrinsic::loongarch_lsx_vmini_b:
+  case Intrinsic::loongarch_lsx_vmini_h:
+  case Intrinsic::loongarch_lsx_vmini_w:
+  case Intrinsic::loongarch_lsx_vmini_d:
+  case Intrinsic::loongarch_lasx_xvmini_b:
+  case Intrinsic::loongarch_lasx_xvmini_h:
+  case Intrinsic::loongarch_lasx_xvmini_w:
+  case Intrinsic::loongarch_lasx_xvmini_d:
+    return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
+                       lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
+  case Intrinsic::loongarch_lsx_vmini_bu:
+  case Intrinsic::loongarch_lsx_vmini_hu:
+  case Intrinsic::loongarch_lsx_vmini_wu:
+  case Intrinsic::loongarch_lsx_vmini_du:
+  case Intrinsic::loongarch_lasx_xvmini_bu:
+  case Intrinsic::loongarch_lasx_xvmini_hu:
+  case Intrinsic::loongarch_lasx_xvmini_wu:
+  case Intrinsic::loongarch_lasx_xvmini_du:
+    return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
+                       lowerVectorSplatImm<5>(N, 2, DAG));
+  case Intrinsic::loongarch_lsx_vmul_b:
+  case Intrinsic::loongarch_lsx_vmul_h:
+  case Intrinsic::loongarch_lsx_vmul_w:
+  case Intrinsic::loongarch_lsx_vmul_d:
+  case Intrinsic::loongarch_lasx_xvmul_b:
+  case Intrinsic::loongarch_lasx_xvmul_h:
+  case Intrinsic::loongarch_lasx_xvmul_w:
+  case Intrinsic::loongarch_lasx_xvmul_d:
+    return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
+                       N->getOperand(2));
+  case Intrinsic::loongarch_lsx_vmadd_b:
+  case Intrinsic::loongarch_lsx_vmadd_h:
+  case Intrinsic::loongarch_lsx_vmadd_w:
+  case Intrinsic::loongarch_lsx_vmadd_d:
+  case Intrinsic::loongarch_lasx_xvmadd_b:
+
case Intrinsic::loongarch_lasx_xvmadd_h: + case Intrinsic::loongarch_lasx_xvmadd_w: + case Intrinsic::loongarch_lasx_xvmadd_d: { + EVT ResTy = N->getValueType(0); + return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1), + DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), + N->getOperand(3))); + } + case Intrinsic::loongarch_lsx_vmsub_b: + case Intrinsic::loongarch_lsx_vmsub_h: + case Intrinsic::loongarch_lsx_vmsub_w: + case Intrinsic::loongarch_lsx_vmsub_d: + case Intrinsic::loongarch_lasx_xvmsub_b: + case Intrinsic::loongarch_lasx_xvmsub_h: + case Intrinsic::loongarch_lasx_xvmsub_w: + case Intrinsic::loongarch_lasx_xvmsub_d: { + EVT ResTy = N->getValueType(0); + return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1), + DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), + N->getOperand(3))); + } + case Intrinsic::loongarch_lsx_vdiv_b: + case Intrinsic::loongarch_lsx_vdiv_h: + case Intrinsic::loongarch_lsx_vdiv_w: + case Intrinsic::loongarch_lsx_vdiv_d: + case Intrinsic::loongarch_lasx_xvdiv_b: + case Intrinsic::loongarch_lasx_xvdiv_h: + case Intrinsic::loongarch_lasx_xvdiv_w: + case Intrinsic::loongarch_lasx_xvdiv_d: + return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vdiv_bu: + case Intrinsic::loongarch_lsx_vdiv_hu: + case Intrinsic::loongarch_lsx_vdiv_wu: + case Intrinsic::loongarch_lsx_vdiv_du: + case Intrinsic::loongarch_lasx_xvdiv_bu: + case Intrinsic::loongarch_lasx_xvdiv_hu: + case Intrinsic::loongarch_lasx_xvdiv_wu: + case Intrinsic::loongarch_lasx_xvdiv_du: + return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmod_b: + case Intrinsic::loongarch_lsx_vmod_h: + case Intrinsic::loongarch_lsx_vmod_w: + case Intrinsic::loongarch_lsx_vmod_d: + case Intrinsic::loongarch_lasx_xvmod_b: + case Intrinsic::loongarch_lasx_xvmod_h: + case Intrinsic::loongarch_lasx_xvmod_w: + case Intrinsic::loongarch_lasx_xvmod_d: + return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmod_bu: + case Intrinsic::loongarch_lsx_vmod_hu: + case Intrinsic::loongarch_lsx_vmod_wu: + case Intrinsic::loongarch_lsx_vmod_du: + case Intrinsic::loongarch_lasx_xvmod_bu: + case Intrinsic::loongarch_lasx_xvmod_hu: + case Intrinsic::loongarch_lasx_xvmod_wu: + case Intrinsic::loongarch_lasx_xvmod_du: + return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vand_v: + case Intrinsic::loongarch_lasx_xvand_v: + return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vor_v: + case Intrinsic::loongarch_lasx_xvor_v: + return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vxor_v: + case Intrinsic::loongarch_lasx_xvxor_v: + return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vnor_v: + case Intrinsic::loongarch_lasx_xvnor_v: { + SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + return DAG.getNOT(DL, Res, Res->getValueType(0)); + } + case Intrinsic::loongarch_lsx_vandi_b: + case Intrinsic::loongarch_lasx_xvandi_b: + return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<8>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vori_b: + case 
Intrinsic::loongarch_lasx_xvori_b: + return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<8>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vxori_b: + case Intrinsic::loongarch_lasx_xvxori_b: + return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<8>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsll_b: + case Intrinsic::loongarch_lsx_vsll_h: + case Intrinsic::loongarch_lsx_vsll_w: + case Intrinsic::loongarch_lsx_vsll_d: + case Intrinsic::loongarch_lasx_xvsll_b: + case Intrinsic::loongarch_lasx_xvsll_h: + case Intrinsic::loongarch_lasx_xvsll_w: + case Intrinsic::loongarch_lasx_xvsll_d: + return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + truncateVecElts(N, DAG)); + case Intrinsic::loongarch_lsx_vslli_b: + case Intrinsic::loongarch_lasx_xvslli_b: + return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<3>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vslli_h: + case Intrinsic::loongarch_lasx_xvslli_h: + return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<4>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vslli_w: + case Intrinsic::loongarch_lasx_xvslli_w: + return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vslli_d: + case Intrinsic::loongarch_lasx_xvslli_d: + return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<6>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrl_b: + case Intrinsic::loongarch_lsx_vsrl_h: + case Intrinsic::loongarch_lsx_vsrl_w: + case Intrinsic::loongarch_lsx_vsrl_d: + case Intrinsic::loongarch_lasx_xvsrl_b: + case Intrinsic::loongarch_lasx_xvsrl_h: + case Intrinsic::loongarch_lasx_xvsrl_w: + case Intrinsic::loongarch_lasx_xvsrl_d: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + truncateVecElts(N, DAG)); + case Intrinsic::loongarch_lsx_vsrli_b: + case Intrinsic::loongarch_lasx_xvsrli_b: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<3>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrli_h: + case Intrinsic::loongarch_lasx_xvsrli_h: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<4>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrli_w: + case Intrinsic::loongarch_lasx_xvsrli_w: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrli_d: + case Intrinsic::loongarch_lasx_xvsrli_d: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<6>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsra_b: + case Intrinsic::loongarch_lsx_vsra_h: + case Intrinsic::loongarch_lsx_vsra_w: + case Intrinsic::loongarch_lsx_vsra_d: + case Intrinsic::loongarch_lasx_xvsra_b: + case Intrinsic::loongarch_lasx_xvsra_h: + case Intrinsic::loongarch_lasx_xvsra_w: + case Intrinsic::loongarch_lasx_xvsra_d: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + truncateVecElts(N, DAG)); + case Intrinsic::loongarch_lsx_vsrai_b: + case Intrinsic::loongarch_lasx_xvsrai_b: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<3>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrai_h: + case Intrinsic::loongarch_lasx_xvsrai_h: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + 
lowerVectorSplatImm<4>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrai_w: + case Intrinsic::loongarch_lasx_xvsrai_w: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrai_d: + case Intrinsic::loongarch_lasx_xvsrai_d: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<6>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vclz_b: + case Intrinsic::loongarch_lsx_vclz_h: + case Intrinsic::loongarch_lsx_vclz_w: + case Intrinsic::loongarch_lsx_vclz_d: + case Intrinsic::loongarch_lasx_xvclz_b: + case Intrinsic::loongarch_lasx_xvclz_h: + case Intrinsic::loongarch_lasx_xvclz_w: + case Intrinsic::loongarch_lasx_xvclz_d: + return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1)); + case Intrinsic::loongarch_lsx_vpcnt_b: + case Intrinsic::loongarch_lsx_vpcnt_h: + case Intrinsic::loongarch_lsx_vpcnt_w: + case Intrinsic::loongarch_lsx_vpcnt_d: + case Intrinsic::loongarch_lasx_xvpcnt_b: + case Intrinsic::loongarch_lasx_xvpcnt_h: + case Intrinsic::loongarch_lasx_xvpcnt_w: + case Intrinsic::loongarch_lasx_xvpcnt_d: + return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1)); + case Intrinsic::loongarch_lsx_vbitclr_b: + case Intrinsic::loongarch_lsx_vbitclr_h: + case Intrinsic::loongarch_lsx_vbitclr_w: + case Intrinsic::loongarch_lsx_vbitclr_d: + case Intrinsic::loongarch_lasx_xvbitclr_b: + case Intrinsic::loongarch_lasx_xvbitclr_h: + case Intrinsic::loongarch_lasx_xvbitclr_w: + case Intrinsic::loongarch_lasx_xvbitclr_d: + return lowerVectorBitClear(N, DAG); + case Intrinsic::loongarch_lsx_vbitclri_b: + case Intrinsic::loongarch_lasx_xvbitclri_b: + return lowerVectorBitClearImm<3>(N, DAG); + case Intrinsic::loongarch_lsx_vbitclri_h: + case Intrinsic::loongarch_lasx_xvbitclri_h: + return lowerVectorBitClearImm<4>(N, DAG); + case Intrinsic::loongarch_lsx_vbitclri_w: + case Intrinsic::loongarch_lasx_xvbitclri_w: + return lowerVectorBitClearImm<5>(N, DAG); + case Intrinsic::loongarch_lsx_vbitclri_d: + case Intrinsic::loongarch_lasx_xvbitclri_d: + return lowerVectorBitClearImm<6>(N, DAG); + case Intrinsic::loongarch_lsx_vbitset_b: + case Intrinsic::loongarch_lsx_vbitset_h: + case Intrinsic::loongarch_lsx_vbitset_w: + case Intrinsic::loongarch_lsx_vbitset_d: + case Intrinsic::loongarch_lasx_xvbitset_b: + case Intrinsic::loongarch_lasx_xvbitset_h: + case Intrinsic::loongarch_lasx_xvbitset_w: + case Intrinsic::loongarch_lasx_xvbitset_d: { + EVT VecTy = N->getValueType(0); + SDValue One = DAG.getConstant(1, DL, VecTy); + return DAG.getNode( + ISD::OR, DL, VecTy, N->getOperand(1), + DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); + } + case Intrinsic::loongarch_lsx_vbitseti_b: + case Intrinsic::loongarch_lasx_xvbitseti_b: + return lowerVectorBitSetImm<3>(N, DAG); + case Intrinsic::loongarch_lsx_vbitseti_h: + case Intrinsic::loongarch_lasx_xvbitseti_h: + return lowerVectorBitSetImm<4>(N, DAG); + case Intrinsic::loongarch_lsx_vbitseti_w: + case Intrinsic::loongarch_lasx_xvbitseti_w: + return lowerVectorBitSetImm<5>(N, DAG); + case Intrinsic::loongarch_lsx_vbitseti_d: + case Intrinsic::loongarch_lasx_xvbitseti_d: + return lowerVectorBitSetImm<6>(N, DAG); + case Intrinsic::loongarch_lsx_vbitrev_b: + case Intrinsic::loongarch_lsx_vbitrev_h: + case Intrinsic::loongarch_lsx_vbitrev_w: + case Intrinsic::loongarch_lsx_vbitrev_d: + case Intrinsic::loongarch_lasx_xvbitrev_b: + case Intrinsic::loongarch_lasx_xvbitrev_h: + case 
Intrinsic::loongarch_lasx_xvbitrev_w: + case Intrinsic::loongarch_lasx_xvbitrev_d: { + EVT VecTy = N->getValueType(0); + SDValue One = DAG.getConstant(1, DL, VecTy); + return DAG.getNode( + ISD::XOR, DL, VecTy, N->getOperand(1), + DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); + } + case Intrinsic::loongarch_lsx_vbitrevi_b: + case Intrinsic::loongarch_lasx_xvbitrevi_b: + return lowerVectorBitRevImm<3>(N, DAG); + case Intrinsic::loongarch_lsx_vbitrevi_h: + case Intrinsic::loongarch_lasx_xvbitrevi_h: + return lowerVectorBitRevImm<4>(N, DAG); + case Intrinsic::loongarch_lsx_vbitrevi_w: + case Intrinsic::loongarch_lasx_xvbitrevi_w: + return lowerVectorBitRevImm<5>(N, DAG); + case Intrinsic::loongarch_lsx_vbitrevi_d: + case Intrinsic::loongarch_lasx_xvbitrevi_d: + return lowerVectorBitRevImm<6>(N, DAG); + case Intrinsic::loongarch_lsx_vfadd_s: + case Intrinsic::loongarch_lsx_vfadd_d: + case Intrinsic::loongarch_lasx_xvfadd_s: + case Intrinsic::loongarch_lasx_xvfadd_d: + return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vfsub_s: + case Intrinsic::loongarch_lsx_vfsub_d: + case Intrinsic::loongarch_lasx_xvfsub_s: + case Intrinsic::loongarch_lasx_xvfsub_d: + return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vfmul_s: + case Intrinsic::loongarch_lsx_vfmul_d: + case Intrinsic::loongarch_lasx_xvfmul_s: + case Intrinsic::loongarch_lasx_xvfmul_d: + return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vfdiv_s: + case Intrinsic::loongarch_lsx_vfdiv_d: + case Intrinsic::loongarch_lasx_xvfdiv_s: + case Intrinsic::loongarch_lasx_xvfdiv_d: + return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vfmadd_s: + case Intrinsic::loongarch_lsx_vfmadd_d: + case Intrinsic::loongarch_lasx_xvfmadd_s: + case Intrinsic::loongarch_lasx_xvfmadd_d: + return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3)); + case Intrinsic::loongarch_lsx_vinsgr2vr_b: + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), + legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget)); + case Intrinsic::loongarch_lsx_vinsgr2vr_h: + case Intrinsic::loongarch_lasx_xvinsgr2vr_w: + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), + legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget)); + case Intrinsic::loongarch_lsx_vinsgr2vr_w: + case Intrinsic::loongarch_lasx_xvinsgr2vr_d: + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), + legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget)); + case Intrinsic::loongarch_lsx_vinsgr2vr_d: + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), + legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget)); + case Intrinsic::loongarch_lsx_vreplgr2vr_b: + case Intrinsic::loongarch_lsx_vreplgr2vr_h: + case Intrinsic::loongarch_lsx_vreplgr2vr_w: + case Intrinsic::loongarch_lsx_vreplgr2vr_d: + case Intrinsic::loongarch_lasx_xvreplgr2vr_b: + case Intrinsic::loongarch_lasx_xvreplgr2vr_h: + case Intrinsic::loongarch_lasx_xvreplgr2vr_w: + case Intrinsic::loongarch_lasx_xvreplgr2vr_d: { + EVT ResTy = N->getValueType(0); + SmallVector 
Ops(ResTy.getVectorNumElements(), N->getOperand(1)); + return DAG.getBuildVector(ResTy, DL, Ops); + } + case Intrinsic::loongarch_lsx_vreplve_b: + case Intrinsic::loongarch_lsx_vreplve_h: + case Intrinsic::loongarch_lsx_vreplve_w: + case Intrinsic::loongarch_lsx_vreplve_d: + case Intrinsic::loongarch_lasx_xvreplve_b: + case Intrinsic::loongarch_lasx_xvreplve_h: + case Intrinsic::loongarch_lasx_xvreplve_w: + case Intrinsic::loongarch_lasx_xvreplve_d: + return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0), + N->getOperand(1), + DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(), + N->getOperand(2))); + } + return SDValue(); +} + SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -1699,6 +3033,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, return performSRLCombine(N, DAG, DCI, Subtarget); case LoongArchISD::BITREV_W: return performBITREV_WCombine(N, DAG, DCI, Subtarget); + case ISD::INTRINSIC_WO_CHAIN: + return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget); } return SDValue(); } @@ -1752,6 +3088,196 @@ static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, return SinkMBB; } +static MachineBasicBlock * +emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, + const LoongArchSubtarget &Subtarget) { + unsigned CondOpc; + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case LoongArch::PseudoVBZ: + CondOpc = LoongArch::VSETEQZ_V; + break; + case LoongArch::PseudoVBZ_B: + CondOpc = LoongArch::VSETANYEQZ_B; + break; + case LoongArch::PseudoVBZ_H: + CondOpc = LoongArch::VSETANYEQZ_H; + break; + case LoongArch::PseudoVBZ_W: + CondOpc = LoongArch::VSETANYEQZ_W; + break; + case LoongArch::PseudoVBZ_D: + CondOpc = LoongArch::VSETANYEQZ_D; + break; + case LoongArch::PseudoVBNZ: + CondOpc = LoongArch::VSETNEZ_V; + break; + case LoongArch::PseudoVBNZ_B: + CondOpc = LoongArch::VSETALLNEZ_B; + break; + case LoongArch::PseudoVBNZ_H: + CondOpc = LoongArch::VSETALLNEZ_H; + break; + case LoongArch::PseudoVBNZ_W: + CondOpc = LoongArch::VSETALLNEZ_W; + break; + case LoongArch::PseudoVBNZ_D: + CondOpc = LoongArch::VSETALLNEZ_D; + break; + case LoongArch::PseudoXVBZ: + CondOpc = LoongArch::XVSETEQZ_V; + break; + case LoongArch::PseudoXVBZ_B: + CondOpc = LoongArch::XVSETANYEQZ_B; + break; + case LoongArch::PseudoXVBZ_H: + CondOpc = LoongArch::XVSETANYEQZ_H; + break; + case LoongArch::PseudoXVBZ_W: + CondOpc = LoongArch::XVSETANYEQZ_W; + break; + case LoongArch::PseudoXVBZ_D: + CondOpc = LoongArch::XVSETANYEQZ_D; + break; + case LoongArch::PseudoXVBNZ: + CondOpc = LoongArch::XVSETNEZ_V; + break; + case LoongArch::PseudoXVBNZ_B: + CondOpc = LoongArch::XVSETALLNEZ_B; + break; + case LoongArch::PseudoXVBNZ_H: + CondOpc = LoongArch::XVSETALLNEZ_H; + break; + case LoongArch::PseudoXVBNZ_W: + CondOpc = LoongArch::XVSETALLNEZ_W; + break; + case LoongArch::PseudoXVBNZ_D: + CondOpc = LoongArch::XVSETALLNEZ_D; + break; + } + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + DebugLoc DL = MI.getDebugLoc(); + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + MachineFunction::iterator It = ++BB->getIterator(); + + MachineFunction *F = BB->getParent(); + MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB); + + F->insert(It, FalseBB); + F->insert(It, 
TrueBB); + F->insert(It, SinkBB); + + // Transfer the remainder of MBB and its successor edges to Sink. + SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end()); + SinkBB->transferSuccessorsAndUpdatePHIs(BB); + + // Insert the real instruction to BB. + Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass); + BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg()); + + // Insert branch. + BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB); + BB->addSuccessor(FalseBB); + BB->addSuccessor(TrueBB); + + // FalseBB. + Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); + BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1) + .addReg(LoongArch::R0) + .addImm(0); + BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB); + FalseBB->addSuccessor(SinkBB); + + // TrueBB. + Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); + BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2) + .addReg(LoongArch::R0) + .addImm(1); + TrueBB->addSuccessor(SinkBB); + + // SinkBB: merge the results. + BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI), + MI.getOperand(0).getReg()) + .addReg(RD1) + .addMBB(FalseBB) + .addReg(RD2) + .addMBB(TrueBB); + + // The pseudo instruction is gone now. + MI.eraseFromParent(); + return SinkBB; +} + +static MachineBasicBlock * +emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, + const LoongArchSubtarget &Subtarget) { + unsigned InsOp; + unsigned HalfSize; + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case LoongArch::PseudoXVINSGR2VR_B: + HalfSize = 16; + InsOp = LoongArch::VINSGR2VR_B; + break; + case LoongArch::PseudoXVINSGR2VR_H: + HalfSize = 8; + InsOp = LoongArch::VINSGR2VR_H; + break; + } + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + const TargetRegisterClass *RC = &LoongArch::LASX256RegClass; + const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass; + DebugLoc DL = MI.getDebugLoc(); + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + // XDst = vector_insert XSrc, Elt, Idx + Register XDst = MI.getOperand(0).getReg(); + Register XSrc = MI.getOperand(1).getReg(); + Register Elt = MI.getOperand(2).getReg(); + unsigned Idx = MI.getOperand(3).getImm(); + + Register ScratchReg1 = XSrc; + if (Idx >= HalfSize) { + ScratchReg1 = MRI.createVirtualRegister(RC); + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1) + .addReg(XSrc) + .addReg(XSrc) + .addImm(1); + } + + Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC); + Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC); + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1) + .addReg(ScratchReg1, 0, LoongArch::sub_128); + BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2) + .addReg(ScratchSubReg1) + .addReg(Elt) + .addImm(Idx >= HalfSize ? 
Idx - HalfSize : Idx); + + Register ScratchReg2 = XDst; + if (Idx >= HalfSize) + ScratchReg2 = MRI.createVirtualRegister(RC); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2) + .addImm(0) + .addReg(ScratchSubReg2) + .addImm(LoongArch::sub_128); + + if (Idx >= HalfSize) + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst) + .addReg(XSrc) + .addReg(ScratchReg2) + .addImm(2); + + MI.eraseFromParent(); + return BB; +} + MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = Subtarget.getInstrInfo(); @@ -1786,6 +3312,30 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( MI.eraseFromParent(); return BB; } + case LoongArch::PseudoVBZ: + case LoongArch::PseudoVBZ_B: + case LoongArch::PseudoVBZ_H: + case LoongArch::PseudoVBZ_W: + case LoongArch::PseudoVBZ_D: + case LoongArch::PseudoVBNZ: + case LoongArch::PseudoVBNZ_B: + case LoongArch::PseudoVBNZ_H: + case LoongArch::PseudoVBNZ_W: + case LoongArch::PseudoVBNZ_D: + case LoongArch::PseudoXVBZ: + case LoongArch::PseudoXVBZ_B: + case LoongArch::PseudoXVBZ_H: + case LoongArch::PseudoXVBZ_W: + case LoongArch::PseudoXVBZ_D: + case LoongArch::PseudoXVBNZ: + case LoongArch::PseudoXVBNZ_B: + case LoongArch::PseudoXVBNZ_H: + case LoongArch::PseudoXVBNZ_W: + case LoongArch::PseudoXVBNZ_D: + return emitVecCondBranchPseudo(MI, BB, Subtarget); + case LoongArch::PseudoXVINSGR2VR_B: + case LoongArch::PseudoXVINSGR2VR_H: + return emitPseudoXVINSGR2VR(MI, BB, Subtarget); } } @@ -1858,6 +3408,13 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(MOVFCSR2GR) NODE_NAME_CASE(CACOP_D) NODE_NAME_CASE(CACOP_W) + NODE_NAME_CASE(VPICK_SEXT_ELT) + NODE_NAME_CASE(VPICK_ZEXT_ELT) + NODE_NAME_CASE(VREPLVE) + NODE_NAME_CASE(VALL_ZERO) + NODE_NAME_CASE(VANY_ZERO) + NODE_NAME_CASE(VALL_NONZERO) + NODE_NAME_CASE(VANY_NONZERO) } #undef NODE_NAME_CASE return nullptr; @@ -1884,6 +3441,14 @@ const MCPhysReg ArgFPR64s[] = { LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64}; +const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2, + LoongArch::VR3, LoongArch::VR4, LoongArch::VR5, + LoongArch::VR6, LoongArch::VR7}; + +const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2, + LoongArch::XR3, LoongArch::XR4, LoongArch::XR5, + LoongArch::XR6, LoongArch::XR7}; + // Pass a 2*GRLen argument that has been split into two GRLen values through // registers or the stack as necessary. 
 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
@@ -2030,6 +3595,10 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
     Reg = State.AllocateReg(ArgFPR32s);
   else if (ValVT == MVT::f64 && !UseGPRForFloat)
     Reg = State.AllocateReg(ArgFPR64s);
+  else if (ValVT.is128BitVector())
+    Reg = State.AllocateReg(ArgVRs);
+  else if (ValVT.is256BitVector())
+    Reg = State.AllocateReg(ArgXRs);
   else
     Reg = State.AllocateReg(ArgGPRs);
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 500407493fe5a113ffc985e71f47ed5483993c44..6b5a851ec55d01425b37965351b10b58bf2e84fc 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -110,6 +110,20 @@ enum NodeType : unsigned {
 
   // Read CPU configuration information operation
   CPUCFG,
+
+  // Vector Shuffle
+  VREPLVE,
+
+  // Extended vector element extraction
+  VPICK_SEXT_ELT,
+  VPICK_ZEXT_ELT,
+
+  // Vector comparisons
+  VALL_ZERO,
+  VANY_ZERO,
+  VALL_NONZERO,
+  VANY_NONZERO,
+
   // Intrinsic operations end =============================================
 };
 } // end namespace LoongArchISD
@@ -216,6 +230,10 @@ public:
                MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
                unsigned *Fast = nullptr) const override;
 
+  bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override {
+    return false;
+  }
+
 private:
   /// Target-specific function used to lower LoongArch calling conventions.
   typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,
@@ -261,6 +279,10 @@ private:
   SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
 
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index ef79b8a0dcd39dfcecf4cf5a22bc5a74115a7311..6576100d3b3218672239316ca7f63e7f01d731b9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -47,6 +47,22 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   }
 
+  // VR->VR copies.
+  if (LoongArch::LSX128RegClass.contains(DstReg, SrcReg)) {
+    BuildMI(MBB, MBBI, DL, get(LoongArch::VORI_B), DstReg)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .addImm(0);
+    return;
+  }
+
+  // XR->XR copies.
+  if (LoongArch::LASX256RegClass.contains(DstReg, SrcReg)) {
+    BuildMI(MBB, MBBI, DL, get(LoongArch::XVORI_B), DstReg)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .addImm(0);
+    return;
+  }
+
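  // Note (sketch of the design choice, not part of the patch): vori.b and
  // xvori.b with a zero immediate compute dst.b[i] = src.b[i] | 0 for every
  // byte lane, i.e. a whole-register move. LSX/LASX have no dedicated vector
  // move instruction, so the zero-OR form above serves as the canonical
  // register-to-register copy idiom.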
   // GPR->CFR copy.
   if (LoongArch::CFRRegClass.contains(DstReg) &&
       LoongArch::GPRRegClass.contains(SrcReg)) {
@@ -74,6 +90,14 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     Opc = LoongArch::FMOV_S;
   } else if (LoongArch::FPR64RegClass.contains(DstReg, SrcReg)) {
     Opc = LoongArch::FMOV_D;
+  } else if (LoongArch::GPRRegClass.contains(DstReg) &&
+             LoongArch::FPR32RegClass.contains(SrcReg)) {
+    // FPR32 -> GPR copies
+    Opc = LoongArch::MOVFR2GR_S;
+  } else if (LoongArch::GPRRegClass.contains(DstReg) &&
+             LoongArch::FPR64RegClass.contains(SrcReg)) {
+    // FPR64 -> GPR copies
+    Opc = LoongArch::MOVFR2GR_D;
   } else {
     // TODO: support other copies.
     llvm_unreachable("Impossible reg-to-reg copy");
@@ -99,6 +123,10 @@ void LoongArchInstrInfo::storeRegToStackSlot(
     Opcode = LoongArch::FST_S;
   else if (LoongArch::FPR64RegClass.hasSubClassEq(RC))
     Opcode = LoongArch::FST_D;
+  else if (LoongArch::LSX128RegClass.hasSubClassEq(RC))
+    Opcode = LoongArch::VST;
+  else if (LoongArch::LASX256RegClass.hasSubClassEq(RC))
+    Opcode = LoongArch::XVST;
   else if (LoongArch::CFRRegClass.hasSubClassEq(RC))
     Opcode = LoongArch::PseudoST_CFR;
   else
@@ -133,6 +161,10 @@ void LoongArchInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
     Opcode = LoongArch::FLD_S;
   else if (LoongArch::FPR64RegClass.hasSubClassEq(RC))
     Opcode = LoongArch::FLD_D;
+  else if (LoongArch::LSX128RegClass.hasSubClassEq(RC))
+    Opcode = LoongArch::VLD;
+  else if (LoongArch::LASX256RegClass.hasSubClassEq(RC))
+    Opcode = LoongArch::XVLD;
   else if (LoongArch::CFRRegClass.hasSubClassEq(RC))
     Opcode = LoongArch::PseudoLD_CFR;
   else
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index ac391ef471b1aed4c2101afcebd27c2ebf2c149b..b2c4bb812ba5e9e22634ab7dbe33570d84a810db 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -182,7 +182,7 @@ def imm32 : Operand<GRLenVT> {
   let ParserMatchClass = ImmAsmOperand<"", 32, "">;
 }
 
-def uimm1 : Operand<GRLenVT> {
+def uimm1 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isUInt<1>(Imm);}]>{
   let ParserMatchClass = UImmAsmOperand<1>;
 }
 
@@ -197,11 +197,11 @@ def uimm2_plus1 : Operand<GRLenVT>,
   let DecoderMethod = "decodeUImmOperand<2, 1>";
 }
 
-def uimm3 : Operand<GRLenVT> {
+def uimm3 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isUInt<3>(Imm);}]> {
   let ParserMatchClass = UImmAsmOperand<3>;
 }
 
-def uimm4 : Operand<GRLenVT> {
+def uimm4 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isUInt<4>(Imm);}]> {
   let ParserMatchClass = UImmAsmOperand<4>;
 }
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index dc37b37b2186e8978cb39c96fadc4acaf4ead58e..492b62da6ce7806dff589e3adc944a7397fddd83 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -10,6 +10,37 @@
 //
 //===----------------------------------------------------------------------===//
 
+def lasxsplati8
+  : PatFrag<(ops node:$e0),
+            (v32i8 (build_vector node:$e0, node:$e0, node:$e0, node:$e0,
+                                 node:$e0, node:$e0, node:$e0, node:$e0,
+                                 node:$e0, node:$e0, node:$e0, node:$e0,
+                                 node:$e0, node:$e0, node:$e0, node:$e0,
+                                 node:$e0, node:$e0, node:$e0, node:$e0,
+                                 node:$e0, node:$e0, node:$e0, node:$e0,
+                                 node:$e0, node:$e0, node:$e0, node:$e0,
+                                 node:$e0, node:$e0, node:$e0, node:$e0))>;
+def lasxsplati16
+  : PatFrag<(ops node:$e0),
+            (v16i16 (build_vector node:$e0, node:$e0, node:$e0, node:$e0,
+                                  node:$e0, node:$e0, node:$e0, node:$e0,
+                                  node:$e0, node:$e0, node:$e0, node:$e0,
+                                  node:$e0, node:$e0, node:$e0, node:$e0))>;
+def lasxsplati32
+  : PatFrag<(ops node:$e0),
+            (v8i32 (build_vector
node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0))>; +def lasxsplati64 + : PatFrag<(ops node:$e0), + (v4i64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>; +def lasxsplatf32 + : PatFrag<(ops node:$e0), + (v8f32 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0))>; +def lasxsplatf64 + : PatFrag<(ops node:$e0), + (v4f64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>; + //===----------------------------------------------------------------------===// // Instruction class templates //===----------------------------------------------------------------------===// @@ -1029,4 +1060,929 @@ def PseudoXVREPLI_D : Pseudo<(outs LASX256:$xd), (ins simm10:$imm), [], "xvrepli.d", "$xd, $imm">; } +def PseudoXVBNZ_B : VecCond; +def PseudoXVBNZ_H : VecCond; +def PseudoXVBNZ_W : VecCond; +def PseudoXVBNZ_D : VecCond; +def PseudoXVBNZ : VecCond; + +def PseudoXVBZ_B : VecCond; +def PseudoXVBZ_H : VecCond; +def PseudoXVBZ_W : VecCond; +def PseudoXVBZ_D : VecCond; +def PseudoXVBZ : VecCond; + +let usesCustomInserter = 1, Constraints = "$xd = $dst" in { +def PseudoXVINSGR2VR_B + : Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm5:$imm)>; +def PseudoXVINSGR2VR_H + : Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm4:$imm)>; +} // usesCustomInserter = 1, Constraints = "$xd = $dst" + +} // Predicates = [HasExtLASX] + +multiclass PatXr { + def : Pat<(v32i8 (OpNode (v32i8 LASX256:$xj))), + (!cast(Inst#"_B") LASX256:$xj)>; + def : Pat<(v16i16 (OpNode (v16i16 LASX256:$xj))), + (!cast(Inst#"_H") LASX256:$xj)>; + def : Pat<(v8i32 (OpNode (v8i32 LASX256:$xj))), + (!cast(Inst#"_W") LASX256:$xj)>; + def : Pat<(v4i64 (OpNode (v4i64 LASX256:$xj))), + (!cast(Inst#"_D") LASX256:$xj)>; +} + +multiclass PatXrF { + def : Pat<(v8f32 (OpNode (v8f32 LASX256:$xj))), + (!cast(Inst#"_S") LASX256:$xj)>; + def : Pat<(v4f64 (OpNode (v4f64 LASX256:$xj))), + (!cast(Inst#"_D") LASX256:$xj)>; +} + +multiclass PatXrXr { + def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), + (!cast(Inst#"_B") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), + (!cast(Inst#"_H") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), + (!cast(Inst#"_W") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), + (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; +} + +multiclass PatXrXrF { + def : Pat<(OpNode (v8f32 LASX256:$xj), (v8f32 LASX256:$xk)), + (!cast(Inst#"_S") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v4f64 LASX256:$xj), (v4f64 LASX256:$xk)), + (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; +} + +multiclass PatXrXrU { + def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), + (!cast(Inst#"_BU") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), + (!cast(Inst#"_HU") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), + (!cast(Inst#"_WU") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), + (!cast(Inst#"_DU") LASX256:$xj, LASX256:$xk)>; +} + +multiclass PatXrSimm5 { + def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_simm5 simm5:$imm))), + (!cast(Inst#"_B") LASX256:$xj, simm5:$imm)>; + def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_simm5 simm5:$imm))), + (!cast(Inst#"_H") LASX256:$xj, simm5:$imm)>; + def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_simm5 simm5:$imm))), + 
(!cast(Inst#"_W") LASX256:$xj, simm5:$imm)>; + def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_simm5 simm5:$imm))), + (!cast(Inst#"_D") LASX256:$xj, simm5:$imm)>; +} + +multiclass PatXrUimm5 { + def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm5 uimm5:$imm))), + (!cast(Inst#"_BU") LASX256:$xj, uimm5:$imm)>; + def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_uimm5 uimm5:$imm))), + (!cast(Inst#"_HU") LASX256:$xj, uimm5:$imm)>; + def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_uimm5 uimm5:$imm))), + (!cast(Inst#"_WU") LASX256:$xj, uimm5:$imm)>; + def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_uimm5 uimm5:$imm))), + (!cast(Inst#"_DU") LASX256:$xj, uimm5:$imm)>; +} + +multiclass PatXrXrXr { + def : Pat<(OpNode (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), + (v32i8 LASX256:$xk)), + (!cast(Inst#"_B") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), + (v16i16 LASX256:$xk)), + (!cast(Inst#"_H") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), + (v8i32 LASX256:$xk)), + (!cast(Inst#"_W") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), + (v4i64 LASX256:$xk)), + (!cast(Inst#"_D") LASX256:$xd, LASX256:$xj, LASX256:$xk)>; +} + +multiclass PatShiftXrXr { + def : Pat<(OpNode (v32i8 LASX256:$xj), (and vsplati8_imm_eq_7, + (v32i8 LASX256:$xk))), + (!cast(Inst#"_B") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v16i16 LASX256:$xj), (and vsplati16_imm_eq_15, + (v16i16 LASX256:$xk))), + (!cast(Inst#"_H") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v8i32 LASX256:$xj), (and vsplati32_imm_eq_31, + (v8i32 LASX256:$xk))), + (!cast(Inst#"_W") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v4i64 LASX256:$xj), (and vsplati64_imm_eq_63, + (v4i64 LASX256:$xk))), + (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; +} + +multiclass PatShiftXrUimm { + def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm3 uimm3:$imm))), + (!cast(Inst#"_B") LASX256:$xj, uimm3:$imm)>; + def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_uimm4 uimm4:$imm))), + (!cast(Inst#"_H") LASX256:$xj, uimm4:$imm)>; + def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_uimm5 uimm5:$imm))), + (!cast(Inst#"_W") LASX256:$xj, uimm5:$imm)>; + def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_uimm6 uimm6:$imm))), + (!cast(Inst#"_D") LASX256:$xj, uimm6:$imm)>; +} + +multiclass PatCCXrSimm5 { + def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), + (v32i8 (SplatPat_simm5 simm5:$imm)), CC)), + (!cast(Inst#"_B") LASX256:$xj, simm5:$imm)>; + def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), + (v16i16 (SplatPat_simm5 simm5:$imm)), CC)), + (!cast(Inst#"_H") LASX256:$xj, simm5:$imm)>; + def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), + (v8i32 (SplatPat_simm5 simm5:$imm)), CC)), + (!cast(Inst#"_W") LASX256:$xj, simm5:$imm)>; + def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), + (v4i64 (SplatPat_simm5 simm5:$imm)), CC)), + (!cast(Inst#"_D") LASX256:$xj, simm5:$imm)>; +} + +multiclass PatCCXrUimm5 { + def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), + (v32i8 (SplatPat_uimm5 uimm5:$imm)), CC)), + (!cast(Inst#"_BU") LASX256:$xj, uimm5:$imm)>; + def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), + (v16i16 (SplatPat_uimm5 uimm5:$imm)), CC)), + (!cast(Inst#"_HU") LASX256:$xj, uimm5:$imm)>; + def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), + (v8i32 (SplatPat_uimm5 uimm5:$imm)), CC)), + (!cast(Inst#"_WU") LASX256:$xj, uimm5:$imm)>; + def : Pat<(v4i64 (setcc (v4i64 
LASX256:$xj), + (v4i64 (SplatPat_uimm5 uimm5:$imm)), CC)), + (!cast(Inst#"_DU") LASX256:$xj, uimm5:$imm)>; +} + +multiclass PatCCXrXr { + def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), (v32i8 LASX256:$xk), CC)), + (!cast(Inst#"_B") LASX256:$xj, LASX256:$xk)>; + def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), (v16i16 LASX256:$xk), CC)), + (!cast(Inst#"_H") LASX256:$xj, LASX256:$xk)>; + def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), (v8i32 LASX256:$xk), CC)), + (!cast(Inst#"_W") LASX256:$xj, LASX256:$xk)>; + def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), (v4i64 LASX256:$xk), CC)), + (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; +} + +multiclass PatCCXrXrU { + def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), (v32i8 LASX256:$xk), CC)), + (!cast(Inst#"_BU") LASX256:$xj, LASX256:$xk)>; + def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), (v16i16 LASX256:$xk), CC)), + (!cast(Inst#"_HU") LASX256:$xj, LASX256:$xk)>; + def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), (v8i32 LASX256:$xk), CC)), + (!cast(Inst#"_WU") LASX256:$xj, LASX256:$xk)>; + def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), (v4i64 LASX256:$xk), CC)), + (!cast(Inst#"_DU") LASX256:$xj, LASX256:$xk)>; +} + +multiclass PatCCXrXrF { + def : Pat<(v8i32 (setcc (v8f32 LASX256:$xj), (v8f32 LASX256:$xk), CC)), + (!cast(Inst#"_S") LASX256:$xj, LASX256:$xk)>; + def : Pat<(v4i64 (setcc (v4f64 LASX256:$xj), (v4f64 LASX256:$xk), CC)), + (!cast(Inst#"_D") LASX256:$xj, LASX256:$xk)>; +} + +let Predicates = [HasExtLASX] in { + +// XVADD_{B/H/W/D} +defm : PatXrXr; +// XVSUB_{B/H/W/D} +defm : PatXrXr; + +// XVADDI_{B/H/W/D}U +defm : PatXrUimm5; +// XVSUBI_{B/H/W/D}U +defm : PatXrUimm5; + +// XVNEG_{B/H/W/D} +def : Pat<(sub immAllZerosV, (v32i8 LASX256:$xj)), (XVNEG_B LASX256:$xj)>; +def : Pat<(sub immAllZerosV, (v16i16 LASX256:$xj)), (XVNEG_H LASX256:$xj)>; +def : Pat<(sub immAllZerosV, (v8i32 LASX256:$xj)), (XVNEG_W LASX256:$xj)>; +def : Pat<(sub immAllZerosV, (v4i64 LASX256:$xj)), (XVNEG_D LASX256:$xj)>; + +// XVMAX[I]_{B/H/W/D}[U] +defm : PatXrXr; +defm : PatXrXrU; +defm : PatXrSimm5; +defm : PatXrUimm5; + +// XVMIN[I]_{B/H/W/D}[U] +defm : PatXrXr; +defm : PatXrXrU; +defm : PatXrSimm5; +defm : PatXrUimm5; + +// XVMUL_{B/H/W/D} +defm : PatXrXr; + +// XVMUH_{B/H/W/D}[U] +defm : PatXrXr; +defm : PatXrXrU; + +// XVMADD_{B/H/W/D} +defm : PatXrXrXr; +// XVMSUB_{B/H/W/D} +defm : PatXrXrXr; + +// XVDIV_{B/H/W/D}[U] +defm : PatXrXr; +defm : PatXrXrU; + +// XVMOD_{B/H/W/D}[U] +defm : PatXrXr; +defm : PatXrXrU; + +// XVAND_V +foreach vt = [v32i8, v16i16, v8i32, v4i64] in +def : Pat<(and (vt LASX256:$xj), (vt LASX256:$xk)), + (XVAND_V LASX256:$xj, LASX256:$xk)>; +// XVOR_V +foreach vt = [v32i8, v16i16, v8i32, v4i64] in +def : Pat<(or (vt LASX256:$xj), (vt LASX256:$xk)), + (XVOR_V LASX256:$xj, LASX256:$xk)>; +// XVXOR_V +foreach vt = [v32i8, v16i16, v8i32, v4i64] in +def : Pat<(xor (vt LASX256:$xj), (vt LASX256:$xk)), + (XVXOR_V LASX256:$xj, LASX256:$xk)>; +// XVNOR_V +foreach vt = [v32i8, v16i16, v8i32, v4i64] in +def : Pat<(vnot (or (vt LASX256:$xj), (vt LASX256:$xk))), + (XVNOR_V LASX256:$xj, LASX256:$xk)>; + +// XVANDI_B +def : Pat<(and (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))), + (XVANDI_B LASX256:$xj, uimm8:$imm)>; +// XVORI_B +def : Pat<(or (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))), + (XVORI_B LASX256:$xj, uimm8:$imm)>; + +// XVXORI_B +def : Pat<(xor (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))), + (XVXORI_B LASX256:$xj, uimm8:$imm)>; + +// XVSLL[I]_{B/H/W/D} +defm : PatXrXr; +defm : PatShiftXrXr; +defm : PatShiftXrUimm; + +// 
XVSRL[I]_{B/H/W/D}
+defm : PatXrXr<srl, "XVSRL">;
+defm : PatShiftXrXr<srl, "XVSRL">;
+defm : PatShiftXrUimm<srl, "XVSRLI">;
+
+// XVSRA[I]_{B/H/W/D}
+defm : PatXrXr<sra, "XVSRA">;
+defm : PatShiftXrXr<sra, "XVSRA">;
+defm : PatShiftXrUimm<sra, "XVSRAI">;
+
+// XVCLZ_{B/H/W/D}
+defm : PatXr<ctlz, "XVCLZ">;
+
+// XVPCNT_{B/H/W/D}
+defm : PatXr<ctpop, "XVPCNT">;
+
+// XVBITCLR_{B/H/W/D}
+def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1, v32i8:$xk))),
+          (v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>;
+def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1, v16i16:$xk))),
+          (v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>;
+def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1, v8i32:$xk))),
+          (v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>;
+def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1, v4i64:$xk))),
+          (v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>;
+def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1,
+                                     (vsplati8imm7 v32i8:$xk)))),
+          (v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>;
+def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1,
+                                      (vsplati16imm15 v16i16:$xk)))),
+          (v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>;
+def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1,
+                                     (vsplati32imm31 v8i32:$xk)))),
+          (v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>;
+def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1,
+                                     (vsplati64imm63 v4i64:$xk)))),
+          (v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>;
+
+// XVBITCLRI_{B/H/W/D}
+def : Pat<(and (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_inv_pow2 uimm3:$imm))),
+          (XVBITCLRI_B LASX256:$xj, uimm3:$imm)>;
+def : Pat<(and (v16i16 LASX256:$xj), (v16i16 (vsplat_uimm_inv_pow2 uimm4:$imm))),
+          (XVBITCLRI_H LASX256:$xj, uimm4:$imm)>;
+def : Pat<(and (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_inv_pow2 uimm5:$imm))),
+          (XVBITCLRI_W LASX256:$xj, uimm5:$imm)>;
+def : Pat<(and (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_inv_pow2 uimm6:$imm))),
+          (XVBITCLRI_D LASX256:$xj, uimm6:$imm)>;
+
+// XVBITSET_{B/H/W/D}
+def : Pat<(or v32i8:$xj, (shl vsplat_imm_eq_1, v32i8:$xk)),
+          (v32i8 (XVBITSET_B v32i8:$xj, v32i8:$xk))>;
+def : Pat<(or v16i16:$xj, (shl vsplat_imm_eq_1, v16i16:$xk)),
+          (v16i16 (XVBITSET_H v16i16:$xj, v16i16:$xk))>;
+def : Pat<(or v8i32:$xj, (shl vsplat_imm_eq_1, v8i32:$xk)),
+          (v8i32 (XVBITSET_W v8i32:$xj, v8i32:$xk))>;
+def : Pat<(or v4i64:$xj, (shl vsplat_imm_eq_1, v4i64:$xk)),
+          (v4i64 (XVBITSET_D v4i64:$xj, v4i64:$xk))>;
+def : Pat<(or v32i8:$xj, (shl vsplat_imm_eq_1, (vsplati8imm7 v32i8:$xk))),
+          (v32i8 (XVBITSET_B v32i8:$xj, v32i8:$xk))>;
+def : Pat<(or v16i16:$xj, (shl vsplat_imm_eq_1, (vsplati16imm15 v16i16:$xk))),
+          (v16i16 (XVBITSET_H v16i16:$xj, v16i16:$xk))>;
+def : Pat<(or v8i32:$xj, (shl vsplat_imm_eq_1, (vsplati32imm31 v8i32:$xk))),
+          (v8i32 (XVBITSET_W v8i32:$xj, v8i32:$xk))>;
+def : Pat<(or v4i64:$xj, (shl vsplat_imm_eq_1, (vsplati64imm63 v4i64:$xk))),
+          (v4i64 (XVBITSET_D v4i64:$xj, v4i64:$xk))>;
+
+// XVBITSETI_{B/H/W/D}
+def : Pat<(or (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_pow2 uimm3:$imm))),
+          (XVBITSETI_B LASX256:$xj, uimm3:$imm)>;
+def : Pat<(or (v16i16 LASX256:$xj), (v16i16 (vsplat_uimm_pow2 uimm4:$imm))),
+          (XVBITSETI_H LASX256:$xj, uimm4:$imm)>;
+def : Pat<(or (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_pow2 uimm5:$imm))),
+          (XVBITSETI_W LASX256:$xj, uimm5:$imm)>;
+def : Pat<(or (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))),
+          (XVBITSETI_D LASX256:$xj, uimm6:$imm)>;
+
+// XVBITREV_{B/H/W/D}
+def : Pat<(xor v32i8:$xj, (shl vsplat_imm_eq_1, v32i8:$xk)),
+          (v32i8 (XVBITREV_B v32i8:$xj, v32i8:$xk))>;
+def : Pat<(xor v16i16:$xj, (shl vsplat_imm_eq_1, v16i16:$xk)),
+          (v16i16 (XVBITREV_H v16i16:$xj, v16i16:$xk))>;
+def : Pat<(xor v8i32:$xj, (shl vsplat_imm_eq_1, v8i32:$xk)),
+          (v8i32 (XVBITREV_W v8i32:$xj, v8i32:$xk))>;
+def : Pat<(xor v4i64:$xj, (shl vsplat_imm_eq_1, v4i64:$xk)),
+          (v4i64 (XVBITREV_D v4i64:$xj, v4i64:$xk))>;
+def : Pat<(xor v32i8:$xj, (shl vsplat_imm_eq_1, (vsplati8imm7 v32i8:$xk))),
+          (v32i8 (XVBITREV_B v32i8:$xj, v32i8:$xk))>;
+def : Pat<(xor v16i16:$xj, (shl vsplat_imm_eq_1, (vsplati16imm15 v16i16:$xk))),
+          (v16i16 (XVBITREV_H v16i16:$xj, v16i16:$xk))>;
+def : Pat<(xor v8i32:$xj, (shl vsplat_imm_eq_1, (vsplati32imm31 v8i32:$xk))),
+          (v8i32 (XVBITREV_W v8i32:$xj, v8i32:$xk))>;
+def : Pat<(xor v4i64:$xj, (shl vsplat_imm_eq_1, (vsplati64imm63 v4i64:$xk))),
+          (v4i64 (XVBITREV_D v4i64:$xj, v4i64:$xk))>;
+
+// XVBITREVI_{B/H/W/D}
+def : Pat<(xor (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_pow2 uimm3:$imm))),
+          (XVBITREVI_B LASX256:$xj, uimm3:$imm)>;
+def : Pat<(xor (v16i16 LASX256:$xj), (v16i16 (vsplat_uimm_pow2 uimm4:$imm))),
+          (XVBITREVI_H LASX256:$xj, uimm4:$imm)>;
+def : Pat<(xor (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_pow2 uimm5:$imm))),
+          (XVBITREVI_W LASX256:$xj, uimm5:$imm)>;
+def : Pat<(xor (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))),
+          (XVBITREVI_D LASX256:$xj, uimm6:$imm)>;
+
+// XVFADD_{S/D}
+defm : PatXrXrF<fadd, "XVFADD">;
+
+// XVFSUB_{S/D}
+defm : PatXrXrF<fsub, "XVFSUB">;
+
+// XVFMUL_{S/D}
+defm : PatXrXrF<fmul, "XVFMUL">;
+
+// XVFDIV_{S/D}
+defm : PatXrXrF<fdiv, "XVFDIV">;
+
+// XVFMADD_{S/D}
+def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa),
+          (XVFMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa),
+          (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+
+// XVFMSUB_{S/D}
+def : Pat<(fma v8f32:$xj, v8f32:$xk, (fneg v8f32:$xa)),
+          (XVFMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+def : Pat<(fma v4f64:$xj, v4f64:$xk, (fneg v4f64:$xa)),
+          (XVFMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+
+// XVFNMADD_{S/D}
+def : Pat<(fneg (fma v8f32:$xj, v8f32:$xk, v8f32:$xa)),
+          (XVFNMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+def : Pat<(fneg (fma v4f64:$xj, v4f64:$xk, v4f64:$xa)),
+          (XVFNMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+def : Pat<(fma_nsz (fneg v8f32:$xj), v8f32:$xk, (fneg v8f32:$xa)),
+          (XVFNMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, (fneg v4f64:$xa)),
+          (XVFNMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+
+// XVFNMSUB_{S/D}
+def : Pat<(fneg (fma v8f32:$xj, v8f32:$xk, (fneg v8f32:$xa))),
+          (XVFNMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+def : Pat<(fneg (fma v4f64:$xj, v4f64:$xk, (fneg v4f64:$xa))),
+          (XVFNMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+def : Pat<(fma_nsz (fneg v8f32:$xj), v8f32:$xk, v8f32:$xa),
+          (XVFNMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, v4f64:$xa),
+          (XVFNMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+
+// XVFSQRT_{S/D}
+defm : PatXrF<fsqrt, "XVFSQRT">;
+
+// XVFRECIP_{S/D}
+def : Pat<(fdiv vsplatf32_fpimm_eq_1, v8f32:$xj),
+          (XVFRECIP_S v8f32:$xj)>;
+def : Pat<(fdiv vsplatf64_fpimm_eq_1, v4f64:$xj),
+          (XVFRECIP_D v4f64:$xj)>;
+
+// XVFRSQRT_{S/D}
+def : Pat<(fdiv vsplatf32_fpimm_eq_1, (fsqrt v8f32:$xj)),
+          (XVFRSQRT_S v8f32:$xj)>;
+def : Pat<(fdiv vsplatf64_fpimm_eq_1, (fsqrt v4f64:$xj)),
+          (XVFRSQRT_D v4f64:$xj)>;
+
+// XVSEQ[I]_{B/H/W/D}
+defm : PatCCXrSimm5<SETEQ, "XVSEQI">;
+defm : PatCCXrXr<SETEQ, "XVSEQ">;
+
+// XVSLE[I]_{B/H/W/D}[U]
+defm : PatCCXrSimm5<SETLE, "XVSLEI">;
+defm : PatCCXrUimm5<SETULE, "XVSLEI">;
+defm : PatCCXrXr<SETLE, "XVSLE">;
+defm : PatCCXrXrU<SETULE, "XVSLE">;
+
+// XVSLT[I]_{B/H/W/D}[U]
+defm : PatCCXrSimm5<SETLT, "XVSLTI">;
+defm : PatCCXrUimm5<SETULT, "XVSLTI">;
+defm : PatCCXrXr<SETLT, "XVSLT">;
+defm : PatCCXrXrU<SETULT, "XVSLT">;
+
+// XVFCMP.cond.{S/D}
+defm : PatCCXrXrF<SETEQ, "XVFCMP_CEQ">;
+defm : PatCCXrXrF<SETOEQ, "XVFCMP_CEQ">;
+defm : PatCCXrXrF<SETUEQ, "XVFCMP_CUEQ">;
+
+defm : PatCCXrXrF<SETLE, "XVFCMP_CLE">;
+defm : PatCCXrXrF<SETOLE, "XVFCMP_CLE">;
+defm : PatCCXrXrF<SETULE, "XVFCMP_CULE">;
+
+defm : PatCCXrXrF<SETLT, "XVFCMP_CLT">;
+defm : PatCCXrXrF<SETOLT, "XVFCMP_CLT">;
+defm : PatCCXrXrF<SETULT, "XVFCMP_CULT">;
+
+defm : PatCCXrXrF<SETNE, "XVFCMP_CNE">;
+defm : PatCCXrXrF<SETONE, "XVFCMP_CNE">;
+defm : PatCCXrXrF<SETUNE, "XVFCMP_CUNE">;
+
+defm : PatCCXrXrF<SETO, "XVFCMP_COR">;
+defm : PatCCXrXrF<SETUO, "XVFCMP_CUN">;
+
+// PseudoXVINSGR2VR_{B/H}
+def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm),
+          (PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>;
+def : Pat<(vector_insert v16i16:$xd, GRLenVT:$rj, uimm4:$imm),
+          (PseudoXVINSGR2VR_H v16i16:$xd, GRLenVT:$rj, uimm4:$imm)>;
+
+// XVINSGR2VR_{W/D}
+def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm),
+          (XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>;
+def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm),
+          (XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>;
+
+def : Pat<(vector_insert v8f32:$vd, FPR32:$fj, uimm3:$imm),
+          (XVINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm3:$imm)>;
+def : Pat<(vector_insert v4f64:$vd, FPR64:$fj, uimm2:$imm),
+          (XVINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm2:$imm)>;
+
+// XVPICKVE2GR_W[U]
+def : Pat<(loongarch_vpick_sext_elt v8i32:$xd, uimm3:$imm, i32),
+          (XVPICKVE2GR_W v8i32:$xd, uimm3:$imm)>;
+def : Pat<(loongarch_vpick_zext_elt v8i32:$xd, uimm3:$imm, i32),
+          (XVPICKVE2GR_WU v8i32:$xd, uimm3:$imm)>;
+
+// XVREPLGR2VR_{B/H/W/D}
+def : Pat<(lasxsplati8 GPR:$rj), (XVREPLGR2VR_B GPR:$rj)>;
+def : Pat<(lasxsplati16 GPR:$rj), (XVREPLGR2VR_H GPR:$rj)>;
+def : Pat<(lasxsplati32 GPR:$rj), (XVREPLGR2VR_W GPR:$rj)>;
+def : Pat<(lasxsplati64 GPR:$rj), (XVREPLGR2VR_D GPR:$rj)>;
+
+// XVREPLVE_{B/H/W/D}
+def : Pat<(loongarch_vreplve v32i8:$xj, GRLenVT:$rk),
+          (XVREPLVE_B v32i8:$xj, GRLenVT:$rk)>;
+def : Pat<(loongarch_vreplve v16i16:$xj, GRLenVT:$rk),
+          (XVREPLVE_H v16i16:$xj, GRLenVT:$rk)>;
+def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk),
+          (XVREPLVE_W v8i32:$xj, GRLenVT:$rk)>;
+def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk),
+          (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>;
+
+// XVREPLVE0_{W/D}
+def : Pat<(lasxsplatf32 FPR32:$fj),
+          (XVREPLVE0_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32))>;
+def : Pat<(lasxsplatf64 FPR64:$fj),
+          (XVREPLVE0_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64))>;
+
+// Loads/Stores
+foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in {
+  defm : LdPat<load, XVLD, vt>;
+  def  : RegRegLdPat<load, XVLDX, vt>;
+  defm : StPat<store, XVST, LASX256, vt>;
+  def  : RegRegStPat<store, XVSTX, LASX256, vt>;
+}
+
+// Vector extraction with constant index.
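+// Note: the B/H cases below go through the low 128-bit (LSX) subregister,
+// since xvpickve2gr only provides the .w[u]/.d[u] element sizes.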
+def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)),
+          (VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>;
+def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)),
+          (VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>;
+def : Pat<(i64 (vector_extract v8i32:$xj, uimm3:$imm)),
+          (XVPICKVE2GR_W v8i32:$xj, uimm3:$imm)>;
+def : Pat<(i64 (vector_extract v4i64:$xj, uimm2:$imm)),
+          (XVPICKVE2GR_D v4i64:$xj, uimm2:$imm)>;
+def : Pat<(f32 (vector_extract v8f32:$xj, uimm3:$imm)),
+          (MOVGR2FR_W (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm))>;
+def : Pat<(f64 (vector_extract v4f64:$xj, uimm2:$imm)),
+          (MOVGR2FR_D (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm))>;
+
+// vselect
+def : Pat<(v32i8 (vselect LASX256:$xd, (v32i8 (SplatPat_uimm8 uimm8:$imm)),
+                          LASX256:$xj)),
+          (XVBITSELI_B LASX256:$xd, LASX256:$xj, uimm8:$imm)>;
+foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in
+  def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)),
+            (XVBITSEL_V LASX256:$xj, LASX256:$xk, LASX256:$xa)>;
+
+// fneg
+def : Pat<(fneg (v8f32 LASX256:$xj)), (XVBITREVI_W LASX256:$xj, 31)>;
+def : Pat<(fneg (v4f64 LASX256:$xj)), (XVBITREVI_D LASX256:$xj, 63)>;
+
+// XVFFINT_{S_W/D_L}
+def : Pat<(v8f32 (sint_to_fp v8i32:$vj)), (XVFFINT_S_W v8i32:$vj)>;
+def : Pat<(v4f64 (sint_to_fp v4i64:$vj)), (XVFFINT_D_L v4i64:$vj)>;
+def : Pat<(v4f64 (sint_to_fp v4i32:$vj)),
+          (XVFFINT_D_L (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), v4i32:$vj,
+                                                   sub_128)))>;
+def : Pat<(v4f32 (sint_to_fp v4i64:$vj)),
+          (EXTRACT_SUBREG (XVFCVT_S_D (XVPERMI_D (XVFFINT_D_L v4i64:$vj), 238),
+                                      (XVFFINT_D_L v4i64:$vj)),
+                          sub_128)>;
+
+// XVFFINT_{S_WU/D_LU}
+def : Pat<(v8f32 (uint_to_fp v8i32:$vj)), (XVFFINT_S_WU v8i32:$vj)>;
+def : Pat<(v4f64 (uint_to_fp v4i64:$vj)), (XVFFINT_D_LU v4i64:$vj)>;
+def : Pat<(v4f64 (uint_to_fp v4i32:$vj)),
+          (XVFFINT_D_LU (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), v4i32:$vj,
+                                                      sub_128)))>;
+def : Pat<(v4f32 (uint_to_fp v4i64:$vj)),
+          (EXTRACT_SUBREG (XVFCVT_S_D (XVPERMI_D (XVFFINT_D_LU v4i64:$vj), 238),
+                                      (XVFFINT_D_LU v4i64:$vj)),
+                          sub_128)>;
+
+// XVFTINTRZ_{W_S/L_D}
+def : Pat<(v8i32 (fp_to_sint v8f32:$vj)), (XVFTINTRZ_W_S v8f32:$vj)>;
+def : Pat<(v4i64 (fp_to_sint v4f64:$vj)), (XVFTINTRZ_L_D v4f64:$vj)>;
+def : Pat<(v4i64 (fp_to_sint v4f32:$vj)),
+          (VEXT2XV_D_W (SUBREG_TO_REG (i64 0), (VFTINTRZ_W_S v4f32:$vj),
+                                      sub_128))>;
+def : Pat<(v4i32 (fp_to_sint (v4f64 LASX256:$vj))),
+          (EXTRACT_SUBREG (XVFTINTRZ_W_S (XVFCVT_S_D (XVPERMI_D v4f64:$vj, 238),
+                                                     v4f64:$vj)),
+                          sub_128)>;
+
+// XVFTINTRZ_{W_SU/L_DU}
+def : Pat<(v8i32 (fp_to_uint v8f32:$vj)), (XVFTINTRZ_WU_S v8f32:$vj)>;
+def : Pat<(v4i64 (fp_to_uint v4f64:$vj)), (XVFTINTRZ_LU_D v4f64:$vj)>;
+def : Pat<(v4i64 (fp_to_uint v4f32:$vj)),
+          (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), (VFTINTRZ_WU_S v4f32:$vj),
+                                        sub_128))>;
+def : Pat<(v4i32 (fp_to_uint (v4f64 LASX256:$vj))),
+          (EXTRACT_SUBREG (XVFTINTRZ_W_S (XVFCVT_S_D (XVPERMI_D v4f64:$vj, 238),
+                                                     v4f64:$vj)),
+                          sub_128)>;
+
+} // Predicates = [HasExtLASX]
+
+/// Intrinsic pattern
+
+class deriveLASXIntrinsic<string Inst> {
+  Intrinsic ret = !cast<Intrinsic>(!tolower("int_loongarch_lasx_"#Inst));
+}
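+// e.g. deriveLASXIntrinsic<"XVSADD_B">.ret resolves to the record of the
+// int_loongarch_lasx_xvsadd_b intrinsic.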
"XVADDWOD_H_BU_B", + "XVAVG_B", "XVAVG_BU", "XVAVGR_B", "XVAVGR_BU", + "XVABSD_B", "XVABSD_BU", "XVADDA_B", "XVMUH_B", "XVMUH_BU", + "XVMULWEV_H_B", "XVMULWOD_H_B", "XVMULWEV_H_BU", "XVMULWOD_H_BU", + "XVMULWEV_H_BU_B", "XVMULWOD_H_BU_B", "XVSIGNCOV_B", + "XVANDN_V", "XVORN_V", "XVROTR_B", "XVSRLR_B", "XVSRAR_B", + "XVSEQ_B", "XVSLE_B", "XVSLE_BU", "XVSLT_B", "XVSLT_BU", + "XVPACKEV_B", "XVPACKOD_B", "XVPICKEV_B", "XVPICKOD_B", + "XVILVL_B", "XVILVH_B"] in + def : Pat<(deriveLASXIntrinsic.ret + (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), + (!cast(Inst) LASX256:$xj, LASX256:$xk)>; +foreach Inst = ["XVSADD_H", "XVSADD_HU", "XVSSUB_H", "XVSSUB_HU", + "XVHADDW_W_H", "XVHADDW_WU_HU", "XVHSUBW_W_H", "XVHSUBW_WU_HU", + "XVADDWEV_W_H", "XVADDWOD_W_H", "XVSUBWEV_W_H", "XVSUBWOD_W_H", + "XVADDWEV_W_HU", "XVADDWOD_W_HU", "XVSUBWEV_W_HU", "XVSUBWOD_W_HU", + "XVADDWEV_W_HU_H", "XVADDWOD_W_HU_H", + "XVAVG_H", "XVAVG_HU", "XVAVGR_H", "XVAVGR_HU", + "XVABSD_H", "XVABSD_HU", "XVADDA_H", "XVMUH_H", "XVMUH_HU", + "XVMULWEV_W_H", "XVMULWOD_W_H", "XVMULWEV_W_HU", "XVMULWOD_W_HU", + "XVMULWEV_W_HU_H", "XVMULWOD_W_HU_H", "XVSIGNCOV_H", "XVROTR_H", + "XVSRLR_H", "XVSRAR_H", "XVSRLN_B_H", "XVSRAN_B_H", "XVSRLRN_B_H", + "XVSRARN_B_H", "XVSSRLN_B_H", "XVSSRAN_B_H", "XVSSRLN_BU_H", + "XVSSRAN_BU_H", "XVSSRLRN_B_H", "XVSSRARN_B_H", "XVSSRLRN_BU_H", + "XVSSRARN_BU_H", + "XVSEQ_H", "XVSLE_H", "XVSLE_HU", "XVSLT_H", "XVSLT_HU", + "XVPACKEV_H", "XVPACKOD_H", "XVPICKEV_H", "XVPICKOD_H", + "XVILVL_H", "XVILVH_H"] in + def : Pat<(deriveLASXIntrinsic.ret + (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), + (!cast(Inst) LASX256:$xj, LASX256:$xk)>; +foreach Inst = ["XVSADD_W", "XVSADD_WU", "XVSSUB_W", "XVSSUB_WU", + "XVHADDW_D_W", "XVHADDW_DU_WU", "XVHSUBW_D_W", "XVHSUBW_DU_WU", + "XVADDWEV_D_W", "XVADDWOD_D_W", "XVSUBWEV_D_W", "XVSUBWOD_D_W", + "XVADDWEV_D_WU", "XVADDWOD_D_WU", "XVSUBWEV_D_WU", "XVSUBWOD_D_WU", + "XVADDWEV_D_WU_W", "XVADDWOD_D_WU_W", + "XVAVG_W", "XVAVG_WU", "XVAVGR_W", "XVAVGR_WU", + "XVABSD_W", "XVABSD_WU", "XVADDA_W", "XVMUH_W", "XVMUH_WU", + "XVMULWEV_D_W", "XVMULWOD_D_W", "XVMULWEV_D_WU", "XVMULWOD_D_WU", + "XVMULWEV_D_WU_W", "XVMULWOD_D_WU_W", "XVSIGNCOV_W", "XVROTR_W", + "XVSRLR_W", "XVSRAR_W", "XVSRLN_H_W", "XVSRAN_H_W", "XVSRLRN_H_W", + "XVSRARN_H_W", "XVSSRLN_H_W", "XVSSRAN_H_W", "XVSSRLN_HU_W", + "XVSSRAN_HU_W", "XVSSRLRN_H_W", "XVSSRARN_H_W", "XVSSRLRN_HU_W", + "XVSSRARN_HU_W", + "XVSEQ_W", "XVSLE_W", "XVSLE_WU", "XVSLT_W", "XVSLT_WU", + "XVPACKEV_W", "XVPACKOD_W", "XVPICKEV_W", "XVPICKOD_W", + "XVILVL_W", "XVILVH_W", "XVPERM_W"] in + def : Pat<(deriveLASXIntrinsic.ret + (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), + (!cast(Inst) LASX256:$xj, LASX256:$xk)>; +foreach Inst = ["XVADD_Q", "XVSUB_Q", + "XVSADD_D", "XVSADD_DU", "XVSSUB_D", "XVSSUB_DU", + "XVHADDW_Q_D", "XVHADDW_QU_DU", "XVHSUBW_Q_D", "XVHSUBW_QU_DU", + "XVADDWEV_Q_D", "XVADDWOD_Q_D", "XVSUBWEV_Q_D", "XVSUBWOD_Q_D", + "XVADDWEV_Q_DU", "XVADDWOD_Q_DU", "XVSUBWEV_Q_DU", "XVSUBWOD_Q_DU", + "XVADDWEV_Q_DU_D", "XVADDWOD_Q_DU_D", + "XVAVG_D", "XVAVG_DU", "XVAVGR_D", "XVAVGR_DU", + "XVABSD_D", "XVABSD_DU", "XVADDA_D", "XVMUH_D", "XVMUH_DU", + "XVMULWEV_Q_D", "XVMULWOD_Q_D", "XVMULWEV_Q_DU", "XVMULWOD_Q_DU", + "XVMULWEV_Q_DU_D", "XVMULWOD_Q_DU_D", "XVSIGNCOV_D", "XVROTR_D", + "XVSRLR_D", "XVSRAR_D", "XVSRLN_W_D", "XVSRAN_W_D", "XVSRLRN_W_D", + "XVSRARN_W_D", "XVSSRLN_W_D", "XVSSRAN_W_D", "XVSSRLN_WU_D", + "XVSSRAN_WU_D", "XVSSRLRN_W_D", "XVSSRARN_W_D", "XVSSRLRN_WU_D", + "XVSSRARN_WU_D", "XVFFINT_S_L", + "XVSEQ_D", "XVSLE_D", "XVSLE_DU", "XVSLT_D", 
"XVSLT_DU", + "XVPACKEV_D", "XVPACKOD_D", "XVPICKEV_D", "XVPICKOD_D", + "XVILVL_D", "XVILVH_D"] in + def : Pat<(deriveLASXIntrinsic.ret + (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), + (!cast(Inst) LASX256:$xj, LASX256:$xk)>; + +// vty: v32i8/v16i16/v8i32/v4i64 +// Pat<(Intrinsic vty:$xd, vty:$xj, vty:$xk), +// (LAInst vty:$xd, vty:$xj, vty:$xk)>; +foreach Inst = ["XVMADDWEV_H_B", "XVMADDWOD_H_B", "XVMADDWEV_H_BU", + "XVMADDWOD_H_BU", "XVMADDWEV_H_BU_B", "XVMADDWOD_H_BU_B"] in + def : Pat<(deriveLASXIntrinsic.ret + (v16i16 LASX256:$xd), (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), + (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; +foreach Inst = ["XVMADDWEV_W_H", "XVMADDWOD_W_H", "XVMADDWEV_W_HU", + "XVMADDWOD_W_HU", "XVMADDWEV_W_HU_H", "XVMADDWOD_W_HU_H"] in + def : Pat<(deriveLASXIntrinsic.ret + (v8i32 LASX256:$xd), (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), + (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; +foreach Inst = ["XVMADDWEV_D_W", "XVMADDWOD_D_W", "XVMADDWEV_D_WU", + "XVMADDWOD_D_WU", "XVMADDWEV_D_WU_W", "XVMADDWOD_D_WU_W"] in + def : Pat<(deriveLASXIntrinsic.ret + (v4i64 LASX256:$xd), (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), + (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; +foreach Inst = ["XVMADDWEV_Q_D", "XVMADDWOD_Q_D", "XVMADDWEV_Q_DU", + "XVMADDWOD_Q_DU", "XVMADDWEV_Q_DU_D", "XVMADDWOD_Q_DU_D"] in + def : Pat<(deriveLASXIntrinsic.ret + (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), + (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; + +// vty: v32i8/v16i16/v8i32/v4i64 +// Pat<(Intrinsic vty:$xj), +// (LAInst vty:$xj)>; +foreach Inst = ["XVEXTH_H_B", "XVEXTH_HU_BU", + "XVMSKLTZ_B", "XVMSKGEZ_B", "XVMSKNZ_B", + "XVCLO_B", "VEXT2XV_H_B", "VEXT2XV_HU_BU", + "VEXT2XV_W_B", "VEXT2XV_WU_BU", "VEXT2XV_D_B", + "VEXT2XV_DU_BU", "XVREPLVE0_B", "XVREPLVE0_Q"] in + def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj)), + (!cast(Inst) LASX256:$xj)>; +foreach Inst = ["XVEXTH_W_H", "XVEXTH_WU_HU", "XVMSKLTZ_H", + "XVCLO_H", "XVFCVTL_S_H", "XVFCVTH_S_H", + "VEXT2XV_W_H", "VEXT2XV_WU_HU", "VEXT2XV_D_H", + "VEXT2XV_DU_HU", "XVREPLVE0_H"] in + def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj)), + (!cast(Inst) LASX256:$xj)>; +foreach Inst = ["XVEXTH_D_W", "XVEXTH_DU_WU", "XVMSKLTZ_W", + "XVCLO_W", "XVFFINT_S_W", "XVFFINT_S_WU", + "XVFFINTL_D_W", "XVFFINTH_D_W", + "VEXT2XV_D_W", "VEXT2XV_DU_WU", "XVREPLVE0_W"] in + def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj)), + (!cast(Inst) LASX256:$xj)>; +foreach Inst = ["XVEXTH_Q_D", "XVEXTH_QU_DU", "XVMSKLTZ_D", + "XVEXTL_Q_D", "XVEXTL_QU_DU", + "XVCLO_D", "XVFFINT_D_L", "XVFFINT_D_LU", + "XVREPLVE0_D"] in + def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj)), + (!cast(Inst) LASX256:$xj)>; + +// Pat<(Intrinsic timm:$imm) +// (LAInst timm:$imm)>; +def : Pat<(int_loongarch_lasx_xvldi timm:$imm), + (XVLDI (to_valid_timm timm:$imm))>; +foreach Inst = ["XVREPLI_B", "XVREPLI_H", "XVREPLI_W", "XVREPLI_D"] in + def : Pat<(deriveLASXIntrinsic.ret timm:$imm), + (!cast("Pseudo"#Inst) (to_valid_timm timm:$imm))>; + +// vty: v32i8/v16i16/v8i32/v4i64 +// Pat<(Intrinsic vty:$xj, timm:$imm) +// (LAInst vty:$xj, timm:$imm)>; +foreach Inst = ["XVSAT_B", "XVSAT_BU", "XVNORI_B", "XVROTRI_B", "XVSLLWIL_H_B", + "XVSLLWIL_HU_BU", "XVSRLRI_B", "XVSRARI_B", + "XVSEQI_B", "XVSLEI_B", "XVSLEI_BU", "XVSLTI_B", "XVSLTI_BU", + "XVREPL128VEI_B", "XVBSLL_V", "XVBSRL_V", "XVSHUF4I_B"] in + def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; +foreach Inst 
= ["XVSAT_H", "XVSAT_HU", "XVROTRI_H", "XVSLLWIL_W_H", + "XVSLLWIL_WU_HU", "XVSRLRI_H", "XVSRARI_H", + "XVSEQI_H", "XVSLEI_H", "XVSLEI_HU", "XVSLTI_H", "XVSLTI_HU", + "XVREPL128VEI_H", "XVSHUF4I_H"] in + def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; +foreach Inst = ["XVSAT_W", "XVSAT_WU", "XVROTRI_W", "XVSLLWIL_D_W", + "XVSLLWIL_DU_WU", "XVSRLRI_W", "XVSRARI_W", + "XVSEQI_W", "XVSLEI_W", "XVSLEI_WU", "XVSLTI_W", "XVSLTI_WU", + "XVREPL128VEI_W", "XVSHUF4I_W", "XVPICKVE_W"] in + def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; +foreach Inst = ["XVSAT_D", "XVSAT_DU", "XVROTRI_D", "XVSRLRI_D", "XVSRARI_D", + "XVSEQI_D", "XVSLEI_D", "XVSLEI_DU", "XVSLTI_D", "XVSLTI_DU", + "XVPICKVE2GR_D", "XVPICKVE2GR_DU", + "XVREPL128VEI_D", "XVPERMI_D", "XVPICKVE_D"] in + def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; + +// vty: v32i8/v16i16/v8i32/v4i64 +// Pat<(Intrinsic vty:$xd, vty:$xj, timm:$imm) +// (LAInst vty:$xd, vty:$xj, timm:$imm)>; +foreach Inst = ["XVSRLNI_B_H", "XVSRANI_B_H", "XVSRLRNI_B_H", "XVSRARNI_B_H", + "XVSSRLNI_B_H", "XVSSRANI_B_H", "XVSSRLNI_BU_H", "XVSSRANI_BU_H", + "XVSSRLRNI_B_H", "XVSSRARNI_B_H", "XVSSRLRNI_BU_H", "XVSSRARNI_BU_H", + "XVFRSTPI_B", "XVBITSELI_B", "XVEXTRINS_B", "XVPERMI_Q"] in + def : Pat<(deriveLASXIntrinsic.ret + (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xd, LASX256:$xj, + (to_valid_timm timm:$imm))>; +foreach Inst = ["XVSRLNI_H_W", "XVSRANI_H_W", "XVSRLRNI_H_W", "XVSRARNI_H_W", + "XVSSRLNI_H_W", "XVSSRANI_H_W", "XVSSRLNI_HU_W", "XVSSRANI_HU_W", + "XVSSRLRNI_H_W", "XVSSRARNI_H_W", "XVSSRLRNI_HU_W", "XVSSRARNI_HU_W", + "XVFRSTPI_H", "XVEXTRINS_H"] in + def : Pat<(deriveLASXIntrinsic.ret + (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xd, LASX256:$xj, + (to_valid_timm timm:$imm))>; +foreach Inst = ["XVSRLNI_W_D", "XVSRANI_W_D", "XVSRLRNI_W_D", "XVSRARNI_W_D", + "XVSSRLNI_W_D", "XVSSRANI_W_D", "XVSSRLNI_WU_D", "XVSSRANI_WU_D", + "XVSSRLRNI_W_D", "XVSSRARNI_W_D", "XVSSRLRNI_WU_D", "XVSSRARNI_WU_D", + "XVPERMI_W", "XVEXTRINS_W", "XVINSVE0_W"] in + def : Pat<(deriveLASXIntrinsic.ret + (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xd, LASX256:$xj, + (to_valid_timm timm:$imm))>; +foreach Inst = ["XVSRLNI_D_Q", "XVSRANI_D_Q", "XVSRLRNI_D_Q", "XVSRARNI_D_Q", + "XVSSRLNI_D_Q", "XVSSRANI_D_Q", "XVSSRLNI_DU_Q", "XVSSRANI_DU_Q", + "XVSSRLRNI_D_Q", "XVSSRARNI_D_Q", "XVSSRLRNI_DU_Q", "XVSSRARNI_DU_Q", + "XVSHUF4I_D", "XVEXTRINS_D", "XVINSVE0_D"] in + def : Pat<(deriveLASXIntrinsic.ret + (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xd, LASX256:$xj, + (to_valid_timm timm:$imm))>; + +// vty: v32i8/v16i16/v8i32/v4i64 +// Pat<(Intrinsic vty:$xd, vty:$xj, vty:$xk), +// (LAInst vty:$xd, vty:$xj, vty:$xk)>; +foreach Inst = ["XVFRSTP_B", "XVBITSEL_V", "XVSHUF_B"] in + def : Pat<(deriveLASXIntrinsic.ret + (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), + (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; +foreach Inst = ["XVFRSTP_H", "XVSHUF_H"] in + def : Pat<(deriveLASXIntrinsic.ret + (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), + (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; +def : Pat<(int_loongarch_lasx_xvshuf_w (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), + (v8i32 LASX256:$xk)), 
+          (XVSHUF_W LASX256:$xd, LASX256:$xj, LASX256:$xk)>;
+def : Pat<(int_loongarch_lasx_xvshuf_d (v4i64 LASX256:$xd), (v4i64 LASX256:$xj),
+                                       (v4i64 LASX256:$xk)),
+          (XVSHUF_D LASX256:$xd, LASX256:$xj, LASX256:$xk)>;
+
+// vty: v8f32/v4f64
+// Pat<(Intrinsic vty:$xj, vty:$xk, vty:$xa),
+//     (LAInst vty:$xj, vty:$xk, vty:$xa)>;
+foreach Inst = ["XVFMSUB_S", "XVFNMADD_S", "XVFNMSUB_S"] in
+  def : Pat<(deriveLASXIntrinsic<Inst>.ret
+                 (v8f32 LASX256:$xj), (v8f32 LASX256:$xk), (v8f32 LASX256:$xa)),
+            (!cast<LAInst>(Inst) LASX256:$xj, LASX256:$xk, LASX256:$xa)>;
+foreach Inst = ["XVFMSUB_D", "XVFNMADD_D", "XVFNMSUB_D"] in
+  def : Pat<(deriveLASXIntrinsic<Inst>.ret
+                 (v4f64 LASX256:$xj), (v4f64 LASX256:$xk), (v4f64 LASX256:$xa)),
+            (!cast<LAInst>(Inst) LASX256:$xj, LASX256:$xk, LASX256:$xa)>;
+
+// vty: v8f32/v4f64
+// Pat<(Intrinsic vty:$xj, vty:$xk),
+//     (LAInst vty:$xj, vty:$xk)>;
+foreach Inst = ["XVFMAX_S", "XVFMIN_S", "XVFMAXA_S", "XVFMINA_S", "XVFCVT_H_S",
+                "XVFCMP_CAF_S", "XVFCMP_CUN_S", "XVFCMP_CEQ_S", "XVFCMP_CUEQ_S",
+                "XVFCMP_CLT_S", "XVFCMP_CULT_S", "XVFCMP_CLE_S", "XVFCMP_CULE_S",
+                "XVFCMP_CNE_S", "XVFCMP_COR_S", "XVFCMP_CUNE_S",
+                "XVFCMP_SAF_S", "XVFCMP_SUN_S", "XVFCMP_SEQ_S", "XVFCMP_SUEQ_S",
+                "XVFCMP_SLT_S", "XVFCMP_SULT_S", "XVFCMP_SLE_S", "XVFCMP_SULE_S",
+                "XVFCMP_SNE_S", "XVFCMP_SOR_S", "XVFCMP_SUNE_S"] in
+  def : Pat<(deriveLASXIntrinsic<Inst>.ret
+                 (v8f32 LASX256:$xj), (v8f32 LASX256:$xk)),
+            (!cast<LAInst>(Inst) LASX256:$xj, LASX256:$xk)>;
+foreach Inst = ["XVFMAX_D", "XVFMIN_D", "XVFMAXA_D", "XVFMINA_D", "XVFCVT_S_D",
+                "XVFTINTRNE_W_D", "XVFTINTRZ_W_D", "XVFTINTRP_W_D", "XVFTINTRM_W_D",
+                "XVFTINT_W_D",
+                "XVFCMP_CAF_D", "XVFCMP_CUN_D", "XVFCMP_CEQ_D", "XVFCMP_CUEQ_D",
+                "XVFCMP_CLT_D", "XVFCMP_CULT_D", "XVFCMP_CLE_D", "XVFCMP_CULE_D",
+                "XVFCMP_CNE_D", "XVFCMP_COR_D", "XVFCMP_CUNE_D",
+                "XVFCMP_SAF_D", "XVFCMP_SUN_D", "XVFCMP_SEQ_D", "XVFCMP_SUEQ_D",
+                "XVFCMP_SLT_D", "XVFCMP_SULT_D", "XVFCMP_SLE_D", "XVFCMP_SULE_D",
+                "XVFCMP_SNE_D", "XVFCMP_SOR_D", "XVFCMP_SUNE_D"] in
+  def : Pat<(deriveLASXIntrinsic<Inst>.ret
+                 (v4f64 LASX256:$xj), (v4f64 LASX256:$xk)),
+            (!cast<LAInst>(Inst) LASX256:$xj, LASX256:$xk)>;
+
+// vty: v8f32/v4f64
+// Pat<(Intrinsic vty:$xj),
+//     (LAInst vty:$xj)>;
+foreach Inst = ["XVFLOGB_S", "XVFCLASS_S", "XVFSQRT_S", "XVFRECIP_S", "XVFRSQRT_S",
+                "XVFRINT_S", "XVFCVTL_D_S", "XVFCVTH_D_S",
+                "XVFRINTRNE_S", "XVFRINTRZ_S", "XVFRINTRP_S", "XVFRINTRM_S",
+                "XVFTINTRNE_W_S", "XVFTINTRZ_W_S", "XVFTINTRP_W_S", "XVFTINTRM_W_S",
+                "XVFTINT_W_S", "XVFTINTRZ_WU_S", "XVFTINT_WU_S",
+                "XVFTINTRNEL_L_S", "XVFTINTRNEH_L_S", "XVFTINTRZL_L_S",
+                "XVFTINTRZH_L_S", "XVFTINTRPL_L_S", "XVFTINTRPH_L_S",
+                "XVFTINTRML_L_S", "XVFTINTRMH_L_S", "XVFTINTL_L_S",
+                "XVFTINTH_L_S"] in
+  def : Pat<(deriveLASXIntrinsic<Inst>.ret (v8f32 LASX256:$xj)),
+            (!cast<LAInst>(Inst) LASX256:$xj)>;
+foreach Inst = ["XVFLOGB_D", "XVFCLASS_D", "XVFSQRT_D", "XVFRECIP_D", "XVFRSQRT_D",
+                "XVFRINT_D",
+                "XVFRINTRNE_D", "XVFRINTRZ_D", "XVFRINTRP_D", "XVFRINTRM_D",
+                "XVFTINTRNE_L_D", "XVFTINTRZ_L_D", "XVFTINTRP_L_D", "XVFTINTRM_L_D",
+                "XVFTINT_L_D", "XVFTINTRZ_LU_D", "XVFTINT_LU_D"] in
+  def : Pat<(deriveLASXIntrinsic<Inst>.ret (v4f64 LASX256:$xj)),
+            (!cast<LAInst>(Inst) LASX256:$xj)>;
+
+def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm),
+          (XVPICKVE_W v8f32:$xj, (to_valid_timm timm:$imm))>;
+def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm),
+          (XVPICKVE_D v4f64:$xj, (to_valid_timm timm:$imm))>;
+
+// load
+def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm),
+          (XVLD GPR:$rj, (to_valid_timm timm:$imm))>;
+def : Pat<(int_loongarch_lasx_xvldx GPR:$rj, GPR:$rk),
+          (XVLDX GPR:$rj, GPR:$rk)>;
+
+def : Pat<(int_loongarch_lasx_xvldrepl_b GPR:$rj, timm:$imm),
+          (XVLDREPL_B GPR:$rj, (to_valid_timm timm:$imm))>;
+def : Pat<(int_loongarch_lasx_xvldrepl_h GPR:$rj, timm:$imm),
+          (XVLDREPL_H GPR:$rj, (to_valid_timm timm:$imm))>;
+def : Pat<(int_loongarch_lasx_xvldrepl_w GPR:$rj, timm:$imm),
+          (XVLDREPL_W GPR:$rj, (to_valid_timm timm:$imm))>;
+def : Pat<(int_loongarch_lasx_xvldrepl_d GPR:$rj, timm:$imm),
+          (XVLDREPL_D GPR:$rj, (to_valid_timm timm:$imm))>;
+
+// store
+def : Pat<(int_loongarch_lasx_xvst LASX256:$xd, GPR:$rj, timm:$imm),
+          (XVST LASX256:$xd, GPR:$rj, (to_valid_timm timm:$imm))>;
+def : Pat<(int_loongarch_lasx_xvstx LASX256:$xd, GPR:$rj, GPR:$rk),
+          (XVSTX LASX256:$xd, GPR:$rj, GPR:$rk)>;
+
+def : Pat<(int_loongarch_lasx_xvstelm_b v32i8:$xd, GPR:$rj, timm:$imm, timm:$idx),
+          (XVSTELM_B v32i8:$xd, GPR:$rj, (to_valid_timm timm:$imm),
+                     (to_valid_timm timm:$idx))>;
+def : Pat<(int_loongarch_lasx_xvstelm_h v16i16:$xd, GPR:$rj, timm:$imm, timm:$idx),
+          (XVSTELM_H v16i16:$xd, GPR:$rj, (to_valid_timm timm:$imm),
+                     (to_valid_timm timm:$idx))>;
+def : Pat<(int_loongarch_lasx_xvstelm_w v8i32:$xd, GPR:$rj, timm:$imm, timm:$idx),
+          (XVSTELM_W v8i32:$xd, GPR:$rj, (to_valid_timm timm:$imm),
+                     (to_valid_timm timm:$idx))>;
+def : Pat<(int_loongarch_lasx_xvstelm_d v4i64:$xd, GPR:$rj, timm:$imm, timm:$idx),
+          (XVSTELM_D v4i64:$xd, GPR:$rj, (to_valid_timm timm:$imm),
+                     (to_valid_timm timm:$idx))>;
+
+} // Predicates = [HasExtLASX]
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index a8ed285a37cf06063af316e78f2e178faf9a66e3..99ac2f3c162fea31e076b5c267363e9fef7d0807 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -10,6 +10,173 @@
 //
 //===----------------------------------------------------------------------===//
 
+def SDT_LoongArchVreplve : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>,
+                                                SDTCisInt<1>, SDTCisVec<1>,
+                                                SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
+def SDT_LoongArchVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>;
+
+// Target nodes.
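+// These mirror the LoongArchISD::* opcodes produced by the custom lowering
+// code, e.g. the vbnz/vbz branch intrinsics are lowered to the
+// VALL_NONZERO/VANY_ZERO family of nodes matched by the pseudos below.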
+def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>;
+def loongarch_vall_nonzero : SDNode<"LoongArchISD::VALL_NONZERO",
+                                    SDT_LoongArchVecCond>;
+def loongarch_vany_nonzero : SDNode<"LoongArchISD::VANY_NONZERO",
+                                    SDT_LoongArchVecCond>;
+def loongarch_vall_zero : SDNode<"LoongArchISD::VALL_ZERO",
+                                 SDT_LoongArchVecCond>;
+def loongarch_vany_zero : SDNode<"LoongArchISD::VANY_ZERO",
+                                 SDT_LoongArchVecCond>;
+
+def loongarch_vpick_sext_elt : SDNode<"LoongArchISD::VPICK_SEXT_ELT",
+                                      SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>;
+def loongarch_vpick_zext_elt : SDNode<"LoongArchISD::VPICK_ZEXT_ELT",
+                                      SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>;
+
+class VecCond<SDPatternOperator OpNode, ValueType TyNode,
+              RegisterOperand RC = LSX128>
+    : Pseudo<(outs GPR:$rd), (ins RC:$vj),
+             [(set GPR:$rd, (OpNode (TyNode RC:$vj)))]> {
+  let hasSideEffects = 0;
+  let mayLoad = 0;
+  let mayStore = 0;
+  let usesCustomInserter = 1;
+}
+
+def vsplat_imm_eq_1 : PatFrags<(ops), [(build_vector),
+                                       (bitconvert (v4i32 (build_vector)))], [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0).getNode();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
+}]>;
+
+def vsplati8_imm_eq_7 : PatFrags<(ops), [(build_vector)], [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0).getNode();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 7;
+}]>;
+def vsplati16_imm_eq_15 : PatFrags<(ops), [(build_vector)], [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0).getNode();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 15;
+}]>;
+def vsplati32_imm_eq_31 : PatFrags<(ops), [(build_vector)], [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0).getNode();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 31;
+}]>;
+def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector),
+                                           (bitconvert (v4i32 (build_vector)))], [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0).getNode();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63;
+}]>;
+
+def vsplatf32_fpimm_eq_1
+    : PatFrags<(ops), [(bitconvert (v4i32 (build_vector))),
+                       (bitconvert (v8i32 (build_vector)))], [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+  N = N->getOperand(0).getNode();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() &&
+         Imm == APFloat(+1.0f).bitcastToAPInt();
+}]>;
+def vsplatf64_fpimm_eq_1
+    : PatFrags<(ops), [(bitconvert (v2i64 (build_vector))),
+                       (bitconvert (v4i64 (build_vector)))], [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+  N = N->getOperand(0).getNode();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() &&
+         Imm == APFloat(+1.0).bitcastToAPInt();
+}]>;
+
+def vsplati8imm7 : PatFrag<(ops node:$reg),
+                           (and node:$reg, vsplati8_imm_eq_7)>;
+def vsplati16imm15 : PatFrag<(ops node:$reg),
+                             (and node:$reg, vsplati16_imm_eq_15)>;
+def vsplati32imm31 : PatFrag<(ops node:$reg),
+                             (and node:$reg, vsplati32_imm_eq_31)>;
+def vsplati64imm63 : PatFrag<(ops node:$reg),
+                             (and node:$reg, vsplati64_imm_eq_63)>;
+
+foreach N = [3, 4, 5, 6, 8] in
+  def SplatPat_uimm#N : ComplexPattern<vAny, 1, "selectVSplatUimm<"#N#">",
+                                       [build_vector, bitconvert], [], 2>;
+
+foreach N = [5] in
+  def SplatPat_simm#N : ComplexPattern<vAny, 1, "selectVSplatSimm<"#N#">",
+                                       [build_vector, bitconvert]>;
+
+def vsplat_uimm_inv_pow2 : ComplexPattern<vAny, 1, "selectVSplatUimmInvPow2",
+                                          [build_vector, bitconvert]>;
+
+def vsplat_uimm_pow2 : ComplexPattern<vAny, 1, "selectVSplatUimmPow2",
+                                      [build_vector, bitconvert]>;
+
+def muladd : PatFrag<(ops node:$vd, node:$vj, node:$vk),
+                     (add node:$vd, (mul node:$vj, node:$vk))>;
+
+def mulsub : PatFrag<(ops node:$vd, node:$vj, node:$vk),
+                     (sub node:$vd, (mul node:$vj, node:$vk))>;
+
+def lsxsplati8 : PatFrag<(ops node:$e0),
+                         (v16i8 (build_vector node:$e0, node:$e0,
+                                              node:$e0, node:$e0,
+                                              node:$e0, node:$e0,
+                                              node:$e0, node:$e0,
+                                              node:$e0, node:$e0,
+                                              node:$e0, node:$e0,
+                                              node:$e0, node:$e0,
+                                              node:$e0, node:$e0))>;
+def lsxsplati16 : PatFrag<(ops node:$e0),
+                          (v8i16 (build_vector node:$e0, node:$e0,
+                                               node:$e0, node:$e0,
+                                               node:$e0, node:$e0,
+                                               node:$e0, node:$e0))>;
+def lsxsplati32 : PatFrag<(ops node:$e0),
+                          (v4i32 (build_vector node:$e0, node:$e0,
+                                               node:$e0, node:$e0))>;
+def lsxsplati64 : PatFrag<(ops node:$e0),
+                          (v2i64 (build_vector node:$e0, node:$e0))>;
+def lsxsplatf32 : PatFrag<(ops node:$e0),
+                          (v4f32 (build_vector node:$e0, node:$e0,
+                                               node:$e0, node:$e0))>;
+def lsxsplatf64 : PatFrag<(ops node:$e0),
+                          (v2f64 (build_vector node:$e0, node:$e0))>;
+
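+// An SDNodeXForm that re-emits a generic immediate operand as a target
+// constant of GRLen width, so it can be encoded directly on the instruction.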
+def to_valid_timm : SDNodeXForm<timm, [{
+  auto CN = cast<ConstantSDNode>(N);
+  return CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(N),
+                                   Subtarget->getGRLenVT());
+}]>;
+
 //===----------------------------------------------------------------------===//
 // Instruction class templates
 //===----------------------------------------------------------------------===//
@@ -1004,4 +1171,910 @@ def PseudoVREPLI_D : Pseudo<(outs LSX128:$vd), (ins simm10:$imm), [],
                             "vrepli.d", "$vd, $imm">;
 }
 
+def PseudoVBNZ_B : VecCond<loongarch_vall_nonzero, v16i8>;
+def PseudoVBNZ_H : VecCond<loongarch_vall_nonzero, v8i16>;
+def PseudoVBNZ_W : VecCond<loongarch_vall_nonzero, v4i32>;
+def PseudoVBNZ_D : VecCond<loongarch_vall_nonzero, v2i64>;
+def PseudoVBNZ : VecCond<loongarch_vany_nonzero, v16i8>;
+
+def PseudoVBZ_B : VecCond<loongarch_vany_zero, v16i8>;
+def PseudoVBZ_H : VecCond<loongarch_vany_zero, v8i16>;
+def PseudoVBZ_W : VecCond<loongarch_vany_zero, v4i32>;
+def PseudoVBZ_D : VecCond<loongarch_vany_zero, v2i64>;
+def PseudoVBZ : VecCond<loongarch_vall_zero, v16i8>;
+
+} // Predicates = [HasExtLSX]
+
+multiclass PatVr<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(v16i8 (OpNode (v16i8 LSX128:$vj))),
+            (!cast<LAInst>(Inst#"_B") LSX128:$vj)>;
+  def : Pat<(v8i16 (OpNode (v8i16 LSX128:$vj))),
+            (!cast<LAInst>(Inst#"_H") LSX128:$vj)>;
+  def : Pat<(v4i32 (OpNode (v4i32 LSX128:$vj))),
+            (!cast<LAInst>(Inst#"_W") LSX128:$vj)>;
+  def : Pat<(v2i64 (OpNode (v2i64 LSX128:$vj))),
+            (!cast<LAInst>(Inst#"_D") LSX128:$vj)>;
+}
+
+multiclass PatVrF<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(v4f32 (OpNode (v4f32 LSX128:$vj))),
+            (!cast<LAInst>(Inst#"_S") LSX128:$vj)>;
+  def : Pat<(v2f64 (OpNode (v2f64 LSX128:$vj))),
+            (!cast<LAInst>(Inst#"_D") LSX128:$vj)>;
+}
+
+multiclass PatVrVr<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
+            (!cast<LAInst>(Inst#"_B") LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)),
+            (!cast<LAInst>(Inst#"_H") LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)),
+            (!cast<LAInst>(Inst#"_W") LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)),
+            (!cast<LAInst>(Inst#"_D") LSX128:$vj, LSX128:$vk)>;
+}
+
+multiclass PatVrVrF<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(OpNode (v4f32 LSX128:$vj), (v4f32 LSX128:$vk)),
+            (!cast<LAInst>(Inst#"_S") LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(OpNode (v2f64 LSX128:$vj), (v2f64 LSX128:$vk)),
+            (!cast<LAInst>(Inst#"_D") LSX128:$vj, LSX128:$vk)>;
+}
+
+multiclass PatVrVrU<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
+            (!cast<LAInst>(Inst#"_BU") LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)),
+            (!cast<LAInst>(Inst#"_HU") LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)),
+            (!cast<LAInst>(Inst#"_WU") LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)),
+            (!cast<LAInst>(Inst#"_DU") LSX128:$vj, LSX128:$vk)>;
+}
+
+multiclass PatVrSimm5<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_simm5 simm5:$imm))),
+            (!cast<LAInst>(Inst#"_B") LSX128:$vj, simm5:$imm)>;
+  def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_simm5 simm5:$imm))),
+            (!cast<LAInst>(Inst#"_H") LSX128:$vj, simm5:$imm)>;
+  def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_simm5 simm5:$imm))),
+            (!cast<LAInst>(Inst#"_W") LSX128:$vj, simm5:$imm)>;
+  def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_simm5 simm5:$imm))),
+            (!cast<LAInst>(Inst#"_D") LSX128:$vj, simm5:$imm)>;
+}
+
+multiclass PatVrUimm5<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm5 uimm5:$imm))),
+            (!cast<LAInst>(Inst#"_BU") LSX128:$vj, uimm5:$imm)>;
+  def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_uimm5 uimm5:$imm))),
+            (!cast<LAInst>(Inst#"_HU") LSX128:$vj, uimm5:$imm)>;
+  def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_uimm5 uimm5:$imm))),
+            (!cast<LAInst>(Inst#"_WU") LSX128:$vj, uimm5:$imm)>;
+  def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_uimm5 uimm5:$imm))),
+            (!cast<LAInst>(Inst#"_DU") LSX128:$vj, uimm5:$imm)>;
+}
+
+multiclass PatVrVrVr<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(OpNode (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
+            (!cast<LAInst>(Inst#"_B") LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(OpNode (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)),
+            (!cast<LAInst>(Inst#"_H") LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(OpNode (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)),
+            (!cast<LAInst>(Inst#"_W") LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(OpNode (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)),
+            (!cast<LAInst>(Inst#"_D") LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+}
+
+multiclass PatShiftVrVr<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(OpNode (v16i8 LSX128:$vj), (and vsplati8_imm_eq_7,
+                                             (v16i8 LSX128:$vk))),
+            (!cast<LAInst>(Inst#"_B") LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(OpNode (v8i16 LSX128:$vj), (and vsplati16_imm_eq_15,
+                                             (v8i16 LSX128:$vk))),
+            (!cast<LAInst>(Inst#"_H") LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(OpNode (v4i32 LSX128:$vj), (and vsplati32_imm_eq_31,
+                                             (v4i32 LSX128:$vk))),
+            (!cast<LAInst>(Inst#"_W") LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(OpNode (v2i64 LSX128:$vj), (and vsplati64_imm_eq_63,
+                                             (v2i64 LSX128:$vk))),
+            (!cast<LAInst>(Inst#"_D") LSX128:$vj, LSX128:$vk)>;
+}
+
+multiclass PatShiftVrUimm<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm3 uimm3:$imm))),
+            (!cast<LAInst>(Inst#"_B") LSX128:$vj, uimm3:$imm)>;
+  def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_uimm4 uimm4:$imm))),
+            (!cast<LAInst>(Inst#"_H") LSX128:$vj, uimm4:$imm)>;
+  def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_uimm5 uimm5:$imm))),
+            (!cast<LAInst>(Inst#"_W") LSX128:$vj, uimm5:$imm)>;
+  def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_uimm6 uimm6:$imm))),
+            (!cast<LAInst>(Inst#"_D") LSX128:$vj, uimm6:$imm)>;
+}
+
+multiclass PatCCVrSimm5<CondCode CC, string Inst> {
+  def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj),
+                          (v16i8 (SplatPat_simm5 simm5:$imm)), CC)),
+            (!cast<LAInst>(Inst#"_B") LSX128:$vj, simm5:$imm)>;
+  def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj),
+                          (v8i16 (SplatPat_simm5 simm5:$imm)), CC)),
+            (!cast<LAInst>(Inst#"_H") LSX128:$vj, simm5:$imm)>;
+  def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj),
+                          (v4i32 (SplatPat_simm5 simm5:$imm)), CC)),
+            (!cast<LAInst>(Inst#"_W") LSX128:$vj, simm5:$imm)>;
+  def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj),
+                          (v2i64 (SplatPat_simm5 simm5:$imm)), CC)),
+            (!cast<LAInst>(Inst#"_D") LSX128:$vj, simm5:$imm)>;
+}
+
+multiclass PatCCVrUimm5<CondCode CC, string Inst> {
+  def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj),
+                          (v16i8 (SplatPat_uimm5 uimm5:$imm)), CC)),
+            (!cast<LAInst>(Inst#"_BU") LSX128:$vj, uimm5:$imm)>;
+  def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj),
+                          (v8i16 (SplatPat_uimm5 uimm5:$imm)), CC)),
+            (!cast<LAInst>(Inst#"_HU") LSX128:$vj, uimm5:$imm)>;
+  def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj),
+                          (v4i32 (SplatPat_uimm5 uimm5:$imm)), CC)),
+            (!cast<LAInst>(Inst#"_WU") LSX128:$vj, uimm5:$imm)>;
+  def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj),
+                          (v2i64 (SplatPat_uimm5 uimm5:$imm)), CC)),
+            (!cast<LAInst>(Inst#"_DU") LSX128:$vj, uimm5:$imm)>;
+}
+
+multiclass PatCCVrVr<CondCode CC, string Inst> {
+  def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), (v16i8 LSX128:$vk), CC)),
+            (!cast<LAInst>(Inst#"_B") LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), (v8i16 LSX128:$vk), CC)),
+            (!cast<LAInst>(Inst#"_H") LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), (v4i32 LSX128:$vk), CC)),
+            (!cast<LAInst>(Inst#"_W") LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), (v2i64 LSX128:$vk), CC)),
+            (!cast<LAInst>(Inst#"_D") LSX128:$vj, LSX128:$vk)>;
+}
+
+multiclass PatCCVrVrU<CondCode CC, string Inst> {
+  def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), (v16i8 LSX128:$vk), CC)),
+            (!cast<LAInst>(Inst#"_BU") LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), (v8i16 LSX128:$vk), CC)),
+            (!cast<LAInst>(Inst#"_HU") LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), (v4i32 LSX128:$vk), CC)),
+            (!cast<LAInst>(Inst#"_WU") LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), (v2i64 LSX128:$vk), CC)),
+            (!cast<LAInst>(Inst#"_DU") LSX128:$vj, LSX128:$vk)>;
+}
+
+multiclass PatCCVrVrF<CondCode CC, string Inst> {
+  def : Pat<(v4i32 (setcc (v4f32 LSX128:$vj), (v4f32 LSX128:$vk), CC)),
+            (!cast<LAInst>(Inst#"_S") LSX128:$vj, LSX128:$vk)>;
+  def : Pat<(v2i64 (setcc (v2f64 LSX128:$vj), (v2f64 LSX128:$vk), CC)),
+            (!cast<LAInst>(Inst#"_D") LSX128:$vj, LSX128:$vk)>;
+}
+
+let Predicates = [HasExtLSX] in {
+
+// VADD_{B/H/W/D}
+defm : PatVrVr<add, "VADD">;
+// VSUB_{B/H/W/D}
+defm : PatVrVr<sub, "VSUB">;
+
+// VADDI_{B/H/W/D}U
+defm : PatVrUimm5<add, "VADDI">;
+// VSUBI_{B/H/W/D}U
+defm : PatVrUimm5<sub, "VSUBI">;
+
+// VNEG_{B/H/W/D}
+def : Pat<(sub immAllZerosV, (v16i8 LSX128:$vj)), (VNEG_B LSX128:$vj)>;
+def : Pat<(sub immAllZerosV, (v8i16 LSX128:$vj)), (VNEG_H LSX128:$vj)>;
+def : Pat<(sub immAllZerosV, (v4i32 LSX128:$vj)), (VNEG_W LSX128:$vj)>;
+def : Pat<(sub immAllZerosV, (v2i64 LSX128:$vj)), (VNEG_D LSX128:$vj)>;
+
+// VMAX[I]_{B/H/W/D}[U]
+defm : PatVrVr<smax, "VMAX">;
+defm : PatVrVrU<umax, "VMAX">;
+defm : PatVrSimm5<smax, "VMAXI">;
+defm : PatVrUimm5<umax, "VMAXI">;
+
+// VMIN[I]_{B/H/W/D}[U]
+defm : PatVrVr<smin, "VMIN">;
+defm : PatVrVrU<umin, "VMIN">;
+defm : PatVrSimm5<smin, "VMINI">;
+defm : PatVrUimm5<umin, "VMINI">;
+
+// VMUL_{B/H/W/D}
+defm : PatVrVr<mul, "VMUL">;
+
+// VMUH_{B/H/W/D}[U]
+defm : PatVrVr<mulhs, "VMUH">;
+defm : PatVrVrU<mulhu, "VMUH">;
+
+// VMADD_{B/H/W/D}
+defm : PatVrVrVr<muladd, "VMADD">;
+// VMSUB_{B/H/W/D}
+defm : PatVrVrVr<mulsub, "VMSUB">;
+
+// VDIV_{B/H/W/D}[U]
+defm : PatVrVr<sdiv, "VDIV">;
+defm : PatVrVrU<udiv, "VDIV">;
+
+// VMOD_{B/H/W/D}[U]
+defm : PatVrVr<srem, "VMOD">;
+defm : PatVrVrU<urem, "VMOD">;
+
+// VAND_V
+foreach vt = [v16i8, v8i16, v4i32, v2i64] in
+def : Pat<(and (vt LSX128:$vj), (vt LSX128:$vk)),
+          (VAND_V LSX128:$vj, LSX128:$vk)>;
+// VOR_V
+foreach vt = [v16i8, v8i16, v4i32, v2i64] in
+def : Pat<(or (vt LSX128:$vj), (vt LSX128:$vk)),
+          (VOR_V LSX128:$vj, LSX128:$vk)>;
+// VXOR_V
+foreach vt = [v16i8, v8i16, v4i32, v2i64] in
+def : Pat<(xor (vt LSX128:$vj), (vt LSX128:$vk)),
+          (VXOR_V LSX128:$vj, LSX128:$vk)>;
+// VNOR_V
+foreach vt = [v16i8, v8i16, v4i32, v2i64] in
+def : Pat<(vnot (or (vt LSX128:$vj), (vt LSX128:$vk))),
+          (VNOR_V LSX128:$vj, LSX128:$vk)>;
+
+// VANDI_B
+def : Pat<(and (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))),
+          (VANDI_B LSX128:$vj, uimm8:$imm)>;
+// VORI_B
+def : Pat<(or (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))),
+          (VORI_B LSX128:$vj, uimm8:$imm)>;
+
+// VXORI_B
+def : Pat<(xor (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))),
+          (VXORI_B LSX128:$vj, uimm8:$imm)>;
+
+// VSLL[I]_{B/H/W/D}
+defm : PatVrVr<shl, "VSLL">;
+defm : PatShiftVrVr<shl, "VSLL">;
+defm : PatShiftVrUimm<shl, "VSLLI">;
+
+// VSRL[I]_{B/H/W/D}
+defm : PatVrVr<srl, "VSRL">;
+defm : PatShiftVrVr<srl, "VSRL">;
+defm : PatShiftVrUimm<srl, "VSRLI">;
+
+// VSRA[I]_{B/H/W/D}
+defm : PatVrVr<sra, "VSRA">;
+defm : PatShiftVrVr<sra, "VSRA">;
+defm : PatShiftVrUimm<sra, "VSRAI">;
+
+// VCLZ_{B/H/W/D}
+defm : PatVr<ctlz, "VCLZ">;
+
+// VPCNT_{B/H/W/D}
+defm : PatVr<ctpop, "VPCNT">;
+
+// VBITCLR_{B/H/W/D}
+def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1, v16i8:$vk))),
+          (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>;
+def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1, v8i16:$vk))),
+          (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>;
+def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1, v4i32:$vk))),
+          (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>;
+def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1, v2i64:$vk))),
+          (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>;
+def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1,
+                                     (vsplati8imm7 v16i8:$vk)))),
+          (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>;
+def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1,
+                                     (vsplati16imm15 v8i16:$vk)))),
+          (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>;
+def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1,
+                                     (vsplati32imm31 v4i32:$vk)))),
+          (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>;
+def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1,
+                                     (vsplati64imm63 v2i64:$vk)))),
+          (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>;
+
+// VBITCLRI_{B/H/W/D}
+def : Pat<(and (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_inv_pow2 uimm3:$imm))),
+          (VBITCLRI_B LSX128:$vj, uimm3:$imm)>;
+def : Pat<(and (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_inv_pow2 uimm4:$imm))),
+          (VBITCLRI_H LSX128:$vj, uimm4:$imm)>;
+def : Pat<(and (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_inv_pow2 uimm5:$imm))),
+          (VBITCLRI_W LSX128:$vj, uimm5:$imm)>;
+def : Pat<(and (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_inv_pow2 uimm6:$imm))),
+          (VBITCLRI_D LSX128:$vj, uimm6:$imm)>;
+
+// VBITSET_{B/H/W/D}
+def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)),
+          (v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>;
+def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)),
+          (v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>;
+def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)),
+          (v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>;
+def : Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)),
+          (v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>;
+def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))),
+          (v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>;
+def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))),
+          (v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>;
+def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))),
+          (v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>;
+def : Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))),
+          (v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>;
+
+// VBITSETI_{B/H/W/D}
+def : Pat<(or (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_pow2 uimm3:$imm))),
+          (VBITSETI_B LSX128:$vj, uimm3:$imm)>;
+def : Pat<(or (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_pow2 uimm4:$imm))),
+          (VBITSETI_H LSX128:$vj, uimm4:$imm)>;
+def : Pat<(or (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_pow2 uimm5:$imm))),
+          (VBITSETI_W LSX128:$vj, uimm5:$imm)>;
+def : Pat<(or (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))),
+          (VBITSETI_D LSX128:$vj, uimm6:$imm)>;
+
+// VBITREV_{B/H/W/D}
+def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)),
+          (v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>;
+def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)),
+          (v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>;
+def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)),
+          (v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>;
+def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)),
+          (v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>;
+def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))),
+          (v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>;
+def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))),
+          (v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>;
+def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))),
+          (v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>;
+def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))),
+          (v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>;
+
+// VBITREVI_{B/H/W/D}
+def : Pat<(xor (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_pow2 uimm3:$imm))),
+          (VBITREVI_B LSX128:$vj, uimm3:$imm)>;
+def : Pat<(xor (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_pow2 uimm4:$imm))),
+          (VBITREVI_H LSX128:$vj, uimm4:$imm)>;
+def : Pat<(xor (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_pow2 uimm5:$imm))),
+          (VBITREVI_W LSX128:$vj, uimm5:$imm)>;
+def : Pat<(xor (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))),
+          (VBITREVI_D LSX128:$vj, uimm6:$imm)>;
+
+// VFADD_{S/D}
+defm : PatVrVrF<fadd, "VFADD">;
+
+// VFSUB_{S/D}
+defm : PatVrVrF<fsub, "VFSUB">;
+
+// VFMUL_{S/D}
+defm : PatVrVrF<fmul, "VFMUL">;
+
+// VFDIV_{S/D}
+defm : PatVrVrF<fdiv, "VFDIV">;
+
+// VFMADD_{S/D}
+def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va),
+          (VFMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>;
+def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va),
+          (VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>;
+
+// VFMSUB_{S/D}
+def : Pat<(fma v4f32:$vj, v4f32:$vk, (fneg v4f32:$va)),
+          (VFMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>;
+def : Pat<(fma v2f64:$vj, v2f64:$vk, (fneg v2f64:$va)),
+          (VFMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>;
+
+// VFNMADD_{S/D}
+def : Pat<(fneg (fma v4f32:$vj, v4f32:$vk, v4f32:$va)),
+          (VFNMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>;
+def : Pat<(fneg (fma v2f64:$vj, v2f64:$vk, v2f64:$va)),
+          (VFNMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>;
+def : Pat<(fma_nsz (fneg v4f32:$vj), v4f32:$vk, (fneg v4f32:$va)),
+          (VFNMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>;
+def : Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, (fneg v2f64:$va)),
+          (VFNMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>;
+
+// VFNMSUB_{S/D}
+def : Pat<(fneg (fma v4f32:$vj, v4f32:$vk, (fneg v4f32:$va))),
+          (VFNMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>;
+def : Pat<(fneg (fma v2f64:$vj, v2f64:$vk, (fneg v2f64:$va))),
+          (VFNMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>;
+def : Pat<(fma_nsz (fneg v4f32:$vj), v4f32:$vk, v4f32:$va),
+          (VFNMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>;
+def : Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, v2f64:$va),
+          (VFNMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>;
+
+// VFSQRT_{S/D}
+defm : PatVrF<fsqrt, "VFSQRT">;
+
+// VFRECIP_{S/D}
+def : Pat<(fdiv vsplatf32_fpimm_eq_1, v4f32:$vj),
+          (VFRECIP_S v4f32:$vj)>;
+def : Pat<(fdiv vsplatf64_fpimm_eq_1, v2f64:$vj),
+          (VFRECIP_D v2f64:$vj)>;
+
+// VFRSQRT_{S/D}
+def : Pat<(fdiv vsplatf32_fpimm_eq_1, (fsqrt v4f32:$vj)),
+          (VFRSQRT_S v4f32:$vj)>;
+def : Pat<(fdiv vsplatf64_fpimm_eq_1, (fsqrt v2f64:$vj)),
+          (VFRSQRT_D v2f64:$vj)>;
+
+// VSEQ[I]_{B/H/W/D}
+defm : PatCCVrSimm5<SETEQ, "VSEQI">;
+defm : PatCCVrVr<SETEQ, "VSEQ">;
+
+// VSLE[I]_{B/H/W/D}[U]
+defm : PatCCVrSimm5<SETLE, "VSLEI">;
+defm : PatCCVrUimm5<SETULE, "VSLEI">;
+defm : PatCCVrVr<SETLE, "VSLE">;
+defm : PatCCVrVrU<SETULE, "VSLE">;
+
+// VSLT[I]_{B/H/W/D}[U]
+defm : PatCCVrSimm5<SETLT, "VSLTI">;
+defm : PatCCVrUimm5<SETULT, "VSLTI">;
+defm : PatCCVrVr<SETLT, "VSLT">;
+defm : PatCCVrVrU<SETULT, "VSLT">;
+
+// VFCMP.cond.{S/D}
+defm : PatCCVrVrF<SETEQ, "VFCMP_CEQ">;
+defm : PatCCVrVrF<SETOEQ, "VFCMP_CEQ">;
+defm : PatCCVrVrF<SETUEQ, "VFCMP_CUEQ">;
+
+defm : PatCCVrVrF<SETLE, "VFCMP_CLE">;
+defm : PatCCVrVrF<SETOLE, "VFCMP_CLE">;
+defm : PatCCVrVrF<SETULE, "VFCMP_CULE">;
+
+defm : PatCCVrVrF<SETLT, "VFCMP_CLT">;
+defm : PatCCVrVrF<SETOLT, "VFCMP_CLT">;
+defm : PatCCVrVrF<SETULT, "VFCMP_CULT">;
+
+defm : PatCCVrVrF<SETNE, "VFCMP_CNE">;
+defm : PatCCVrVrF<SETONE, "VFCMP_CNE">;
+defm : PatCCVrVrF<SETUNE, "VFCMP_CUNE">;
+
+defm : PatCCVrVrF<SETO, "VFCMP_COR">;
+defm : PatCCVrVrF<SETUO, "VFCMP_CUN">;
+
+// VINSGR2VR_{B/H/W/D}
+def : Pat<(vector_insert v16i8:$vd, GRLenVT:$rj, uimm4:$imm),
+          (VINSGR2VR_B v16i8:$vd, GRLenVT:$rj, uimm4:$imm)>;
+def : Pat<(vector_insert v8i16:$vd, GRLenVT:$rj, uimm3:$imm),
+          (VINSGR2VR_H v8i16:$vd, GRLenVT:$rj, uimm3:$imm)>;
+def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm),
+          (VINSGR2VR_W v4i32:$vd, GRLenVT:$rj, uimm2:$imm)>;
+def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm),
+          (VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>;
+
+def : Pat<(vector_insert v4f32:$vd, FPR32:$fj, uimm2:$imm),
+          (VINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm2:$imm)>;
+def : Pat<(vector_insert v2f64:$vd, FPR64:$fj, uimm1:$imm),
+          (VINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm1:$imm)>;
+
+// VPICKVE2GR_{B/H/W}[U]
+def : Pat<(loongarch_vpick_sext_elt v16i8:$vd, uimm4:$imm, i8),
+          (VPICKVE2GR_B v16i8:$vd, uimm4:$imm)>;
+def : Pat<(loongarch_vpick_sext_elt v8i16:$vd, uimm3:$imm, i16),
+          (VPICKVE2GR_H v8i16:$vd, uimm3:$imm)>;
+def : Pat<(loongarch_vpick_sext_elt v4i32:$vd, uimm2:$imm, i32),
+          (VPICKVE2GR_W v4i32:$vd, uimm2:$imm)>;
+
+def : Pat<(loongarch_vpick_zext_elt v16i8:$vd, uimm4:$imm, i8),
+          (VPICKVE2GR_BU v16i8:$vd, uimm4:$imm)>;
+def : Pat<(loongarch_vpick_zext_elt v8i16:$vd, uimm3:$imm, i16),
+          (VPICKVE2GR_HU v8i16:$vd, uimm3:$imm)>;
+def : Pat<(loongarch_vpick_zext_elt v4i32:$vd, uimm2:$imm, i32),
+          (VPICKVE2GR_WU v4i32:$vd, uimm2:$imm)>;
+
+// VREPLGR2VR_{B/H/W/D}
+def : Pat<(lsxsplati8 GPR:$rj), (VREPLGR2VR_B GPR:$rj)>;
+def : Pat<(lsxsplati16 GPR:$rj), (VREPLGR2VR_H GPR:$rj)>;
+def : Pat<(lsxsplati32 GPR:$rj), (VREPLGR2VR_W GPR:$rj)>;
+def : Pat<(lsxsplati64 GPR:$rj), (VREPLGR2VR_D GPR:$rj)>;
+
+// VREPLVE_{B/H/W/D}
+def : Pat<(loongarch_vreplve v16i8:$vj, GRLenVT:$rk),
+          (VREPLVE_B v16i8:$vj, GRLenVT:$rk)>;
+def : Pat<(loongarch_vreplve v8i16:$vj, GRLenVT:$rk),
+          (VREPLVE_H v8i16:$vj, GRLenVT:$rk)>;
+def : Pat<(loongarch_vreplve v4i32:$vj, GRLenVT:$rk),
+          (VREPLVE_W v4i32:$vj, GRLenVT:$rk)>;
+def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk),
+          (VREPLVE_D v2i64:$vj, GRLenVT:$rk)>;
+
+// VREPLVEI_{W/D}
+def : Pat<(lsxsplatf32 FPR32:$fj),
+          (VREPLVEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>;
+def : Pat<(lsxsplatf64 FPR64:$fj),
+          (VREPLVEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>;
+
+// Loads/Stores
+foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
+  defm : LdPat<load, VLD, vt>;
+  def  : RegRegLdPat<load, VLDX, vt>;
+  defm : StPat<store, VST, LSX128, vt>;
+  def  : RegRegStPat<store, VSTX, LSX128, vt>;
+}
+
+// Vector extraction with constant index.
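+// f32/f64 elements are extracted by replicating the requested lane with
+// vreplvei.{w/d} and then reading the low FPR subregister.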
+def : Pat<(i64 (vector_extract v16i8:$vj, uimm4:$imm)),
+          (VPICKVE2GR_B v16i8:$vj, uimm4:$imm)>;
+def : Pat<(i64 (vector_extract v8i16:$vj, uimm3:$imm)),
+          (VPICKVE2GR_H v8i16:$vj, uimm3:$imm)>;
+def : Pat<(i64 (vector_extract v4i32:$vj, uimm2:$imm)),
+          (VPICKVE2GR_W v4i32:$vj, uimm2:$imm)>;
+def : Pat<(i64 (vector_extract v2i64:$vj, uimm1:$imm)),
+          (VPICKVE2GR_D v2i64:$vj, uimm1:$imm)>;
+def : Pat<(f32 (vector_extract v4f32:$vj, uimm2:$imm)),
+          (f32 (EXTRACT_SUBREG (VREPLVEI_W v4f32:$vj, uimm2:$imm), sub_32))>;
+def : Pat<(f64 (vector_extract v2f64:$vj, uimm1:$imm)),
+          (f64 (EXTRACT_SUBREG (VREPLVEI_D v2f64:$vj, uimm1:$imm), sub_64))>;
+
+// Vector extraction with variable index.
+def : Pat<(i64 (vector_extract v16i8:$vj, i64:$rk)),
+          (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj,
+                                                                    i64:$rk),
+                                                         sub_32)),
+                                    GPR), (i64 24))>;
+def : Pat<(i64 (vector_extract v8i16:$vj, i64:$rk)),
+          (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj,
+                                                                    i64:$rk),
+                                                         sub_32)),
+                                    GPR), (i64 16))>;
+def : Pat<(i64 (vector_extract v4i32:$vj, i64:$rk)),
+          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, i64:$rk),
+                                                 sub_32)),
+                            GPR)>;
+def : Pat<(i64 (vector_extract v2i64:$vj, i64:$rk)),
+          (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, i64:$rk),
+                                                 sub_64)),
+                            GPR)>;
+def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)),
+          (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, i64:$rk), sub_32))>;
+def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)),
+          (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>;
+
+// vselect
+def : Pat<(v16i8 (vselect LSX128:$vd, (v16i8 (SplatPat_uimm8 uimm8:$imm)),
+                          LSX128:$vj)),
+          (VBITSELI_B LSX128:$vd, LSX128:$vj, uimm8:$imm)>;
+foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
+  def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)),
+            (VBITSEL_V LSX128:$vj, LSX128:$vk, LSX128:$va)>;
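+// Note the operand order: vselect(cond, t, f) maps to (VBITSEL_V f, t, cond);
+// vbitsel.v takes bits from its second source wherever the mask bit is set.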
"VSRAR_B", + "VSEQ_B", "VSLE_B", "VSLE_BU", "VSLT_B", "VSLT_BU", + "VPACKEV_B", "VPACKOD_B", "VPICKEV_B", "VPICKOD_B", + "VILVL_B", "VILVH_B"] in + def : Pat<(deriveLSXIntrinsic.ret + (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), + (!cast(Inst) LSX128:$vj, LSX128:$vk)>; +foreach Inst = ["VSADD_H", "VSADD_HU", "VSSUB_H", "VSSUB_HU", + "VHADDW_W_H", "VHADDW_WU_HU", "VHSUBW_W_H", "VHSUBW_WU_HU", + "VADDWEV_W_H", "VADDWOD_W_H", "VSUBWEV_W_H", "VSUBWOD_W_H", + "VADDWEV_W_HU", "VADDWOD_W_HU", "VSUBWEV_W_HU", "VSUBWOD_W_HU", + "VADDWEV_W_HU_H", "VADDWOD_W_HU_H", + "VAVG_H", "VAVG_HU", "VAVGR_H", "VAVGR_HU", + "VABSD_H", "VABSD_HU", "VADDA_H", "VMUH_H", "VMUH_HU", + "VMULWEV_W_H", "VMULWOD_W_H", "VMULWEV_W_HU", "VMULWOD_W_HU", + "VMULWEV_W_HU_H", "VMULWOD_W_HU_H", "VSIGNCOV_H", "VROTR_H", + "VSRLR_H", "VSRAR_H", "VSRLN_B_H", "VSRAN_B_H", "VSRLRN_B_H", + "VSRARN_B_H", "VSSRLN_B_H", "VSSRAN_B_H", "VSSRLN_BU_H", + "VSSRAN_BU_H", "VSSRLRN_B_H", "VSSRARN_B_H", "VSSRLRN_BU_H", + "VSSRARN_BU_H", + "VSEQ_H", "VSLE_H", "VSLE_HU", "VSLT_H", "VSLT_HU", + "VPACKEV_H", "VPACKOD_H", "VPICKEV_H", "VPICKOD_H", + "VILVL_H", "VILVH_H"] in + def : Pat<(deriveLSXIntrinsic.ret + (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), + (!cast(Inst) LSX128:$vj, LSX128:$vk)>; +foreach Inst = ["VSADD_W", "VSADD_WU", "VSSUB_W", "VSSUB_WU", + "VHADDW_D_W", "VHADDW_DU_WU", "VHSUBW_D_W", "VHSUBW_DU_WU", + "VADDWEV_D_W", "VADDWOD_D_W", "VSUBWEV_D_W", "VSUBWOD_D_W", + "VADDWEV_D_WU", "VADDWOD_D_WU", "VSUBWEV_D_WU", "VSUBWOD_D_WU", + "VADDWEV_D_WU_W", "VADDWOD_D_WU_W", + "VAVG_W", "VAVG_WU", "VAVGR_W", "VAVGR_WU", + "VABSD_W", "VABSD_WU", "VADDA_W", "VMUH_W", "VMUH_WU", + "VMULWEV_D_W", "VMULWOD_D_W", "VMULWEV_D_WU", "VMULWOD_D_WU", + "VMULWEV_D_WU_W", "VMULWOD_D_WU_W", "VSIGNCOV_W", "VROTR_W", + "VSRLR_W", "VSRAR_W", "VSRLN_H_W", "VSRAN_H_W", "VSRLRN_H_W", + "VSRARN_H_W", "VSSRLN_H_W", "VSSRAN_H_W", "VSSRLN_HU_W", + "VSSRAN_HU_W", "VSSRLRN_H_W", "VSSRARN_H_W", "VSSRLRN_HU_W", + "VSSRARN_HU_W", + "VSEQ_W", "VSLE_W", "VSLE_WU", "VSLT_W", "VSLT_WU", + "VPACKEV_W", "VPACKOD_W", "VPICKEV_W", "VPICKOD_W", + "VILVL_W", "VILVH_W"] in + def : Pat<(deriveLSXIntrinsic.ret + (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), + (!cast(Inst) LSX128:$vj, LSX128:$vk)>; +foreach Inst = ["VADD_Q", "VSUB_Q", + "VSADD_D", "VSADD_DU", "VSSUB_D", "VSSUB_DU", + "VHADDW_Q_D", "VHADDW_QU_DU", "VHSUBW_Q_D", "VHSUBW_QU_DU", + "VADDWEV_Q_D", "VADDWOD_Q_D", "VSUBWEV_Q_D", "VSUBWOD_Q_D", + "VADDWEV_Q_DU", "VADDWOD_Q_DU", "VSUBWEV_Q_DU", "VSUBWOD_Q_DU", + "VADDWEV_Q_DU_D", "VADDWOD_Q_DU_D", + "VAVG_D", "VAVG_DU", "VAVGR_D", "VAVGR_DU", + "VABSD_D", "VABSD_DU", "VADDA_D", "VMUH_D", "VMUH_DU", + "VMULWEV_Q_D", "VMULWOD_Q_D", "VMULWEV_Q_DU", "VMULWOD_Q_DU", + "VMULWEV_Q_DU_D", "VMULWOD_Q_DU_D", "VSIGNCOV_D", "VROTR_D", + "VSRLR_D", "VSRAR_D", "VSRLN_W_D", "VSRAN_W_D", "VSRLRN_W_D", + "VSRARN_W_D", "VSSRLN_W_D", "VSSRAN_W_D", "VSSRLN_WU_D", + "VSSRAN_WU_D", "VSSRLRN_W_D", "VSSRARN_W_D", "VSSRLRN_WU_D", + "VSSRARN_WU_D", "VFFINT_S_L", + "VSEQ_D", "VSLE_D", "VSLE_DU", "VSLT_D", "VSLT_DU", + "VPACKEV_D", "VPACKOD_D", "VPICKEV_D", "VPICKOD_D", + "VILVL_D", "VILVH_D"] in + def : Pat<(deriveLSXIntrinsic.ret + (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), + (!cast(Inst) LSX128:$vj, LSX128:$vk)>; + +// vty: v16i8/v8i16/v4i32/v2i64 +// Pat<(Intrinsic vty:$vd, vty:$vj, vty:$vk), +// (LAInst vty:$vd, vty:$vj, vty:$vk)>; +foreach Inst = ["VMADDWEV_H_B", "VMADDWOD_H_B", "VMADDWEV_H_BU", + "VMADDWOD_H_BU", "VMADDWEV_H_BU_B", "VMADDWOD_H_BU_B"] in + def : Pat<(deriveLSXIntrinsic.ret + (v8i16 LSX128:$vd), 
(v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), + (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; +foreach Inst = ["VMADDWEV_W_H", "VMADDWOD_W_H", "VMADDWEV_W_HU", + "VMADDWOD_W_HU", "VMADDWEV_W_HU_H", "VMADDWOD_W_HU_H"] in + def : Pat<(deriveLSXIntrinsic.ret + (v4i32 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), + (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; +foreach Inst = ["VMADDWEV_D_W", "VMADDWOD_D_W", "VMADDWEV_D_WU", + "VMADDWOD_D_WU", "VMADDWEV_D_WU_W", "VMADDWOD_D_WU_W"] in + def : Pat<(deriveLSXIntrinsic.ret + (v2i64 LSX128:$vd), (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), + (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; +foreach Inst = ["VMADDWEV_Q_D", "VMADDWOD_Q_D", "VMADDWEV_Q_DU", + "VMADDWOD_Q_DU", "VMADDWEV_Q_DU_D", "VMADDWOD_Q_DU_D"] in + def : Pat<(deriveLSXIntrinsic.ret + (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), + (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; + +// vty: v16i8/v8i16/v4i32/v2i64 +// Pat<(Intrinsic vty:$vj), +// (LAInst vty:$vj)>; +foreach Inst = ["VEXTH_H_B", "VEXTH_HU_BU", + "VMSKLTZ_B", "VMSKGEZ_B", "VMSKNZ_B", + "VCLO_B"] in + def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; +foreach Inst = ["VEXTH_W_H", "VEXTH_WU_HU", "VMSKLTZ_H", + "VCLO_H", "VFCVTL_S_H", "VFCVTH_S_H"] in + def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; +foreach Inst = ["VEXTH_D_W", "VEXTH_DU_WU", "VMSKLTZ_W", + "VCLO_W", "VFFINT_S_W", "VFFINT_S_WU", + "VFFINTL_D_W", "VFFINTH_D_W"] in + def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; +foreach Inst = ["VEXTH_Q_D", "VEXTH_QU_DU", "VMSKLTZ_D", + "VEXTL_Q_D", "VEXTL_QU_DU", + "VCLO_D", "VFFINT_D_L", "VFFINT_D_LU"] in + def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; + +// Pat<(Intrinsic timm:$imm) +// (LAInst timm:$imm)>; +def : Pat<(int_loongarch_lsx_vldi timm:$imm), + (VLDI (to_valid_timm timm:$imm))>; +foreach Inst = ["VREPLI_B", "VREPLI_H", "VREPLI_W", "VREPLI_D"] in + def : Pat<(deriveLSXIntrinsic.ret timm:$imm), + (!cast("Pseudo"#Inst) (to_valid_timm timm:$imm))>; + +// vty: v16i8/v8i16/v4i32/v2i64 +// Pat<(Intrinsic vty:$vj, timm:$imm) +// (LAInst vty:$vj, timm:$imm)>; +foreach Inst = ["VSAT_B", "VSAT_BU", "VNORI_B", "VROTRI_B", "VSLLWIL_H_B", + "VSLLWIL_HU_BU", "VSRLRI_B", "VSRARI_B", + "VSEQI_B", "VSLEI_B", "VSLEI_BU", "VSLTI_B", "VSLTI_BU", + "VREPLVEI_B", "VBSLL_V", "VBSRL_V", "VSHUF4I_B"] in + def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj), timm:$imm), + (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; +foreach Inst = ["VSAT_H", "VSAT_HU", "VROTRI_H", "VSLLWIL_W_H", + "VSLLWIL_WU_HU", "VSRLRI_H", "VSRARI_H", + "VSEQI_H", "VSLEI_H", "VSLEI_HU", "VSLTI_H", "VSLTI_HU", + "VREPLVEI_H", "VSHUF4I_H"] in + def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj), timm:$imm), + (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; +foreach Inst = ["VSAT_W", "VSAT_WU", "VROTRI_W", "VSLLWIL_D_W", + "VSLLWIL_DU_WU", "VSRLRI_W", "VSRARI_W", + "VSEQI_W", "VSLEI_W", "VSLEI_WU", "VSLTI_W", "VSLTI_WU", + "VREPLVEI_W", "VSHUF4I_W"] in + def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj), timm:$imm), + (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; +foreach Inst = ["VSAT_D", "VSAT_DU", "VROTRI_D", "VSRLRI_D", "VSRARI_D", + "VSEQI_D", "VSLEI_D", "VSLEI_DU", "VSLTI_D", "VSLTI_DU", + "VPICKVE2GR_D", "VPICKVE2GR_DU", + "VREPLVEI_D"] in + def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj), timm:$imm), + (!cast(Inst) LSX128:$vj, (to_valid_timm 
+                                              timm:$imm))>;
+
+// vty: v16i8/v8i16/v4i32/v2i64
+// Pat<(Intrinsic vty:$vd, vty:$vj, timm:$imm)
+//     (LAInst vty:$vd, vty:$vj, timm:$imm)>;
+foreach Inst = ["VSRLNI_B_H", "VSRANI_B_H", "VSRLRNI_B_H", "VSRARNI_B_H",
+                "VSSRLNI_B_H", "VSSRANI_B_H", "VSSRLNI_BU_H", "VSSRANI_BU_H",
+                "VSSRLRNI_B_H", "VSSRARNI_B_H", "VSSRLRNI_BU_H", "VSSRARNI_BU_H",
+                "VFRSTPI_B", "VBITSELI_B", "VEXTRINS_B"] in
+  def : Pat<(deriveLSXIntrinsic<Inst>.ret
+              (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), timm:$imm),
+            (!cast<LAInst>(Inst) LSX128:$vd, LSX128:$vj,
+                                 (to_valid_timm timm:$imm))>;
+foreach Inst = ["VSRLNI_H_W", "VSRANI_H_W", "VSRLRNI_H_W", "VSRARNI_H_W",
+                "VSSRLNI_H_W", "VSSRANI_H_W", "VSSRLNI_HU_W", "VSSRANI_HU_W",
+                "VSSRLRNI_H_W", "VSSRARNI_H_W", "VSSRLRNI_HU_W", "VSSRARNI_HU_W",
+                "VFRSTPI_H", "VEXTRINS_H"] in
+  def : Pat<(deriveLSXIntrinsic<Inst>.ret
+              (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), timm:$imm),
+            (!cast<LAInst>(Inst) LSX128:$vd, LSX128:$vj,
+                                 (to_valid_timm timm:$imm))>;
+foreach Inst = ["VSRLNI_W_D", "VSRANI_W_D", "VSRLRNI_W_D", "VSRARNI_W_D",
+                "VSSRLNI_W_D", "VSSRANI_W_D", "VSSRLNI_WU_D", "VSSRANI_WU_D",
+                "VSSRLRNI_W_D", "VSSRARNI_W_D", "VSSRLRNI_WU_D", "VSSRARNI_WU_D",
+                "VPERMI_W", "VEXTRINS_W"] in
+  def : Pat<(deriveLSXIntrinsic<Inst>.ret
+              (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), timm:$imm),
+            (!cast<LAInst>(Inst) LSX128:$vd, LSX128:$vj,
+                                 (to_valid_timm timm:$imm))>;
+foreach Inst = ["VSRLNI_D_Q", "VSRANI_D_Q", "VSRLRNI_D_Q", "VSRARNI_D_Q",
+                "VSSRLNI_D_Q", "VSSRANI_D_Q", "VSSRLNI_DU_Q", "VSSRANI_DU_Q",
+                "VSSRLRNI_D_Q", "VSSRARNI_D_Q", "VSSRLRNI_DU_Q", "VSSRARNI_DU_Q",
+                "VSHUF4I_D", "VEXTRINS_D"] in
+  def : Pat<(deriveLSXIntrinsic<Inst>.ret
+              (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), timm:$imm),
+            (!cast<LAInst>(Inst) LSX128:$vd, LSX128:$vj,
+                                 (to_valid_timm timm:$imm))>;
+
+// vty: v16i8/v8i16/v4i32/v2i64
+// Pat<(Intrinsic vty:$vd, vty:$vj, vty:$vk),
+//     (LAInst vty:$vd, vty:$vj, vty:$vk)>;
+foreach Inst = ["VFRSTP_B", "VBITSEL_V", "VSHUF_B"] in
+  def : Pat<(deriveLSXIntrinsic<Inst>.ret
+              (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
+            (!cast<LAInst>(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+foreach Inst = ["VFRSTP_H", "VSHUF_H"] in
+  def : Pat<(deriveLSXIntrinsic<Inst>.ret
+              (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)),
+            (!cast<LAInst>(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+def : Pat<(int_loongarch_lsx_vshuf_w (v4i32 LSX128:$vd), (v4i32 LSX128:$vj),
+                                     (v4i32 LSX128:$vk)),
+          (VSHUF_W LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+def : Pat<(int_loongarch_lsx_vshuf_d (v2i64 LSX128:$vd), (v2i64 LSX128:$vj),
+                                     (v2i64 LSX128:$vk)),
+          (VSHUF_D LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
+
+// vty: v4f32/v2f64
+// Pat<(Intrinsic vty:$vj, vty:$vk, vty:$va),
+//     (LAInst vty:$vj, vty:$vk, vty:$va)>;
+foreach Inst = ["VFMSUB_S", "VFNMADD_S", "VFNMSUB_S"] in
+  def : Pat<(deriveLSXIntrinsic<Inst>.ret
+              (v4f32 LSX128:$vj), (v4f32 LSX128:$vk), (v4f32 LSX128:$va)),
+            (!cast<LAInst>(Inst) LSX128:$vj, LSX128:$vk, LSX128:$va)>;
+foreach Inst = ["VFMSUB_D", "VFNMADD_D", "VFNMSUB_D"] in
+  def : Pat<(deriveLSXIntrinsic<Inst>.ret
+              (v2f64 LSX128:$vj), (v2f64 LSX128:$vk), (v2f64 LSX128:$va)),
+            (!cast<LAInst>(Inst) LSX128:$vj, LSX128:$vk, LSX128:$va)>;
+
+// vty: v4f32/v2f64
+// Pat<(Intrinsic vty:$vj, vty:$vk),
+//     (LAInst vty:$vj, vty:$vk)>;
+foreach Inst = ["VFMAX_S", "VFMIN_S", "VFMAXA_S", "VFMINA_S", "VFCVT_H_S",
+                "VFCMP_CAF_S", "VFCMP_CUN_S", "VFCMP_CEQ_S", "VFCMP_CUEQ_S",
+                "VFCMP_CLT_S", "VFCMP_CULT_S", "VFCMP_CLE_S", "VFCMP_CULE_S",
+                "VFCMP_CNE_S", "VFCMP_COR_S", "VFCMP_CUNE_S",
+                "VFCMP_SAF_S", "VFCMP_SUN_S", "VFCMP_SEQ_S", "VFCMP_SUEQ_S",
+                "VFCMP_SLT_S", "VFCMP_SULT_S",
"VFCMP_SLE_S", "VFCMP_SULE_S", + "VFCMP_SNE_S", "VFCMP_SOR_S", "VFCMP_SUNE_S"] in + def : Pat<(deriveLSXIntrinsic.ret + (v4f32 LSX128:$vj), (v4f32 LSX128:$vk)), + (!cast(Inst) LSX128:$vj, LSX128:$vk)>; +foreach Inst = ["VFMAX_D", "VFMIN_D", "VFMAXA_D", "VFMINA_D", "VFCVT_S_D", + "VFTINTRNE_W_D", "VFTINTRZ_W_D", "VFTINTRP_W_D", "VFTINTRM_W_D", + "VFTINT_W_D", + "VFCMP_CAF_D", "VFCMP_CUN_D", "VFCMP_CEQ_D", "VFCMP_CUEQ_D", + "VFCMP_CLT_D", "VFCMP_CULT_D", "VFCMP_CLE_D", "VFCMP_CULE_D", + "VFCMP_CNE_D", "VFCMP_COR_D", "VFCMP_CUNE_D", + "VFCMP_SAF_D", "VFCMP_SUN_D", "VFCMP_SEQ_D", "VFCMP_SUEQ_D", + "VFCMP_SLT_D", "VFCMP_SULT_D", "VFCMP_SLE_D", "VFCMP_SULE_D", + "VFCMP_SNE_D", "VFCMP_SOR_D", "VFCMP_SUNE_D"] in + def : Pat<(deriveLSXIntrinsic.ret + (v2f64 LSX128:$vj), (v2f64 LSX128:$vk)), + (!cast(Inst) LSX128:$vj, LSX128:$vk)>; + +// vty: v4f32/v2f64 +// Pat<(Intrinsic vty:$vj), +// (LAInst vty:$vj)>; +foreach Inst = ["VFLOGB_S", "VFCLASS_S", "VFSQRT_S", "VFRECIP_S", "VFRSQRT_S", + "VFRINT_S", "VFCVTL_D_S", "VFCVTH_D_S", + "VFRINTRNE_S", "VFRINTRZ_S", "VFRINTRP_S", "VFRINTRM_S", + "VFTINTRNE_W_S", "VFTINTRZ_W_S", "VFTINTRP_W_S", "VFTINTRM_W_S", + "VFTINT_W_S", "VFTINTRZ_WU_S", "VFTINT_WU_S", + "VFTINTRNEL_L_S", "VFTINTRNEH_L_S", "VFTINTRZL_L_S", + "VFTINTRZH_L_S", "VFTINTRPL_L_S", "VFTINTRPH_L_S", + "VFTINTRML_L_S", "VFTINTRMH_L_S", "VFTINTL_L_S", + "VFTINTH_L_S"] in + def : Pat<(deriveLSXIntrinsic.ret (v4f32 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; +foreach Inst = ["VFLOGB_D", "VFCLASS_D", "VFSQRT_D", "VFRECIP_D", "VFRSQRT_D", + "VFRINT_D", + "VFRINTRNE_D", "VFRINTRZ_D", "VFRINTRP_D", "VFRINTRM_D", + "VFTINTRNE_L_D", "VFTINTRZ_L_D", "VFTINTRP_L_D", "VFTINTRM_L_D", + "VFTINT_L_D", "VFTINTRZ_LU_D", "VFTINT_LU_D"] in + def : Pat<(deriveLSXIntrinsic.ret (v2f64 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; + +// load +def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm), + (VLD GPR:$rj, (to_valid_timm timm:$imm))>; +def : Pat<(int_loongarch_lsx_vldx GPR:$rj, GPR:$rk), + (VLDX GPR:$rj, GPR:$rk)>; + +def : Pat<(int_loongarch_lsx_vldrepl_b GPR:$rj, timm:$imm), + (VLDREPL_B GPR:$rj, (to_valid_timm timm:$imm))>; +def : Pat<(int_loongarch_lsx_vldrepl_h GPR:$rj, timm:$imm), + (VLDREPL_H GPR:$rj, (to_valid_timm timm:$imm))>; +def : Pat<(int_loongarch_lsx_vldrepl_w GPR:$rj, timm:$imm), + (VLDREPL_W GPR:$rj, (to_valid_timm timm:$imm))>; +def : Pat<(int_loongarch_lsx_vldrepl_d GPR:$rj, timm:$imm), + (VLDREPL_D GPR:$rj, (to_valid_timm timm:$imm))>; + +// store +def : Pat<(int_loongarch_lsx_vst LSX128:$vd, GPR:$rj, timm:$imm), + (VST LSX128:$vd, GPR:$rj, (to_valid_timm timm:$imm))>; +def : Pat<(int_loongarch_lsx_vstx LSX128:$vd, GPR:$rj, GPR:$rk), + (VSTX LSX128:$vd, GPR:$rj, GPR:$rk)>; + +def : Pat<(int_loongarch_lsx_vstelm_b v16i8:$vd, GPR:$rj, timm:$imm, timm:$idx), + (VSTELM_B v16i8:$vd, GPR:$rj, (to_valid_timm timm:$imm), + (to_valid_timm timm:$idx))>; +def : Pat<(int_loongarch_lsx_vstelm_h v8i16:$vd, GPR:$rj, timm:$imm, timm:$idx), + (VSTELM_H v8i16:$vd, GPR:$rj, (to_valid_timm timm:$imm), + (to_valid_timm timm:$idx))>; +def : Pat<(int_loongarch_lsx_vstelm_w v4i32:$vd, GPR:$rj, timm:$imm, timm:$idx), + (VSTELM_W v4i32:$vd, GPR:$rj, (to_valid_timm timm:$imm), + (to_valid_timm timm:$idx))>; +def : Pat<(int_loongarch_lsx_vstelm_d v2i64:$vd, GPR:$rj, timm:$imm, timm:$idx), + (VSTELM_D v2i64:$vd, GPR:$rj, (to_valid_timm timm:$imm), + (to_valid_timm timm:$idx))>; + } // Predicates = [HasExtLSX] diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp 
index 257b947a3ce43650d76d1d13ff1085681a4d2dda..092b5f1fb4426127af2e615d2e426e9c094f73c8 100644 --- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp @@ -15,6 +15,7 @@ #include "LoongArch.h" #include "LoongArchInstrInfo.h" #include "LoongArchSubtarget.h" +#include "MCTargetDesc/LoongArchBaseInfo.h" #include "MCTargetDesc/LoongArchMCTargetDesc.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -194,3 +195,25 @@ bool LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); return false; } + +bool LoongArchRegisterInfo::canRealignStack(const MachineFunction &MF) const { + if (!TargetRegisterInfo::canRealignStack(MF)) + return false; + + const MachineRegisterInfo *MRI = &MF.getRegInfo(); + const LoongArchFrameLowering *TFI = getFrameLowering(MF); + + // Stack realignment requires a frame pointer. If we already started + // register allocation with frame pointer elimination, it is too late now. + if (!MRI->canReserveReg(LoongArch::R22)) + return false; + + // We may also need a base pointer if there are dynamic allocas or stack + // pointer adjustments around calls. + if (TFI->hasReservedCallFrame(MF)) + return true; + + // A base pointer is required and allowed. Check that it isn't too late to + // reserve it. + return MRI->canReserveReg(LoongArchABI::getBPReg()); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h index 7e8f26b1409765244e81837765e80c408d1f1ede..d1e40254c2972ef06e3827189d51b1e0602043ad 100644 --- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h @@ -51,6 +51,7 @@ struct LoongArchRegisterInfo : public LoongArchGenRegisterInfo { bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { return true; } + bool canRealignStack(const MachineFunction &MF) const override; }; } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h index 5c173675cca4ccb0a8a89b8cee07659af9f3eea5..174e4cba83263348ee0564a3e81349790e708078 100644 --- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h @@ -44,6 +44,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo { bool HasLaLocalWithAbs = false; bool HasUAL = false; bool HasLinkerRelax = false; + bool HasExpAutoVec = false; unsigned GRLen = 32; MVT GRLenVT = MVT::i32; LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; @@ -102,6 +103,7 @@ public: bool hasLaLocalWithAbs() const { return HasLaLocalWithAbs; } bool hasUAL() const { return HasUAL; } bool hasLinkerRelax() const { return HasLinkerRelax; } + bool hasExpAutoVec() const { return HasExpAutoVec; } MVT getGRLenVT() const { return GRLenVT; } unsigned getGRLen() const { return GRLen; } LoongArchABI::ABI getTargetABI() const { return TargetABI; } diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp index a6de86eea1166e3bd491d0ecb8536c549721a99e..d47dded9ea6ecf260c773cd6ac9684ae8a191938 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp @@ -19,4 +19,25 @@ using namespace llvm; #define DEBUG_TYPE "loongarchtti" +TypeSize LoongArchTTIImpl::getRegisterBitWidth( + 
TargetTransformInfo::RegisterKind K) const { + TypeSize DefSize = TargetTransformInfoImplBase::getRegisterBitWidth(K); + switch (K) { + case TargetTransformInfo::RGK_Scalar: + return TypeSize::getFixed(ST->is64Bit() ? 64 : 32); + case TargetTransformInfo::RGK_FixedWidthVector: + if (!ST->hasExpAutoVec()) + return DefSize; + if (ST->hasExtLASX()) + return TypeSize::getFixed(256); + if (ST->hasExtLSX()) + return TypeSize::getFixed(128); + [[fallthrough]]; + case TargetTransformInfo::RGK_ScalableVector: + return DefSize; + } + + llvm_unreachable("Unsupported register kind"); +} + // TODO: Implement more hooks to provide TTI machinery for LoongArch. diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h index 9e02f793ba8a91e3f777cdfd4a06118cfdc718c5..d296c9ed576fbd397db6d9b066bf0540dee2491f 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h @@ -39,6 +39,8 @@ public: : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const; + // TODO: Implement more hooks to provide TTI machinery for LoongArch. }; diff --git a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll new file mode 100644 index 0000000000000000000000000000000000000000..af24ae64b7c741ad693855889f5d0adc3d29d465 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll @@ -0,0 +1,85 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --frame-pointer=none --mattr=+lasx < %s | FileCheck %s + +;; This test is checking that when a function allows stack realignment and +;; realignment needs were not detected before register allocation (at this +;; point, fp is not preserved), but realignment is required during register +;; allocation, the stack should not undergo realignment. + +;; Ensure that the `bstrins.d $sp, $zero, n, 0` instruction is not generated. 
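+;; That instruction would clear the low n + 1 bits of $sp, realigning it to a
+;; 2^(n+1)-byte boundary, where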
+;; n = log2(realign_size) - 1 + +%struct.S = type { [64 x i16] } + +define dso_local noundef signext i32 @main() nounwind { +; CHECK-LABEL: main: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -272 +; CHECK-NEXT: st.d $ra, $sp, 264 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 256 # 8-byte Folded Spill +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_0) +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 96 # 32-byte Folded Spill +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_1) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_1) +; CHECK-NEXT: xvld $xr1, $a0, 0 +; CHECK-NEXT: xvst $xr1, $sp, 64 # 32-byte Folded Spill +; CHECK-NEXT: xvst $xr1, $sp, 224 +; CHECK-NEXT: xvst $xr0, $sp, 192 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_2) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_2) +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvst $xr0, $sp, 160 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_3) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_3) +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill +; CHECK-NEXT: xvst $xr0, $sp, 128 +; CHECK-NEXT: addi.d $fp, $sp, 128 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(foo) +; CHECK-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload +; CHECK-NEXT: xvst $xr0, $sp, 224 +; CHECK-NEXT: xvld $xr0, $sp, 96 # 32-byte Folded Reload +; CHECK-NEXT: xvst $xr0, $sp, 192 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvst $xr0, $sp, 160 +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvst $xr0, $sp, 128 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(bar) +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: ld.d $fp, $sp, 256 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 264 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 272 +; CHECK-NEXT: ret +entry: + %s = alloca %struct.S, align 2 + call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %s) + store <16 x i16> , ptr %s, align 2 + %0 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 16 + store <16 x i16> , ptr %0, align 2 + %1 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 32 + store <16 x i16> , ptr %1, align 2 + %2 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 48 + store <16 x i16> , ptr %2, align 2 + call void @foo(ptr noundef nonnull %s) + store <16 x i16> , ptr %s, align 2 + %3 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 16 + store <16 x i16> , ptr %3, align 2 + %4 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 32 + store <16 x i16> , ptr %4, align 2 + %5 = getelementptr inbounds [64 x i16], ptr %s, i64 0, i64 48 + store <16 x i16> , ptr %5, align 2 + call void @bar(ptr noundef nonnull %s) + call void @llvm.lifetime.end.p0(i64 128, ptr nonnull %s) + ret i32 0 +} + +declare void @foo(ptr nocapture noundef) +declare void @bar(ptr nocapture noundef) + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll new file mode 100644 index 0000000000000000000000000000000000000000..ae6f31aaec643470e2a7da19447239f75f772264 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll @@ -0,0 +1,551 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 
--mattr=+lasx < %s | FileCheck %s + +define void @buildvector_v32i8_splat(ptr %dst, i8 %a0) nounwind { +; CHECK-LABEL: buildvector_v32i8_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplgr2vr.b $xr0, $a1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <32 x i8> undef, i8 %a0, i8 0 + %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer + store <32 x i8> %splat, ptr %dst + ret void +} + +define void @buildvector_v16i16_splat(ptr %dst, i16 %a0) nounwind { +; CHECK-LABEL: buildvector_v16i16_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplgr2vr.h $xr0, $a1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <16 x i16> undef, i16 %a0, i8 0 + %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer + store <16 x i16> %splat, ptr %dst + ret void +} + +define void @buildvector_v8i32_splat(ptr %dst, i32 %a0) nounwind { +; CHECK-LABEL: buildvector_v8i32_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <8 x i32> undef, i32 %a0, i8 0 + %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer + store <8 x i32> %splat, ptr %dst + ret void +} + +define void @buildvector_v4i64_splat(ptr %dst, i64 %a0) nounwind { +; CHECK-LABEL: buildvector_v4i64_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplgr2vr.d $xr0, $a1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <4 x i64> undef, i64 %a0, i8 0 + %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer + store <4 x i64> %splat, ptr %dst + ret void +} + +define void @buildvector_v8f32_splat(ptr %dst, float %a0) nounwind { +; CHECK-LABEL: buildvector_v8f32_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 +; CHECK-NEXT: xvreplve0.w $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <8 x float> undef, float %a0, i8 0 + %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer + store <8 x float> %splat, ptr %dst + ret void +} + +define void @buildvector_v4f64_splat(ptr %dst, double %a0) nounwind { +; CHECK-LABEL: buildvector_v4f64_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 +; CHECK-NEXT: xvreplve0.d $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <4 x double> undef, double %a0, i8 0 + %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer + store <4 x double> %splat, ptr %dst + ret void +} + +define void @buildvector_v32i8_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v32i8_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrepli.b $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <32 x i8> , ptr %dst + ret void +} + +define void @buildvector_v16i16_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v16i16_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrepli.h $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <16 x i16> , ptr %dst + ret void +} + +define void @buildvector_v8i32_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v8i32_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrepli.w $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret 
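+; Note: xvrepli.{b/h/w/d} materializes these small splats directly from a
+; signed 10-bit immediate, so no constant-pool load is needed; the non-splat
+; constants later in this file go through .LCPI constant-pool loads instead.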
+entry: + store <8 x i32> , ptr %dst + ret void +} + +define void @buildvector_v4i64_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v4i64_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrepli.d $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <4 x i64> , ptr %dst + ret void +} + +define void @buildvector_v2f32_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v2f32_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu12i.w $a1, 260096 +; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <8 x float> , ptr %dst + ret void +} + +define void @buildvector_v4f64_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v4f64_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu52i.d $a1, $zero, 1023 +; CHECK-NEXT: xvreplgr2vr.d $xr0, $a1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <4 x double> , ptr %dst + ret void +} + +define void @buildvector_v32i8_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v32i8_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI12_0) +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <32 x i8> , ptr %dst + ret void +} + +define void @buildvector_v16i16_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v16i16_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI13_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI13_0) +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <16 x i16> , ptr %dst + ret void +} + +define void @buildvector_v8i32_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v8i32_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI14_0) +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <8 x i32> , ptr %dst + ret void +} + +define void @buildvector_v4i64_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v4i64_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI15_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI15_0) +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <4 x i64> , ptr %dst + ret void +} + +define void @buildvector_v2f32_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v2f32_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI16_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI16_0) +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <8 x float> , ptr %dst + ret void +} + +define void @buildvector_v4f64_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v4f64_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0) +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <4 x double> , ptr %dst + ret void +} + +define void @buildvector_v32i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15, i8 %a16, i8 %a17, i8 %a18, i8 %a19, i8 %a20, i8 %a21, i8 %a22, i8 %a23, i8 %a24, i8 %a25, i8 %a26, i8 %a27, i8 %a28, i8 %a29, i8 %a30, i8 %a31) nounwind { +; CHECK-LABEL: 
buildvector_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6 +; CHECK-NEXT: ld.b $a1, $sp, 0 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 7 +; CHECK-NEXT: ld.b $a1, $sp, 8 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 8 +; CHECK-NEXT: ld.b $a1, $sp, 16 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 9 +; CHECK-NEXT: ld.b $a1, $sp, 24 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 10 +; CHECK-NEXT: ld.b $a1, $sp, 32 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 11 +; CHECK-NEXT: ld.b $a1, $sp, 40 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 12 +; CHECK-NEXT: ld.b $a1, $sp, 48 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 13 +; CHECK-NEXT: ld.b $a1, $sp, 56 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 14 +; CHECK-NEXT: ld.b $a1, $sp, 64 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 15 +; CHECK-NEXT: ld.b $a1, $sp, 72 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 80 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 88 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 2 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 96 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 3 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 104 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 4 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 112 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 5 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 120 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 6 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 128 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 7 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 136 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 8 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 144 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 9 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 152 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 10 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 160 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 11 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 168 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 12 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 176 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 13 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 184 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 14 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 192 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 15 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0 + %ins1 = insertelement <32 x i8> %ins0, i8 %a1, i32 1 + %ins2 = insertelement <32 x i8> %ins1, i8 %a2, i32 2 + %ins3 = insertelement <32 x i8> %ins2, i8 %a3, i32 3 + %ins4 = insertelement <32 x i8> %ins3, i8 %a4, i32 4 + %ins5 = insertelement <32 x i8> %ins4, i8 %a5, i32 5 + %ins6 = insertelement <32 x i8> %ins5, i8 %a6, i32 6 + %ins7 = insertelement <32 x i8> %ins6, i8 %a7, i32 7 + %ins8 = insertelement <32 x i8> %ins7, i8 %a8, i32 8 + %ins9 = insertelement <32 x i8> %ins8, i8 %a9, i32 9 + %ins10 = insertelement <32 x i8> %ins9, i8 %a10, i32 10 + %ins11 = insertelement <32 x i8> %ins10, i8 %a11, i32 11 + %ins12 = insertelement <32 x i8> %ins11, i8 %a12, i32 12 + %ins13 = insertelement <32 x i8> %ins12, i8 %a13, i32 13 + %ins14 = insertelement <32 x i8> %ins13, i8 %a14, i32 14 + %ins15 = insertelement <32 x i8> %ins14, i8 %a15, i32 15 + %ins16 = insertelement <32 x i8> %ins15, i8 %a16, i32 16 + %ins17 = insertelement <32 x i8> %ins16, i8 %a17, i32 17 + %ins18 = insertelement <32 x i8> %ins17, i8 %a18, i32 18 + %ins19 = insertelement <32 x i8> %ins18, i8 %a19, i32 19 + %ins20 = insertelement <32 x i8> %ins19, i8 %a20, i32 20 + %ins21 = insertelement <32 x i8> %ins20, i8 %a21, i32 21 + %ins22 = insertelement <32 x i8> %ins21, i8 %a22, i32 22 + %ins23 = insertelement <32 x i8> %ins22, i8 %a23, i32 23 + %ins24 = insertelement <32 x i8> %ins23, i8 %a24, i32 24 + %ins25 = insertelement <32 x i8> %ins24, i8 %a25, i32 25 + %ins26 = insertelement <32 x i8> %ins25, i8 %a26, i32 26 + %ins27 = insertelement <32 x i8> %ins26, i8 %a27, i32 27 + %ins28 = insertelement <32 x i8> %ins27, i8 %a28, i32 28 + %ins29 = insertelement <32 x i8> %ins28, i8 %a29, i32 29 + %ins30 = insertelement <32 x i8> %ins29, i8 %a30, i32 30 + %ins31 = insertelement <32 x i8> %ins30, i8 %a31, i32 31 + store <32 x i8> %ins31, ptr %dst + ret void +} + +define void @buildvector_v16i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 %a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind { +; CHECK-LABEL: buildvector_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6 +; CHECK-NEXT: ld.h $a1, $sp, 0 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7 +; CHECK-NEXT: ld.h $a1, $sp, 8 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.h $a1, $sp, 16 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.h $a1, $sp, 24 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.h $vr1, 
$a1, 2 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.h $a1, $sp, 32 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.h $a1, $sp, 40 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.h $a1, $sp, 48 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.h $a1, $sp, 56 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.h $a1, $sp, 64 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <16 x i16> undef, i16 %a0, i32 0 + %ins1 = insertelement <16 x i16> %ins0, i16 %a1, i32 1 + %ins2 = insertelement <16 x i16> %ins1, i16 %a2, i32 2 + %ins3 = insertelement <16 x i16> %ins2, i16 %a3, i32 3 + %ins4 = insertelement <16 x i16> %ins3, i16 %a4, i32 4 + %ins5 = insertelement <16 x i16> %ins4, i16 %a5, i32 5 + %ins6 = insertelement <16 x i16> %ins5, i16 %a6, i32 6 + %ins7 = insertelement <16 x i16> %ins6, i16 %a7, i32 7 + %ins8 = insertelement <16 x i16> %ins7, i16 %a8, i32 8 + %ins9 = insertelement <16 x i16> %ins8, i16 %a9, i32 9 + %ins10 = insertelement <16 x i16> %ins9, i16 %a10, i32 10 + %ins11 = insertelement <16 x i16> %ins10, i16 %a11, i32 11 + %ins12 = insertelement <16 x i16> %ins11, i16 %a12, i32 12 + %ins13 = insertelement <16 x i16> %ins12, i16 %a13, i32 13 + %ins14 = insertelement <16 x i16> %ins13, i16 %a14, i32 14 + %ins15 = insertelement <16 x i16> %ins14, i16 %a15, i32 15 + store <16 x i16> %ins15, ptr %dst + ret void +} + +define void @buildvector_v8i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind { +; CHECK-LABEL: buildvector_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 0 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a3, 2 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a4, 3 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a5, 4 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a6, 5 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a7, 6 +; CHECK-NEXT: ld.w $a1, $sp, 0 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 7 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <8 x i32> undef, i32 %a0, i32 0 + %ins1 = insertelement <8 x i32> %ins0, i32 %a1, i32 1 + %ins2 = insertelement <8 x i32> %ins1, i32 %a2, i32 2 + %ins3 = insertelement <8 x i32> %ins2, i32 %a3, i32 3 + %ins4 = insertelement <8 x i32> %ins3, i32 %a4, i32 4 + %ins5 = insertelement <8 x i32> %ins4, i32 %a5, i32 5 + %ins6 = insertelement <8 x i32> %ins5, i32 %a6, i32 6 + %ins7 = insertelement <8 x i32> %ins6, i32 %a7, i32 7 + store <8 x i32> %ins7, ptr %dst + ret void +} + +define void @buildvector_v4i64(ptr %dst, i64 %a0, i64 %a1, i64 %a2, i64 %a3) nounwind { +; CHECK-LABEL: buildvector_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 0 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a3, 2 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a4, 3 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <4 x i64> 
undef, i64 %a0, i32 0 + %ins1 = insertelement <4 x i64> %ins0, i64 %a1, i32 1 + %ins2 = insertelement <4 x i64> %ins1, i64 %a2, i32 2 + %ins3 = insertelement <4 x i64> %ins2, i64 %a3, i32 3 + store <4 x i64> %ins3, ptr %dst + ret void +} + +define void @buildvector_v8f32(ptr %dst, float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7) nounwind { +; CHECK-LABEL: buildvector_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movfr2gr.s $a1, $fa0 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 0 +; CHECK-NEXT: movfr2gr.s $a1, $fa1 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 1 +; CHECK-NEXT: movfr2gr.s $a1, $fa2 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 2 +; CHECK-NEXT: movfr2gr.s $a1, $fa3 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 3 +; CHECK-NEXT: movfr2gr.s $a1, $fa4 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 4 +; CHECK-NEXT: movfr2gr.s $a1, $fa5 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 5 +; CHECK-NEXT: movfr2gr.s $a1, $fa6 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 6 +; CHECK-NEXT: movfr2gr.s $a1, $fa7 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 7 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <8 x float> undef, float %a0, i32 0 + %ins1 = insertelement <8 x float> %ins0, float %a1, i32 1 + %ins2 = insertelement <8 x float> %ins1, float %a2, i32 2 + %ins3 = insertelement <8 x float> %ins2, float %a3, i32 3 + %ins4 = insertelement <8 x float> %ins3, float %a4, i32 4 + %ins5 = insertelement <8 x float> %ins4, float %a5, i32 5 + %ins6 = insertelement <8 x float> %ins5, float %a6, i32 6 + %ins7 = insertelement <8 x float> %ins6, float %a7, i32 7 + store <8 x float> %ins7, ptr %dst + ret void +} + +define void @buildvector_v4f64(ptr %dst, double %a0, double %a1, double %a2, double %a3) nounwind { +; CHECK-LABEL: buildvector_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movfr2gr.d $a1, $fa0 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 0 +; CHECK-NEXT: movfr2gr.d $a1, $fa1 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 1 +; CHECK-NEXT: movfr2gr.d $a1, $fa2 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 2 +; CHECK-NEXT: movfr2gr.d $a1, $fa3 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 3 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <4 x double> undef, double %a0, i32 0 + %ins1 = insertelement <4 x double> %ins0, double %a1, i32 1 + %ins2 = insertelement <4 x double> %ins1, double %a2, i32 2 + %ins3 = insertelement <4 x double> %ins2, double %a3, i32 3 + store <4 x double> %ins3, ptr %dst + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll new file mode 100644 index 0000000000000000000000000000000000000000..7786e399c95f40e5a0ccb99b9b922d6304d71724 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll @@ -0,0 +1,115 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @ctpop_v32i8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctpop_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpcnt.b $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <32 x i8>, ptr %src + %res = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %v) + store <32 x i8> %res, ptr %dst + ret void +} + +define void @ctpop_v16i16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctpop_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpcnt.h $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <16 x i16>, ptr 
%src + %res = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %v) + store <16 x i16> %res, ptr %dst + ret void +} + +define void @ctpop_v8i32(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctpop_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpcnt.w $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <8 x i32>, ptr %src + %res = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %v) + store <8 x i32> %res, ptr %dst + ret void +} + +define void @ctpop_v4i64(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctpop_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpcnt.d $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <4 x i64>, ptr %src + %res = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %v) + store <4 x i64> %res, ptr %dst + ret void +} + +define void @ctlz_v32i8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlz_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvclz.b $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <32 x i8>, ptr %src + %res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %v, i1 false) + store <32 x i8> %res, ptr %dst + ret void +} + +define void @ctlz_v16i16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlz_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvclz.h $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <16 x i16>, ptr %src + %res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %v, i1 false) + store <16 x i16> %res, ptr %dst + ret void +} + +define void @ctlz_v8i32(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlz_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvclz.w $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <8 x i32>, ptr %src + %res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %v, i1 false) + store <8 x i32> %res, ptr %dst + ret void +} + +define void @ctlz_v4i64(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlz_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvclz.d $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <4 x i64>, ptr %src + %res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %v, i1 false) + store <4 x i64> %res, ptr %dst + ret void +} + +declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>) +declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) +declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) +declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>) +declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1) +declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1) +declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1) +declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) diff --git a/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll b/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll new file mode 100644 index 0000000000000000000000000000000000000000..af18c52b096c801a846c741219942d5bdf2db3b8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll @@ -0,0 +1,804 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=fast < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST +; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=on < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-ON +; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=off < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF + +define void @xvfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; 
CONTRACT-FAST-LABEL: xvfmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfadd.d $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfadd.d $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul<4 x double> %v0, %v1 + %add = fadd<4 x double> %mul, %v2 + store <4 x double> %add, ptr %res + ret void +} + +define void @xvfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul<4 x double> %v0, %v1 + %sub = fsub<4 x double> %mul, %v2 + store <4 x double> %sub, ptr %res + ret void +} + +define void @xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfadd.d $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.d $xr0, 
$xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfadd.d $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul<4 x double> %v0, %v1 + %add = fadd<4 x double> %mul, %v2 + %negadd = fneg<4 x double> %add + store <4 x double> %negadd, ptr %res + ret void +} + +define void @xvfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmadd_d_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmadd_d_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvbitrevi.d $xr1, $xr1, 63 +; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmadd_d_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvbitrevi.d $xr1, $xr1, 63 +; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv0 = fneg nsz<4 x double> %v0 + %negv2 = fneg nsz<4 x double> %v2 + %mul = fmul nsz<4 x double> %negv0, %v1 + %add = fadd nsz<4 x double> %mul, %negv2 + store <4 x double> %add, ptr %res + ret void +} + +;; Check that xvfnmadd.d is not emitted. 
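+;; Folding here would rewrite -(v0 * v1) + (-v2) as -(v0 * v1 + v2), which can
+;; flip the sign of a zero result; without the nsz flag only the negated
+;; operand plus xvfmsub.d form is sound.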
+define void @not_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_xvfnmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 +; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_xvfnmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvbitrevi.d $xr1, $xr1, 63 +; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_xvfnmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvbitrevi.d $xr1, $xr1, 63 +; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv0 = fneg<4 x double> %v0 + %negv2 = fneg<4 x double> %v2 + %mul = fmul<4 x double> %negv0, %v1 + %add = fadd<4 x double> %mul, %negv2 + store <4 x double> %add, ptr %res + ret void +} + +define void @xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv2 = fneg<4 x double> %v2 + %mul = fmul<4 x double> %v0, %v1 + %add = fadd<4 x double> %mul, %negv2 + %neg = fneg<4 x double> %add + store <4 x double> %neg, ptr %res + ret void +} + +define void @xvfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmsub_d_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmsub_d_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: 
xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmsub_d_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv0 = fneg nsz<4 x double> %v0 + %mul = fmul nsz<4 x double> %negv0, %v1 + %add = fadd nsz<4 x double> %mul, %v2 + store <4 x double> %add, ptr %res + ret void +} + +;; Check that xvfnmsub.d is not emitted. +define void @not_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_xvfnmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 +; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_xvfnmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_xvfnmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv0 = fneg<4 x double> %v0 + %mul = fmul<4 x double> %negv0, %v1 + %add = fadd<4 x double> %mul, %v2 + store <4 x double> %add, ptr %res + ret void +} + +define void @contract_xvfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_xvfmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_xvfmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_xvfmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul contract <4 x 
double> %v0, %v1 + %add = fadd contract <4 x double> %mul, %v2 + store <4 x double> %add, ptr %res + ret void +} + +define void @contract_xvfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_xvfmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_xvfmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_xvfmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul contract <4 x double> %v0, %v1 + %sub = fsub contract <4 x double> %mul, %v2 + store <4 x double> %sub, ptr %res + ret void +} + +define void @contract_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_xvfnmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_xvfnmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_xvfnmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul contract <4 x double> %v0, %v1 + %add = fadd contract <4 x double> %mul, %v2 + %negadd = fneg contract <4 x double> %add + store <4 x double> %negadd, ptr %res + ret void +} + +define void @contract_xvfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_xvfnmadd_d_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_xvfnmadd_d_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: 
contract_xvfnmadd_d_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv0 = fneg contract nsz<4 x double> %v0 + %negv2 = fneg contract nsz<4 x double> %v2 + %mul = fmul contract nsz<4 x double> %negv0, %v1 + %add = fadd contract nsz<4 x double> %mul, %negv2 + store <4 x double> %add, ptr %res + ret void +} + +;; Check that xvfnmadd.d is not emitted. +define void @not_contract_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_contract_xvfnmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 +; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_contract_xvfnmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvbitrevi.d $xr2, $xr2, 63 +; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_contract_xvfnmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvbitrevi.d $xr2, $xr2, 63 +; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv0 = fneg contract <4 x double> %v0 + %negv2 = fneg contract <4 x double> %v2 + %mul = fmul contract <4 x double> %negv0, %v1 + %add = fadd contract <4 x double> %mul, %negv2 + store <4 x double> %add, ptr %res + ret void +} + +define void @contract_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_xvfnmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_xvfnmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_xvfnmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv2 = fneg contract <4 x double> %v2 + %mul = fmul contract <4 x double> %v0, %v1 + %add = fadd contract <4 x double> %mul, 
%negv2 + %neg = fneg contract <4 x double> %add + store <4 x double> %neg, ptr %res + ret void +} + +define void @contract_xvfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_xvfnmsub_d_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_xvfnmsub_d_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_xvfnmsub_d_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv0 = fneg contract nsz<4 x double> %v0 + %mul = fmul contract nsz<4 x double> %negv0, %v1 + %add = fadd contract nsz<4 x double> %mul, %v2 + store <4 x double> %add, ptr %res + ret void +} + +;; Check that xvfnmsub.d is not emitted. +define void @not_contract_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_contract_xvfnmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 +; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_contract_xvfnmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvbitrevi.d $xr2, $xr2, 63 +; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_contract_xvfnmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvbitrevi.d $xr2, $xr2, 63 +; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv0 = fneg contract <4 x double> %v0 + %mul = fmul contract <4 x double> %negv0, %v1 + %add = fadd contract <4 x double> %mul, %v2 + store <4 x double> %add, ptr %res + ret void +} + +define void @xvfmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfmadd_d_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfmadd_d_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 
0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfmadd_d_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul contract <4 x double> %v0, %v1 + %add = fadd contract <4 x double> %mul, %v2 + store <4 x double> %add, ptr %res + ret void +} + +define void @xvfmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfmsub_d_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfmsub_d_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfmsub_d_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul contract <4 x double> %v0, %v1 + %sub = fsub contract <4 x double> %mul, %v2 + store <4 x double> %sub, ptr %res + ret void +} + +define void @xvfnmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmadd_d_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmadd_d_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmadd_d_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul contract <4 x double> %v0, %v1 + %add = fadd contract <4 x double> %mul, %v2 + %negadd = fneg contract <4 x double> %add + store <4 x double> %negadd, ptr %res + ret void +} + +define void @xvfnmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: 
xvfnmsub_d_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmsub_d_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmsub_d_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul contract <4 x double> %v0, %v1 + %negv2 = fneg contract <4 x double> %v2 + %add = fadd contract <4 x double> %negv2, %mul + %negadd = fneg contract <4 x double> %add + store <4 x double> %negadd, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll b/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll new file mode 100644 index 0000000000000000000000000000000000000000..b7b3cb3a2e665ba654d6ee4bdd73df176f30f9cb --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll @@ -0,0 +1,804 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=fast < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST +; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=on < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-ON +; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=off < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF + +define void @xvfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfadd.s $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfadd.s $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %mul = fmul<8 x float> %v0, %v1 + %add = fadd<8 x float> %mul, %v2 + store <8 x float> %add, ptr %res + ret void +} + +define void @xvfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; 
CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %mul = fmul<8 x float> %v0, %v1 + %sub = fsub<8 x float> %mul, %v2 + store <8 x float> %sub, ptr %res + ret void +} + +define void @xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfadd.s $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfadd.s $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %mul = fmul<8 x float> %v0, %v1 + %add = fadd<8 x float> %mul, %v2 + %negadd = fneg<8 x float> %add + store <8 x float> %negadd, ptr %res + ret void +} + +define void @xvfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmadd_s_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmadd_s_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvbitrevi.w $xr1, $xr1, 31 +; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmadd_s_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; 
CONTRACT-OFF-NEXT: xvbitrevi.w $xr1, $xr1, 31
+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0
+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0
+; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-OFF-NEXT: ret
+entry:
+  %v0 = load <8 x float>, ptr %a0
+  %v1 = load <8 x float>, ptr %a1
+  %v2 = load <8 x float>, ptr %a2
+  %negv0 = fneg nsz<8 x float> %v0
+  %negv2 = fneg nsz<8 x float> %v2
+  %mul = fmul nsz<8 x float> %negv0, %v1
+  %add = fadd nsz<8 x float> %mul, %negv2
+  store <8 x float> %add, ptr %res
+  ret void
+}
+
+;; Check that xvfnmadd.s is not emitted.
+define void @not_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
+; CONTRACT-FAST-LABEL: not_xvfnmadd_s:
+; CONTRACT-FAST: # %bb.0: # %entry
+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31
+; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-FAST-NEXT: ret
+;
+; CONTRACT-ON-LABEL: not_xvfnmadd_s:
+; CONTRACT-ON: # %bb.0: # %entry
+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0
+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0
+; CONTRACT-ON-NEXT: xvbitrevi.w $xr1, $xr1, 31
+; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0
+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0
+; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-ON-NEXT: ret
+;
+; CONTRACT-OFF-LABEL: not_xvfnmadd_s:
+; CONTRACT-OFF: # %bb.0: # %entry
+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0
+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0
+; CONTRACT-OFF-NEXT: xvbitrevi.w $xr1, $xr1, 31
+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0
+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0
+; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-OFF-NEXT: ret
+entry:
+  %v0 = load <8 x float>, ptr %a0
+  %v1 = load <8 x float>, ptr %a1
+  %v2 = load <8 x float>, ptr %a2
+  %negv0 = fneg<8 x float> %v0
+  %negv2 = fneg<8 x float> %v2
+  %mul = fmul<8 x float> %negv0, %v1
+  %add = fadd<8 x float> %mul, %negv2
+  store <8 x float> %add, ptr %res
+  ret void
+}
+
+define void @xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
+; CONTRACT-FAST-LABEL: xvfnmsub_s:
+; CONTRACT-FAST: # %bb.0: # %entry
+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-FAST-NEXT: ret
+;
+; CONTRACT-ON-LABEL: xvfnmsub_s:
+; CONTRACT-ON: # %bb.0: # %entry
+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0
+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0
+; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0
+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0
+; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1
+; CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31
+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-ON-NEXT: ret
+;
+; CONTRACT-OFF-LABEL: xvfnmsub_s:
+; CONTRACT-OFF: # %bb.0: # %entry
+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0
+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0
+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0
+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0
+; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1
+; CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31
+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-OFF-NEXT: ret
+entry:
+  %v0 = load <8 x float>, ptr %a0
+  %v1 = load <8 x float>, ptr %a1
+  %v2 = load <8 x float>, ptr %a2
+  %negv2 = fneg<8 x float> %v2
+  %mul = fmul<8 x float> %v0, %v1
+  %add = fadd<8 x float> %mul, %negv2
+  %neg = fneg<8 x float> %add
+  store <8 x float> %neg, ptr %res
+  ret void
+}
+
+define void @xvfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
+; CONTRACT-FAST-LABEL: xvfnmsub_s_nsz:
+; CONTRACT-FAST: # %bb.0: # %entry
+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-FAST-NEXT: ret
+;
+; CONTRACT-ON-LABEL: xvfnmsub_s_nsz:
+; CONTRACT-ON: # %bb.0: # %entry
+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0
+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0
+; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0
+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0
+; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr1, $xr0
+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-ON-NEXT: ret
+;
+; CONTRACT-OFF-LABEL: xvfnmsub_s_nsz:
+; CONTRACT-OFF: # %bb.0: # %entry
+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0
+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0
+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0
+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0
+; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr1, $xr0
+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-OFF-NEXT: ret
+entry:
+  %v0 = load <8 x float>, ptr %a0
+  %v1 = load <8 x float>, ptr %a1
+  %v2 = load <8 x float>, ptr %a2
+  %negv0 = fneg nsz<8 x float> %v0
+  %mul = fmul nsz<8 x float> %negv0, %v1
+  %add = fadd nsz<8 x float> %mul, %v2
+  store <8 x float> %add, ptr %res
+  ret void
+}
+
+;; Check that xvfnmsub.s is not emitted.
+define void @not_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
+; CONTRACT-FAST-LABEL: not_xvfnmsub_s:
+; CONTRACT-FAST: # %bb.0: # %entry
+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31
+; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-FAST-NEXT: ret
+;
+; CONTRACT-ON-LABEL: not_xvfnmsub_s:
+; CONTRACT-ON: # %bb.0: # %entry
+; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0
+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0
+; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0
+; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0
+; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr1, $xr0
+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-ON-NEXT: ret
+;
+; CONTRACT-OFF-LABEL: not_xvfnmsub_s:
+; CONTRACT-OFF: # %bb.0: # %entry
+; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0
+; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0
+; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0
+; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0
+; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr1, $xr0
+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-OFF-NEXT: ret
+entry:
+  %v0 = load <8 x float>, ptr %a0
+  %v1 = load <8 x float>, ptr %a1
+  %v2 = load <8 x float>, ptr %a2
+  %negv0 = fneg<8 x float> %v0
+  %mul = fmul<8 x float> %negv0, %v1
+  %add = fadd<8 x float> %mul, %v2
+  store <8 x float> %add, ptr %res
+  ret void
+}
+
+define void @contract_xvfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
+; CONTRACT-FAST-LABEL: contract_xvfmadd_s:
+; CONTRACT-FAST: # %bb.0: # %entry
+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-FAST-NEXT: ret
+;
+; CONTRACT-ON-LABEL: contract_xvfmadd_s:
+; CONTRACT-ON: # %bb.0: # %entry
+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-ON-NEXT: ret
+;
+; CONTRACT-OFF-LABEL: contract_xvfmadd_s:
+; CONTRACT-OFF: # %bb.0: # %entry
+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-OFF-NEXT: ret
+entry:
+  %v0 = load <8 x float>, ptr %a0
+  %v1 = load <8 x float>, ptr %a1
+  %v2 = load <8 x float>, ptr %a2
+  %mul = fmul contract <8 x float> %v0, %v1
+  %add = fadd contract <8 x float> %mul, %v2
+  store <8 x float> %add, ptr %res
+  ret void
+}
+
+define void @contract_xvfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
+; CONTRACT-FAST-LABEL: contract_xvfmsub_s:
+; CONTRACT-FAST: # %bb.0: # %entry
+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-FAST-NEXT: ret
+;
+; CONTRACT-ON-LABEL: contract_xvfmsub_s:
+; CONTRACT-ON: # %bb.0: # %entry
+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-ON-NEXT: ret
+;
+; CONTRACT-OFF-LABEL: contract_xvfmsub_s:
+; CONTRACT-OFF: # %bb.0: # %entry
+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-OFF-NEXT: ret
+entry:
+  %v0 = load <8 x float>, ptr %a0
+  %v1 = load <8 x float>, ptr %a1
+  %v2 = load <8 x float>, ptr %a2
+  %mul = fmul contract <8 x float> %v0, %v1
+  %sub = fsub contract <8 x float> %mul, %v2
+  store <8 x float> %sub, ptr %res
+  ret void
+}
+
+define void @contract_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
+; CONTRACT-FAST-LABEL: contract_xvfnmadd_s:
+; CONTRACT-FAST: # %bb.0: # %entry
+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-FAST-NEXT: ret
+;
+; CONTRACT-ON-LABEL: contract_xvfnmadd_s:
+; CONTRACT-ON: # %bb.0: # %entry
+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-ON-NEXT: ret
+;
+; CONTRACT-OFF-LABEL: contract_xvfnmadd_s:
+; CONTRACT-OFF: # %bb.0: # %entry
+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-OFF-NEXT: ret
+entry:
+  %v0 = load <8 x float>, ptr %a0
+  %v1 = load <8 x float>, ptr %a1
+  %v2 = load <8 x float>, ptr %a2
+  %mul = fmul contract <8 x float> %v0, %v1
+  %add = fadd contract <8 x float> %mul, %v2
+  %negadd = fneg contract <8 x float> %add
+  store <8 x float> %negadd, ptr %res
+  ret void
+}
+
+define void @contract_xvfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
+; CONTRACT-FAST-LABEL: contract_xvfnmadd_s_nsz:
+; CONTRACT-FAST: # %bb.0: # %entry
+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-FAST-NEXT: ret
+;
+; CONTRACT-ON-LABEL: contract_xvfnmadd_s_nsz:
+; CONTRACT-ON: # %bb.0: # %entry
+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-ON-NEXT: ret
+;
+; CONTRACT-OFF-LABEL: contract_xvfnmadd_s_nsz:
+; CONTRACT-OFF: # %bb.0: # %entry
+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-OFF-NEXT: ret
+entry:
+  %v0 = load <8 x float>, ptr %a0
+  %v1 = load <8 x float>, ptr %a1
+  %v2 = load <8 x float>, ptr %a2
+  %negv0 = fneg contract nsz<8 x float> %v0
+  %negv2 = fneg contract nsz<8 x float> %v2
+  %mul = fmul contract nsz<8 x float> %negv0, %v1
+  %add = fadd contract nsz<8 x float> %mul, %negv2
+  store <8 x float> %add, ptr %res
+  ret void
+}
+
+;; Check that xvfnmadd.s is not emitted.
+define void @not_contract_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
+; CONTRACT-FAST-LABEL: not_contract_xvfnmadd_s:
+; CONTRACT-FAST: # %bb.0: # %entry
+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31
+; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-FAST-NEXT: ret
+;
+; CONTRACT-ON-LABEL: not_contract_xvfnmadd_s:
+; CONTRACT-ON: # %bb.0: # %entry
+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT: xvbitrevi.w $xr2, $xr2, 31
+; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-ON-NEXT: ret
+;
+; CONTRACT-OFF-LABEL: not_contract_xvfnmadd_s:
+; CONTRACT-OFF: # %bb.0: # %entry
+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT: xvbitrevi.w $xr2, $xr2, 31
+; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-OFF-NEXT: ret
+entry:
+  %v0 = load <8 x float>, ptr %a0
+  %v1 = load <8 x float>, ptr %a1
+  %v2 = load <8 x float>, ptr %a2
+  %negv0 = fneg contract <8 x float> %v0
+  %negv2 = fneg contract <8 x float> %v2
+  %mul = fmul contract <8 x float> %negv0, %v1
+  %add = fadd contract <8 x float> %mul, %negv2
+  store <8 x float> %add, ptr %res
+  ret void
+}
+
+define void @contract_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
+; CONTRACT-FAST-LABEL: contract_xvfnmsub_s:
+; CONTRACT-FAST: # %bb.0: # %entry
+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-FAST-NEXT: ret
+;
+; CONTRACT-ON-LABEL: contract_xvfnmsub_s:
+; CONTRACT-ON: # %bb.0: # %entry
+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-ON-NEXT: ret
+;
+; CONTRACT-OFF-LABEL: contract_xvfnmsub_s:
+; CONTRACT-OFF: # %bb.0: # %entry
+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-OFF-NEXT: ret
+entry:
+  %v0 = load <8 x float>, ptr %a0
+  %v1 = load <8 x float>, ptr %a1
+  %v2 = load <8 x float>, ptr %a2
+  %negv2 = fneg contract <8 x float> %v2
+  %mul = fmul contract <8 x float> %v0, %v1
+  %add = fadd contract <8 x float> %mul, %negv2
+  %neg = fneg contract <8 x float> %add
+  store <8 x float> %neg, ptr %res
+  ret void
+}
+
+define void @contract_xvfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
+; CONTRACT-FAST-LABEL: contract_xvfnmsub_s_nsz:
+; CONTRACT-FAST: # %bb.0: # %entry
+; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-FAST-NEXT: ret
+;
+; CONTRACT-ON-LABEL: contract_xvfnmsub_s_nsz:
+; CONTRACT-ON: # %bb.0: # %entry
+; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-ON-NEXT: ret
+;
+; CONTRACT-OFF-LABEL: contract_xvfnmsub_s_nsz:
+; CONTRACT-OFF: # %bb.0: # %entry
+; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0
+; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0
+; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0
+; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0
+; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0
+; CONTRACT-OFF-NEXT: ret
+entry:
+  %v0 = load <8 x float>, ptr %a0
+  %v1 = load <8 x float>, ptr %a1
+  %v2 = load <8 x float>, ptr %a2
+  %negv0 = fneg contract nsz<8 x float> %v0
+  %mul = fmul contract nsz<8 x float> %negv0, %v1
+  %add = fadd contract nsz<8 x float> %mul, %v2
+  store <8 x float> %add, ptr %res
+  ret void
+}
+
+;; Check that xvfnmsub.s is not emitted.
+define void @not_contract_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_contract_xvfnmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 +; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_contract_xvfnmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvbitrevi.w $xr2, $xr2, 31 +; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_contract_xvfnmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvbitrevi.w $xr2, $xr2, 31 +; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %negv0 = fneg contract <8 x float> %v0 + %mul = fmul contract <8 x float> %negv0, %v1 + %add = fadd contract <8 x float> %mul, %v2 + store <8 x float> %add, ptr %res + ret void +} + +define void @xvfmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfmadd_s_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfmadd_s_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfmadd_s_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %mul = fmul contract <8 x float> %v0, %v1 + %add = fadd contract <8 x float> %mul, %v2 + store <8 x float> %add, ptr %res + ret void +} + +define void @xvfmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfmsub_s_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfmsub_s_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: 
xvfmsub_s_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %mul = fmul contract <8 x float> %v0, %v1 + %sub = fsub contract <8 x float> %mul, %v2 + store <8 x float> %sub, ptr %res + ret void +} + +define void @xvfnmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmadd_s_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmadd_s_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmadd_s_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %mul = fmul contract <8 x float> %v0, %v1 + %add = fadd contract <8 x float> %mul, %v2 + %negadd = fneg contract <8 x float> %add + store <8 x float> %negadd, ptr %res + ret void +} + +define void @xvfnmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmsub_s_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmsub_s_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmsub_s_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %mul = fmul contract <8 x float> %v0, %v1 + %negv2 = fneg contract <8 x float> %v2 + %add = fadd contract <8 x float> %negv2, %mul + %negadd = fneg contract <8 x float> %add + store <8 x float> %negadd, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll new file mode 100644 index 0000000000000000000000000000000000000000..c4a881bdeae9f1b81c756971141c75fb157b1d38 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll 
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+;; fsqrt
+define void @sqrt_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sqrt_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfsqrt.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <8 x float>, ptr %a0, align 16
+  %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0)
+  store <8 x float> %sqrt, ptr %res, align 16
+  ret void
+}
+
+define void @sqrt_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sqrt_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfsqrt.d $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <4 x double>, ptr %a0, align 16
+  %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0)
+  store <4 x double> %sqrt, ptr %res, align 16
+  ret void
+}
+
+;; 1.0 / (fsqrt vec)
+define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: one_div_sqrt_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfrsqrt.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <8 x float>, ptr %a0, align 16
+  %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0)
+  %div = fdiv <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %sqrt
+  store <8 x float> %div, ptr %res, align 16
+  ret void
+}
+
+define void @one_div_sqrt_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: one_div_sqrt_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfrsqrt.d $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <4 x double>, ptr %a0, align 16
+  %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0)
+  %div = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %sqrt
+  store <4 x double> %div, ptr %res, align 16
+  ret void
+}
+
+declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
+declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll
new file mode 100644
index 0000000000000000000000000000000000000000..bf54f44357b03493163cebd7dd094117d47fa2eb
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvabsd_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvabsd_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvabsd.b $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> %va, <32 x i8> %vb)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvabsd_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvabsd_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvabsd.h $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> %va, <16 x i16> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvabsd_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvabsd_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvabsd.w $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvabsd_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvabsd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabsd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvabsd_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvabsd_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabsd.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvabsd_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvabsd_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabsd.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvabsd_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvabsd_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabsd.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvabsd_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvabsd_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabsd.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll new file mode 100644 index 0000000000000000000000000000000000000000..0c2f2ace29fc9384e1b652bb690af9dbfa919680 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvadd_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvadd_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvadd_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvadd_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvadd_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvadd_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 +; CHECK-NEXT: 
ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvadd_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvadd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvadd_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvadd_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvadd.q $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll new file mode 100644 index 0000000000000000000000000000000000000000..c1258d53e913ee81c7df474bada02bdeff3ec4f7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvadda_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvadda_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvadda.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvadda_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvadda_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvadda.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvadda_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvadda_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvadda.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvadda_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvadda_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvadda.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..4998847f091009b07edb0c11bdeca83749f0b524 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvaddi_bu_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvaddi.bu: argument out of range +entry: + %res = call <32 x i8> 
@llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvaddi_bu_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvaddi.bu: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 32) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvaddi_hu_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvaddi.hu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvaddi_hu_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvaddi.hu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvaddi_wu_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvaddi.wu: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvaddi_wu_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvaddi.wu: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvaddi_du_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvaddi.du: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvaddi_du_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvaddi.du: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 32) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..f25f0e61a28e17228403d2147bbf8ad79f56383d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvaddi_bu(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvaddi_hu(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvaddi_wu(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvaddi_du(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, 
i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll new file mode 100644 index 0000000000000000000000000000000000000000..09b5d07a0151cd401e580ce313edc96e5e43fa84 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvaddi_bu(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvaddi_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvaddi_hu(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvaddi_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvaddi_wu(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvaddi_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvaddi_du(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvaddi_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddi.du $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll new file mode 100644 index 0000000000000000000000000000000000000000..ef7a1b5a50efb144d28083f20e71a8ec89952672 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll @@ -0,0 +1,290 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvaddwev_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.h.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvaddwev_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.w.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvaddwev_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> 
@llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvaddwev_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.q.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvaddwev_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.h.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvaddwev_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.w.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvaddwev_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.d.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvaddwev_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.q.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvaddwev_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.h.bu.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvaddwev_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.w.hu.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvaddwev_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.d.wu.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvaddwev_q_du_d(<4 x i64> %va, <4 x i64> 
%vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.q.du.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvaddwod_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.h.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvaddwod_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.w.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvaddwod_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvaddwod_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.q.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvaddwod_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.h.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvaddwod_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.w.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvaddwod_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.d.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvaddwod_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.q.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare 
<16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvaddwod_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.h.bu.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvaddwod_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.w.hu.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvaddwod_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.d.wu.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvaddwod_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.q.du.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll new file mode 100644 index 0000000000000000000000000000000000000000..15f3a8094770b050404b8034e02b4a717dcd11f2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvand_v(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvand_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..60f0b765f9546e160691bdc110a711be8b4a16ad --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvandi_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvandi.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvandi_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvandi.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 256) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..1273dc6b450b51726f7ded830acee768c09f5e2f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvandi_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll new file mode 100644 index 0000000000000000000000000000000000000000..88cf142d696823758fbfeadf64f685aaf0745a1c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvandi_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvandi_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvandi.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll new file mode 100644 index 0000000000000000000000000000000000000000..f385ef3661cb9dcac057afc1c1e33a964fd14d21 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvandn_v(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvandn_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll new file mode 100644 index 0000000000000000000000000000000000000000..488d3b96b00384b520d5c84207a7526acc234450 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvavg_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvavg_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavg.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvavg_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvavg_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavg.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> 
@llvm.loongarch.lasx.xvavg.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvavg_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvavg_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavg.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvavg_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvavg_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavg.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvavg_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvavg_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavg.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvavg_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvavg_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavg.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvavg_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvavg_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavg.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvavg_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvavg_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavg.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll new file mode 100644 index 0000000000000000000000000000000000000000..b5ab5a5366aafe5aa404c3fa3ba68eb94ba2ecd9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvavgr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvavgr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavgr.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvavgr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvavgr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavgr.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> 
@llvm.loongarch.lasx.xvavgr.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvavgr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvavgr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavgr.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvavgr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvavgr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavgr.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvavgr_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvavgr_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavgr.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvavgr_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvavgr_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavgr.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvavgr_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvavgr_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavgr.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvavgr_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvavgr_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavgr.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..ecc287e89bbc00910982f23989144682936018f6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbitclri_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitclri.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvbitclri_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitclri.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 8) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvbitclri_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitclri.h: argument out of range +entry: + %res = call <16 x i16> 
@llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvbitclri_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitclri.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 16) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvbitclri_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitclri.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvbitclri_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitclri.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvbitclri_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitclri.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvbitclri_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitclri.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 64) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..09da85411082b67353443116ce8abfd2232595c9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbitclri_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvbitclri_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvbitclri_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvbitclri_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll new file mode 100644 index 0000000000000000000000000000000000000000..cec71bab2fe84cdb394c09c0d7de6283c25d50db --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < 
%s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvbitclr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitclr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitclr.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvbitclr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitclr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitclr.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvbitclr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitclr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitclr.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvbitclr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitclr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitclr.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbitclri_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvbitclri_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitclri.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvbitclri_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvbitclri_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitclri.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvbitclri_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvbitclri_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitclri.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvbitclri_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvbitclri_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitclri.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..dff0884fdd5aa8cefcd53e5c34b7bc01bb9f03b8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> 
@llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbitrevi_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitrevi.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvbitrevi_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitrevi.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 8) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvbitrevi_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitrevi.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvbitrevi_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitrevi.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 16) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvbitrevi_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitrevi.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvbitrevi_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitrevi.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvbitrevi_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitrevi.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvbitrevi_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitrevi.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 64) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..e1aef1a82f0c10b06953b6daa0450600bc762cbf --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbitrevi_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvbitrevi_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvbitrevi_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 %b) + ret <8 
x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvbitrevi_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll new file mode 100644 index 0000000000000000000000000000000000000000..fb4f9fbc2e4b39ab85a979ee09135b2466f9da41 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvbitrev_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitrev_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitrev.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvbitrev_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitrev_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitrev.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvbitrev_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitrev_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitrev.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvbitrev_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitrev_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitrev.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbitrevi_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvbitrevi_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitrevi.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvbitrevi_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvbitrevi_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitrevi.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvbitrevi_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvbitrevi_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitrevi.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64>, i32) 
+ +define <4 x i64> @lasx_xvbitrevi_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvbitrevi_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitrevi.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll new file mode 100644 index 0000000000000000000000000000000000000000..2e91407590ac16d789270d7f6cb2abca8d8252a9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8>, <32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvbitsel_v(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { +; CHECK-LABEL: lasx_xvbitsel_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitsel.v $xr0, $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..3f6fd44f842c65669c1ec252d7368905104b9e8b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvbitseli_b_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseli.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvbitseli_b_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseli.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 256) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..40533ab96d86aa8b43fbecf04bd88bb95a4de470 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvbitseli_b(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll new file mode 100644 index 0000000000000000000000000000000000000000..79dd55cbfef9881df9753d557c73fb93e9f647a0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> 
@llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvbitseli_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitseli_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitseli.b $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..17a77ece7775b8d4e7f5ebc2add71f882a2025b7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbitseti_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseti.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvbitseti_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseti.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 8) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvbitseti_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseti.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvbitseti_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseti.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 16) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvbitseti_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseti.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvbitseti_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseti.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvbitseti_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseti.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvbitseti_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseti.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 64) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..613285804e0e4b832110f1754c0361d105476f04 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, 
i32) + +define <32 x i8> @lasx_xvbitseti_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvbitseti_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvbitseti_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvbitseti_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll new file mode 100644 index 0000000000000000000000000000000000000000..83d1f0ef60c63c04a8f53e28d52ff2a0813734c2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvbitset_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitset_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitset.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvbitset_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitset_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitset.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvbitset_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitset_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitset.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvbitset_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitset_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitset.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbitseti_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvbitseti_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitseti.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> 
%va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvbitseti_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvbitseti_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitseti.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvbitseti_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvbitseti_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitseti.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvbitseti_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvbitseti_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitseti.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..1da08a633bd2b864fac441735216be54986c5721 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbsll_v_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbsll.v: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvbsll_v_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbsll.v: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 32) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..e19a3232c179705aaae8d5f8678a7624b5122ed2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbsll_v(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll new file mode 100644 index 0000000000000000000000000000000000000000..cbb63ced5cc00dd0b95f1ebc2e0337daad3a34c0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbsll_v(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvbsll_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbsll.v $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 
x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..5d2b63391e677140fc67e56b128ca459595c31fa --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbsrl_v_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbsrl.v: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvbsrl_v_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbsrl.v: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 32) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..8dfd0ca579b84f7bcfdffbcf3851062a4aa08ba8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbsrl_v(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll new file mode 100644 index 0000000000000000000000000000000000000000..b0c26cbe3e35c367874cb155c1aa5f464a8d2a23 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbsrl_v(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvbsrl_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll new file mode 100644 index 0000000000000000000000000000000000000000..29b2be03d54eca44af2cac54e0007bdf0c3c059c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8>) + +define <32 x i8> @lasx_xvclo_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvclo_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvclo.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> %va) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16>) + +define <16 x i16> @lasx_xvclo_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvclo_h: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: xvclo.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> %va) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32>) + +define <8 x i32> @lasx_xvclo_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvclo_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvclo.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64>) + +define <4 x i64> @lasx_xvclo_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvclo_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvclo.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> %va) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll new file mode 100644 index 0000000000000000000000000000000000000000..5247ceedbd146ee59edc6398631496c0166247b5 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8>) + +define <32 x i8> @lasx_xvclz_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvclz_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvclz.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> %va) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16>) + +define <16 x i16> @lasx_xvclz_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvclz_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvclz.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> %va) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32>) + +define <8 x i32> @lasx_xvclz_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvclz_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvclz.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64>) + +define <4 x i64> @lasx_xvclz_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvclz_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvclz.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> %va) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll new file mode 100644 index 0000000000000000000000000000000000000000..813204092e944af1d27782d30f2a34d3c2200ca6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvdiv_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvdiv_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvdiv.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16>, <16 x i16>) + +define <16 x i16> 
@lasx_xvdiv_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvdiv_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvdiv.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvdiv_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvdiv_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvdiv.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvdiv_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvdiv_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvdiv.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvdiv_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvdiv_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvdiv.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvdiv_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvdiv_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvdiv.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvdiv_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvdiv_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvdiv.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvdiv_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvdiv_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvdiv.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll new file mode 100644 index 0000000000000000000000000000000000000000..48721b52af00913533887e382b3702cce55e3953 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll @@ -0,0 +1,146 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8>) + +define <16 x i16> @lasx_vext2xv_h_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.h.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> %va) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8>) + +define <8 x i32> @lasx_vext2xv_w_b(<32 x i8> %va) nounwind { +; 
CHECK-LABEL: lasx_vext2xv_w_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.w.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8>) + +define <4 x i64> @lasx_vext2xv_d_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_d_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.d.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> %va) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16>) + +define <8 x i32> @lasx_vext2xv_w_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.w.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16>) + +define <4 x i64> @lasx_vext2xv_d_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_d_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.d.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> %va) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32>) + +define <4 x i64> @lasx_vext2xv_d_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> %va) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8>) + +define <16 x i16> @lasx_vext2xv_hu_bu(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_hu_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.hu.bu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> %va) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8>) + +define <8 x i32> @lasx_vext2xv_wu_bu(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_wu_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.wu.bu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8>) + +define <4 x i64> @lasx_vext2xv_du_bu(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_du_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.du.bu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> %va) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16>) + +define <8 x i32> @lasx_vext2xv_wu_hu(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_wu_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16>) + +define <4 x i64> @lasx_vext2xv_du_hu(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_du_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.du.hu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> %va) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32>) + +define <4 x i64> 
@lasx_vext2xv_du_wu(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_du_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> %va) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll new file mode 100644 index 0000000000000000000000000000000000000000..543589e61b12f7b5b8b420769694c4beb4212110 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8>) + +define <16 x i16> @lasx_xvexth_h_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvexth_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvexth.h.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> %va) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16>) + +define <8 x i32> @lasx_xvexth_w_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvexth_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvexth.w.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32>) + +define <4 x i64> @lasx_xvexth_d_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvexth_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvexth.d.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> %va) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64>) + +define <4 x i64> @lasx_xvexth_q_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvexth_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvexth.q.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> %va) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8>) + +define <16 x i16> @lasx_xvexth_hu_bu(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvexth_hu_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvexth.hu.bu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> %va) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16>) + +define <8 x i32> @lasx_xvexth_wu_hu(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvexth_wu_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvexth.wu.hu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32>) + +define <4 x i64> @lasx_xvexth_du_wu(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvexth_du_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvexth.du.wu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> %va) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64>) + +define <4 x i64> @lasx_xvexth_qu_du(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvexth_qu_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvexth.qu.du $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> %va) + ret <4 x 
i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll new file mode 100644 index 0000000000000000000000000000000000000000..7040c8c784cdfea6d47b5c003febf3895bcae2c2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64>) + +define <4 x i64> @lasx_xvextl_q_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvextl_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvextl.q.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> %va) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64>) + +define <4 x i64> @lasx_xvextl_qu_du(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvextl_qu_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvextl.qu.du $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> %va) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..1301b8a146eb7d334e120433f6d041fac4121d75 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvextrins_b_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvextrins.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvextrins_b_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvextrins.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 256) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvextrins_h_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvextrins.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvextrins_h_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvextrins.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 256) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvextrins_w_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvextrins.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvextrins_w_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvextrins.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 256) + ret <8 x i32> %res +} + +declare <4 x i64> 
@llvm.loongarch.lasx.xvextrins.d(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvextrins_d_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvextrins.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvextrins_d_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvextrins.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 256) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..bca8f8b3c778fbd2d43245c536cfc0e2d2d4a768 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvextrins_b(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvextrins_h(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 %c) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvextrins_w(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvextrins_d(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 %c) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll new file mode 100644 index 0000000000000000000000000000000000000000..c8774a7b29c0b5f5463c6a656441ca3d77aa82f9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvextrins_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvextrins_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvextrins.b $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvextrins_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvextrins_h: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: xvextrins.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvextrins_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvextrins_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvextrins.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvextrins_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvextrins_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvextrins.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll new file mode 100644 index 0000000000000000000000000000000000000000..563a0ce9e384d5c6e53d4c9ffec243ff7d835d58 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float>, <8 x float>) + +define <8 x float> @lasx_xvfadd_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfadd_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfadd.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> %va, <8 x float> %vb) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double>, <4 x double>) + +define <4 x double> @lasx_xvfadd_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfadd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> %va, <4 x double> %vb) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll new file mode 100644 index 0000000000000000000000000000000000000000..901ca5bb026019469430bc1ae8f6b6fc1d1d7975 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float>) + +define <8 x i32> @lasx_xvfclass_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvfclass_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfclass.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double>) + +define <4 x i64> @lasx_xvfclass_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvfclass_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfclass.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> %va) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll new file mode 100644 index 0000000000000000000000000000000000000000..b01f908e71af5b43e337ad8f76c7f4595bcb4106 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll @@ -0,0 +1,530 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_caf_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_caf_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.caf.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_caf_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_caf_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.caf.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_cun_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cun_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cun.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_cun_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cun_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cun.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_ceq_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_ceq_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.ceq.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_ceq_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_ceq_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.ceq.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_cueq_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cueq_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cueq.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_cueq_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cueq_d: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: xvfcmp.cueq.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_clt_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_clt_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_clt_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_clt_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_cult_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cult_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_cult_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cult_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cult.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_cle_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cle_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_cle_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cle_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_cule_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cule_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cule.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_cule_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cule_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cule.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> %va, <4 x double> 
%vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_cne_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cne_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cne.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_cne_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cne_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cne.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_cor_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cor_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cor.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_cor_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cor_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cor.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_cune_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cune_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cune.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_cune_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cune_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cune.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_saf_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_saf_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.saf.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_saf_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_saf_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.saf.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_sun_s(<8 x float> %va, <8 x float> %vb) nounwind 
{ +; CHECK-LABEL: lasx_xvfcmp_sun_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sun.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_sun_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sun_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sun.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_seq_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_seq_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.seq.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_seq_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_seq_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.seq.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_sueq_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sueq_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sueq.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_sueq_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sueq_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sueq.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_slt_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_slt_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.slt.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_slt_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_slt_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.slt.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_sult_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sult_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sult.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> 
@llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_sult_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sult_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sult.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_sle_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sle_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sle.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_sle_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sle_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sle.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_sule_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sule_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sule.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_sule_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sule_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sule.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_sne_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sne_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sne.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_sne_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sne_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sne.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_sor_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sor_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sor.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double>, <4 x double>) + +define 
<4 x i64> @lasx_xvfcmp_sor_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sor_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sor.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_sune_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sune_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sune.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_sune_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sune_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sune.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll new file mode 100644 index 0000000000000000000000000000000000000000..82bf1d3df72c6c2f87550b5b492fa2039b44f9b0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float>, <8 x float>) + +define <16 x i16> @lasx_xvfcvt_h_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcvt_h_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcvt.h.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> %va, <8 x float> %vb) + ret <16 x i16> %res +} + +declare <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double>, <4 x double>) + +define <8 x float> @lasx_xvfcvt_s_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcvt_s_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcvt.s.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> %va, <4 x double> %vb) + ret <8 x float> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll new file mode 100644 index 0000000000000000000000000000000000000000..e1a6a2923e6770505473939238719fa00f4eb963 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16>) + +define <8 x float> @lasx_xvfcvth_s_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvfcvth_s_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcvth.s.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float>) + +define <4 x double> @lasx_xvfcvth_d_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvfcvth_d_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcvth.d.s $xr0, $xr0 +; CHECK-NEXT: ret 
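+; Note: per its mnemonic, xvfcvth.d.s widens the high-order float elements to double; the matching low-order conversions (xvfcvtl.*) are covered in intrinsic-fcvtl.ll below.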
+entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> %va) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll new file mode 100644 index 0000000000000000000000000000000000000000..0b3e693c7f51de7f08e05556b4a6caca41c67800 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16>) + +define <8 x float> @lasx_xvfcvtl_s_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvfcvtl_s_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcvtl.s.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float>) + +define <4 x double> @lasx_xvfcvtl_d_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvfcvtl_d_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcvtl.d.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> %va) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll new file mode 100644 index 0000000000000000000000000000000000000000..49923ddd4e8dec5c65296b9177c34549e00e42d2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float>, <8 x float>) + +define <8 x float> @lasx_xvfdiv_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfdiv_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfdiv.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> %va, <8 x float> %vb) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double>, <4 x double>) + +define <4 x double> @lasx_xvfdiv_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfdiv_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfdiv.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> %va, <4 x double> %vb) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll new file mode 100644 index 0000000000000000000000000000000000000000..24da0bd3383877fc88e1e9a310f49ce167641a7c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll @@ -0,0 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32>) + +define <8 x float> @lasx_xvffint_s_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvffint_s_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvffint.s.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64>) + +define <4 x double> @lasx_xvffint_d_l(<4 x i64> %va) nounwind { +; 
CHECK-LABEL: lasx_xvffint_d_l: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvffint.d.l $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> %va) + ret <4 x double> %res +} + +declare <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32>) + +define <8 x float> @lasx_xvffint_s_wu(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvffint_s_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvffint.s.wu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64>) + +define <4 x double> @lasx_xvffint_d_lu(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvffint_d_lu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> %va) + ret <4 x double> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32>) + +define <4 x double> @lasx_xvffintl_d_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvffintl_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvffintl.d.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> %va) + ret <4 x double> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32>) + +define <4 x double> @lasx_xvffinth_d_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvffinth_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvffinth.d.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> %va) + ret <4 x double> %res +} + +declare <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64>, <4 x i64>) + +define <8 x float> @lasx_xvffint_s_l(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvffint_s_l: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvffint.s.l $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> %va, <4 x i64> %vb) + ret <8 x float> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll new file mode 100644 index 0000000000000000000000000000000000000000..bccef4504d70e2a136d095be5e201c34fadb800f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float>) + +define <8 x float> @lasx_xvflogb_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvflogb_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvflogb.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double>) + +define <4 x double> @lasx_xvflogb_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvflogb_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvflogb.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> %va) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll new file mode 100644 index 0000000000000000000000000000000000000000..0fc06f97166028542e2dd590282f942654de295b --- /dev/null +++ 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float>, <8 x float>, <8 x float>) + +define <8 x float> @lasx_xvfmadd_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { +; CHECK-LABEL: lasx_xvfmadd_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double>, <4 x double>, <4 x double>) + +define <4 x double> @lasx_xvfmadd_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { +; CHECK-LABEL: lasx_xvfmadd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll new file mode 100644 index 0000000000000000000000000000000000000000..2422fa0c00d8bfd71d7921d2b648561471dad12b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float>, <8 x float>) + +define <8 x float> @lasx_xvfmax_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfmax_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmax.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> %va, <8 x float> %vb) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double>, <4 x double>) + +define <4 x double> @lasx_xvfmax_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfmax_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmax.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> %va, <4 x double> %vb) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll new file mode 100644 index 0000000000000000000000000000000000000000..cd9ccc656aef668d2311702b0d8ed37ecba26b06 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float>, <8 x float>) + +define <8 x float> @lasx_xvfmaxa_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfmaxa_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmaxa.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> %va, <8 x float> %vb) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double>, <4 x double>) + +define <4 x double> @lasx_xvfmaxa_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfmaxa_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
xvfmaxa.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> %va, <4 x double> %vb) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll new file mode 100644 index 0000000000000000000000000000000000000000..effb3f9e1d75a8ec52b1ffc2b229a98fccb06c73 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float>, <8 x float>) + +define <8 x float> @lasx_xvfmin_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfmin_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmin.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> %va, <8 x float> %vb) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double>, <4 x double>) + +define <4 x double> @lasx_xvfmin_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfmin_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmin.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> %va, <4 x double> %vb) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll new file mode 100644 index 0000000000000000000000000000000000000000..753a6f31ba061c7429644a83acabc8c1a6788e99 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float>, <8 x float>) + +define <8 x float> @lasx_xvfmina_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfmina_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmina.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> %va, <8 x float> %vb) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double>, <4 x double>) + +define <4 x double> @lasx_xvfmina_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfmina_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmina.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> %va, <4 x double> %vb) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll new file mode 100644 index 0000000000000000000000000000000000000000..57909d0dd1689f692dee38dce3d646fdc9c7b59a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float>, <8 x float>, <8 x float>) + +define <8 x float> @lasx_xvfmsub_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { +; CHECK-LABEL: lasx_xvfmsub_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call 
<8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double>, <4 x double>, <4 x double>)
+
+define <4 x double> @lasx_xvfmsub_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind {
+; CHECK-LABEL: lasx_xvfmsub_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc)
+  ret <4 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll
new file mode 100644
index 0000000000000000000000000000000000000000..9cad6f38306618c5f7c2199b05abbb37033051d1
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float>, <8 x float>)
+
+define <8 x float> @lasx_xvfmul_s(<8 x float> %va, <8 x float> %vb) nounwind {
+; CHECK-LABEL: lasx_xvfmul_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfmul.s $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> %va, <8 x float> %vb)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double>, <4 x double>)
+
+define <4 x double> @lasx_xvfmul_d(<4 x double> %va, <4 x double> %vb) nounwind {
+; CHECK-LABEL: lasx_xvfmul_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfmul.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> %va, <4 x double> %vb)
+  ret <4 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll
new file mode 100644
index 0000000000000000000000000000000000000000..c30993590f98a89039d6741aeb3696b0856c6364
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float>, <8 x float>, <8 x float>)
+
+define <8 x float> @lasx_xvfnmadd_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind {
+; CHECK-LABEL: lasx_xvfnmadd_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double>, <4 x double>, <4 x double>)
+
+define <4 x double> @lasx_xvfnmadd_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind {
+; CHECK-LABEL: lasx_xvfnmadd_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc)
+  ret <4 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll
new file mode 100644
index 0000000000000000000000000000000000000000..2e7ca695be62567cdeabdadade4c39d731ffc68d
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float>, <8 x float>, <8 x float>)
+
+define <8 x float> @lasx_xvfnmsub_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind {
+; CHECK-LABEL: lasx_xvfnmsub_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double>, <4 x double>, <4 x double>)
+
+define <4 x double> @lasx_xvfnmsub_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind {
+; CHECK-LABEL: lasx_xvfnmsub_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc)
+  ret <4 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll
new file mode 100644
index 0000000000000000000000000000000000000000..da3a26df2824e871ebd018496cfaedf5c3df6965
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float>)
+
+define <8 x float> @lasx_xvfrecip_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvfrecip_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfrecip.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> %va)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double>)
+
+define <4 x double> @lasx_xvfrecip_d(<4 x double> %va) nounwind {
+; CHECK-LABEL: lasx_xvfrecip_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfrecip.d $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> %va)
+  ret <4 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll
new file mode 100644
index 0000000000000000000000000000000000000000..ddead27cd14b5b01879f541a4d13d48288cf5aa2
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float>)
+
+define <8 x float> @lasx_xvfrintrne_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvfrintrne_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfrintrne.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> %va)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double>)
+
+define <4 x double> @lasx_xvfrintrne_d(<4 x double> %va) nounwind {
+; CHECK-LABEL: lasx_xvfrintrne_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfrintrne.d $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> %va)
+  ret <4 x double> %res
+}
+
+declare <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float>)
+
+define <8 x float> @lasx_xvfrintrz_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvfrintrz_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfrintrz.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> %va)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double>)
+
+define <4 x double> @lasx_xvfrintrz_d(<4 x double> %va) nounwind {
+; CHECK-LABEL: lasx_xvfrintrz_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfrintrz.d $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> %va)
+  ret <4 x double> %res
+}
+
+declare <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float>)
+
+define <8 x float> @lasx_xvfrintrp_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvfrintrp_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfrintrp.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> %va)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double>)
+
+define <4 x double> @lasx_xvfrintrp_d(<4 x double> %va) nounwind {
+; CHECK-LABEL: lasx_xvfrintrp_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfrintrp.d $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> %va)
+  ret <4 x double> %res
+}
+
+declare <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float>)
+
+define <8 x float> @lasx_xvfrintrm_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvfrintrm_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfrintrm.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> %va)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double>)
+
+define <4 x double> @lasx_xvfrintrm_d(<4 x double> %va) nounwind {
+; CHECK-LABEL: lasx_xvfrintrm_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfrintrm.d $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> %va)
+  ret <4 x double> %res
+}
+
+declare <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float>)
+
+define <8 x float> @lasx_xvfrint_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvfrint_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfrint.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> %va)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double>)
+
+define <4 x double> @lasx_xvfrint_d(<4 x double> %va) nounwind {
+; CHECK-LABEL: lasx_xvfrint_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfrint.d $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> %va)
+  ret <4 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll
new file mode 100644
index 0000000000000000000000000000000000000000..6efa8122baf1804855516b559ddd5fd13f51e095
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float>)
+
+define <8 x float> @lasx_xvfrsqrt_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvfrsqrt_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfrsqrt.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> %va)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double>)
+
+define <4 x double> @lasx_xvfrsqrt_d(<4 x double> %va) nounwind {
+; CHECK-LABEL: lasx_xvfrsqrt_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfrsqrt.d $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> %va)
+  ret <4 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..64b4632669d29dc79bd38a0f8d273b5128e8db61
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll
@@ -0,0 +1,33 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvfrstpi_b_lo(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvfrstpi.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 -1)
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvfrstpi_b_hi(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvfrstpi.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 32)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvfrstpi_h_lo(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvfrstpi.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 -1)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvfrstpi_h_hi(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvfrstpi.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 32)
+  ret <16 x i16> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..ca92cff9b2d1ecb76fc769254a9b55c7c3abc6d8
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll
@@ -0,0 +1,19 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvfrstpi_b(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 %c)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvfrstpi_h(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 %c)
+  ret <16 x i16> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll
new file mode 100644
index 0000000000000000000000000000000000000000..e83e55a52a113dbc03a476fcd6c8ef9b901aa063
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8>, <32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvfrstp_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind {
+; CHECK-LABEL: lasx_xvfrstp_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfrstp.b $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16>, <16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvfrstp_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind {
+; CHECK-LABEL: lasx_xvfrstp_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfrstp.h $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc)
+  ret <16 x i16> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvfrstpi_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvfrstpi_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfrstpi.b $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 1)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvfrstpi_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvfrstpi_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfrstpi.h $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 1)
+  ret <16 x i16> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a13333d8d81c29afe7f86417e9b2cb9e30a13059
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float>)
+
+define <8 x float> @lasx_xvfsqrt_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvfsqrt_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfsqrt.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> %va)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double>)
+
+define <4 x double> @lasx_xvfsqrt_d(<4 x double> %va) nounwind {
+; CHECK-LABEL: lasx_xvfsqrt_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfsqrt.d $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> %va)
+  ret <4 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll
new file mode 100644
index 0000000000000000000000000000000000000000..b52774a03618ddbaff23857476eafac71a9e8b8e
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float>, <8 x float>)
+
+define <8 x float> @lasx_xvfsub_s(<8 x float> %va, <8 x float> %vb) nounwind {
+; CHECK-LABEL: lasx_xvfsub_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfsub.s $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> %va, <8 x float> %vb)
+  ret <8 x float> %res
+}
+
+declare <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double>, <4 x double>)
+
+define <4 x double> @lasx_xvfsub_d(<4 x double> %va, <4 x double> %vb) nounwind {
+; CHECK-LABEL: lasx_xvfsub_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfsub.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> %va, <4 x double> %vb)
+  ret <4 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll
new file mode 100644
index 0000000000000000000000000000000000000000..74cd507f16d2631fbb3d3a67842c50f081c32025
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll
@@ -0,0 +1,350 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float>)
+
+define <8 x i32> @lasx_xvftintrne_w_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvftintrne_w_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrne.w.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> %va)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double>)
+
+define <4 x i64> @lasx_xvftintrne_l_d(<4 x double> %va) nounwind {
+; CHECK-LABEL: lasx_xvftintrne_l_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrne.l.d $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> %va)
+  ret <4 x i64> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float>)
+
+define <8 x i32> @lasx_xvftintrz_w_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvftintrz_w_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> %va)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double>)
+
+define <4 x i64> @lasx_xvftintrz_l_d(<4 x double> %va) nounwind {
+; CHECK-LABEL: lasx_xvftintrz_l_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrz.l.d $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> %va)
+  ret <4 x i64> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float>)
+
+define <8 x i32> @lasx_xvftintrp_w_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvftintrp_w_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrp.w.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> %va)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double>)
+
+define <4 x i64> @lasx_xvftintrp_l_d(<4 x double> %va) nounwind {
+; CHECK-LABEL: lasx_xvftintrp_l_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrp.l.d $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> %va)
+  ret <4 x i64> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float>)
+
+define <8 x i32> @lasx_xvftintrm_w_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvftintrm_w_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrm.w.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> %va)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double>)
+
+define <4 x i64> @lasx_xvftintrm_l_d(<4 x double> %va) nounwind {
+; CHECK-LABEL: lasx_xvftintrm_l_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrm.l.d $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> %va)
+  ret <4 x i64> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float>)
+
+define <8 x i32> @lasx_xvftint_w_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvftint_w_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftint.w.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> %va)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double>)
+
+define <4 x i64> @lasx_xvftint_l_d(<4 x double> %va) nounwind {
+; CHECK-LABEL: lasx_xvftint_l_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftint.l.d $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> %va)
+  ret <4 x i64> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float>)
+
+define <8 x i32> @lasx_xvftintrz_wu_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvftintrz_wu_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrz.wu.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> %va)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double>)
+
+define <4 x i64> @lasx_xvftintrz_lu_d(<4 x double> %va) nounwind {
+; CHECK-LABEL: lasx_xvftintrz_lu_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrz.lu.d $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> %va)
+  ret <4 x i64> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float>)
+
+define <8 x i32> @lasx_xvftint_wu_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvftint_wu_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftint.wu.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> %va)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double>)
+
+define <4 x i64> @lasx_xvftint_lu_d(<4 x double> %va) nounwind {
+; CHECK-LABEL: lasx_xvftint_lu_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftint.lu.d $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> %va)
+  ret <4 x i64> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double>, <4 x double>)
+
+define <8 x i32> @lasx_xvftintrne_w_d(<4 x double> %va, <4 x double> %vb) nounwind {
+; CHECK-LABEL: lasx_xvftintrne_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrne.w.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> %va, <4 x double> %vb)
+  ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double>, <4 x double>)
+
+define <8 x i32> @lasx_xvftintrz_w_d(<4 x double> %va, <4 x double> %vb) nounwind {
+; CHECK-LABEL: lasx_xvftintrz_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrz.w.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> %va, <4 x double> %vb)
+  ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double>, <4 x double>)
+
+define <8 x i32> @lasx_xvftintrp_w_d(<4 x double> %va, <4 x double> %vb) nounwind {
+; CHECK-LABEL: lasx_xvftintrp_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrp.w.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> %va, <4 x double> %vb)
+  ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double>, <4 x double>)
+
+define <8 x i32> @lasx_xvftintrm_w_d(<4 x double> %va, <4 x double> %vb) nounwind {
+; CHECK-LABEL: lasx_xvftintrm_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrm.w.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> %va, <4 x double> %vb)
+  ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double>, <4 x double>)
+
+define <8 x i32> @lasx_xvftint_w_d(<4 x double> %va, <4 x double> %vb) nounwind {
+; CHECK-LABEL: lasx_xvftint_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftint.w.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> %va, <4 x double> %vb)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float>)
+
+define <4 x i64> @lasx_xvftintrnel_l_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvftintrnel_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrnel.l.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> %va)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float>)
+
+define <4 x i64> @lasx_xvftintrneh_l_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvftintrneh_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrneh.l.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> %va)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float>)
+
+define <4 x i64> @lasx_xvftintrzl_l_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvftintrzl_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrzl.l.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> %va)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float>)
+
+define <4 x i64> @lasx_xvftintrzh_l_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvftintrzh_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrzh.l.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> %va)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float>)
+
+define <4 x i64> @lasx_xvftintrpl_l_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvftintrpl_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrpl.l.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> %va)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float>)
+
+define <4 x i64> @lasx_xvftintrph_l_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvftintrph_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrph.l.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> %va)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float>)
+
+define <4 x i64> @lasx_xvftintrml_l_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvftintrml_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrml.l.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> %va)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float>)
+
+define <4 x i64> @lasx_xvftintrmh_l_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvftintrmh_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintrmh.l.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> %va)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float>)
+
+define <4 x i64> @lasx_xvftintl_l_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvftintl_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftintl.l.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> %va)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float>)
+
+define <4 x i64> @lasx_xvftinth_l_s(<8 x float> %va) nounwind {
+; CHECK-LABEL: lasx_xvftinth_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvftinth.l.s $xr0, $xr0
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> %va)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll
new file mode 100644
index 0000000000000000000000000000000000000000..2c64ab23806b5c8c160fbfb4221b666b9390e138
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8>, <32 x i8>)
+
+define <16 x i16> @lasx_xvhaddw_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvhaddw_h_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvhaddw.h.b $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> %va, <32 x i8> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16>, <16 x i16>)
+
+define <8 x i32> @lasx_xvhaddw_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvhaddw_w_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvhaddw.w.h $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> %va, <16 x i16> %vb)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32>, <8 x i32>)
+
+define <4 x i64> @lasx_xvhaddw_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvhaddw_d_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvhaddw.d.w $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> %va, <8 x i32> %vb)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvhaddw_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvhaddw_q_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvhaddw.q.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> %va, <4 x i64> %vb)
+  ret <4 x i64> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8>, <32 x i8>)
+
+define <16 x i16> @lasx_xvhaddw_hu_bu(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvhaddw_hu_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvhaddw.hu.bu $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> %va, <32 x i8> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16>, <16 x i16>)
+
+define <8 x i32> @lasx_xvhaddw_wu_hu(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvhaddw_wu_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvhaddw.wu.hu $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> %va, <16 x i16> %vb)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32>, <8 x i32>)
+
+define <4 x i64> @lasx_xvhaddw_du_wu(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvhaddw_du_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvhaddw.du.wu $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> %va, <8 x i32> %vb)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvhaddw_qu_du(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvhaddw_qu_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvhaddw.qu.du $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> %va, <4 x i64> %vb)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a5223c1d89a04413a0f7c5fa852db8df6dfe1080
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8>, <32 x i8>)
+
+define <16 x i16> @lasx_xvhsubw_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvhsubw_h_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvhsubw.h.b $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> %va, <32 x i8> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16>, <16 x i16>)
+
+define <8 x i32> @lasx_xvhsubw_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvhsubw_w_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvhsubw.w.h $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> %va, <16 x i16> %vb)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32>, <8 x i32>)
+
+define <4 x i64> @lasx_xvhsubw_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvhsubw_d_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvhsubw.d.w $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> %va, <8 x i32> %vb)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvhsubw_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvhsubw_q_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvhsubw.q.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> %va, <4 x i64> %vb)
+  ret <4 x i64> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8>, <32 x i8>)
+
+define <16 x i16> @lasx_xvhsubw_hu_bu(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvhsubw_hu_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvhsubw.hu.bu $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> %va, <32 x i8> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16>, <16 x i16>)
+
+define <8 x i32> @lasx_xvhsubw_wu_hu(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvhsubw_wu_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvhsubw.wu.hu $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> %va, <16 x i16> %vb)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32>, <8 x i32>)
+
+define <4 x i64> @lasx_xvhsubw_du_wu(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvhsubw_du_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvhsubw.du.wu $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> %va, <8 x i32> %vb)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvhsubw_qu_du(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvhsubw_qu_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvhsubw.qu.du $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> %va, <4 x i64> %vb)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll
new file mode 100644
index 0000000000000000000000000000000000000000..c9d0ca6b0324a205f56721ec4b373e6f07360279
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvilvl_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvilvl_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvilvl.b $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> %va, <32 x i8> %vb)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvilvl_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvilvl_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvilvl.h $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> %va, <16 x i16> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvilvl_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvilvl_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvilvl.w $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> %va, <8 x i32> %vb)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvilvl_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvilvl_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvilvl.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> %va, <4 x i64> %vb)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvilvh_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvilvh_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvilvh.b $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> %va, <32 x i8> %vb)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvilvh_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvilvh_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvilvh.h $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> %va, <16 x i16> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvilvh_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvilvh_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvilvh.w $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> %va, <8 x i32> %vb)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvilvh_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvilvh_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvilvh.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> %va, <4 x i64> %vb)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..4982f2c7d43a9237568513d262c0e569a949ac21
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll
@@ -0,0 +1,33 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32)
+
+define <8 x i32> @lasx_xvinsgr2vr_w_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvinsgr2vr.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvinsgr2vr_w_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvinsgr2vr.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 8)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32)
+
+define <4 x i64> @lasx_xvinsgr2vr_d_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvinsgr2vr.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 -1)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvinsgr2vr_d_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvinsgr2vr.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 4)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..3accabf6dbd9892c8576193969e196ebea29be12
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll
@@ -0,0 +1,19 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32)
+
+define <8 x i32> @lasx_xvinsgr2vr_w(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 %b)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32)
+
+define <4 x i64> @lasx_xvinsgr2vr_d(<4 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 %b)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll
new file mode 100644
index 0000000000000000000000000000000000000000..ea98c96464aed825b02547924c89977ed123a7c5
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32)
+
+define <8 x i32> @lasx_xvinsgr2vr_w(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvinsgr2vr_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ori $a0, $zero, 1
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 1)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32)
+
+define <4 x i64> @lasx_xvinsgr2vr_d(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvinsgr2vr_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ori $a0, $zero, 1
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 1)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a54fa8515fbafe91a9d338467481e13304db4c7f
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll
@@ -0,0 +1,33 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvinsve0_w_lo(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvinsve0.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvinsve0_w_hi(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvinsve0.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 8)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvinsve0_d_lo(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvinsve0.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 -1)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvinsve0_d_hi(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvinsve0.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 4)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..53e59db11aa69108e4e7279c1d4963a02e2bb521
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll
@@ -0,0 +1,19 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvinsve0_w(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 %c)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvinsve0_d(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 %c)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll
new file mode 100644
index 0000000000000000000000000000000000000000..27ae819c4144c573088aa52c31331b0cf15da634
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvinsve0_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvinsve0_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 1)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvinsve0_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvinsve0_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 1)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..20dd8a45d7f02ec3e954b87aece413aa5cbc78a9
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll
@@ -0,0 +1,17 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvld(i8*, i32)
+
+define <32 x i8> @lasx_xvld_lo(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvld: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 -2049)
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvld_hi(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvld: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 2048)
+  ret <32 x i8> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..b23436a4483235e7e14cf33a7a2acdedd4b06cfc
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll
@@ -0,0 +1,10 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvld(i8*, i32)
+
+define <32 x i8> @lasx_xvld(i8* %p, i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 %a)
+  ret <32 x i8> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll
new file mode 100644
index 0000000000000000000000000000000000000000..5ffc629db4668bf1409b73e486ce5287e471d62f
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvld(i8*, i32)
+
+define <32 x i8> @lasx_xvld(i8* %p) nounwind {
+; CHECK-LABEL: lasx_xvld:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a0, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 1)
+  ret <32 x i8> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvldx(i8*, i64)
+
+define <32 x i8> @lasx_xvldx(i8* %p, i64 %b) nounwind {
+; CHECK-LABEL: lasx_xvldx:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvldx $xr0, $a0, $a1
+; CHECK-NEXT: ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvldx(i8* %p, i64 %b)
+  ret <32 x i8> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..f3dd3650cf8a409b533e900b26ad940708a9fc35
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll
@@ -0,0 +1,81 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32)
+
+define <4 x i64> @lasx_xvldi_lo() nounwind {
+; CHECK: llvm.loongarch.lasx.xvldi: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 -4097)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvldi_hi() nounwind {
+; CHECK: llvm.loongarch.lasx.xvldi: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 4096)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32)
+
+define <32 x i8> @lasx_xvrepli_b_lo() nounwind {
+; CHECK: llvm.loongarch.lasx.xvrepli.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 -513)
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvrepli_b_hi() nounwind {
+; CHECK: llvm.loongarch.lasx.xvrepli.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 512)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32)
+
+define <16 x i16> @lasx_xvrepli_h_lo() nounwind {
+; CHECK: llvm.loongarch.lasx.xvrepli.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 -513)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvrepli_h_hi() nounwind {
+; CHECK: llvm.loongarch.lasx.xvrepli.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 512)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32)
+
+define <8 x i32> @lasx_xvrepli_w_lo() nounwind {
+; CHECK: llvm.loongarch.lasx.xvrepli.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 -513)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvrepli_w_hi() nounwind {
+; CHECK: llvm.loongarch.lasx.xvrepli.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 512)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32)
+
+define <4 x i64> @lasx_xvrepli_d_lo() nounwind {
+; CHECK: llvm.loongarch.lasx.xvrepli.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 -513)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvrepli_d_hi() nounwind {
+; CHECK: llvm.loongarch.lasx.xvrepli.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 512)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..6466818bf674b3976bd0931de9d356dabd333a97
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll
@@ -0,0 +1,46 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32)
+
+define <4 x i64> @lasx_xvldi(i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 %a)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32)
+
+define <32 x i8> @lasx_xvrepli_b(i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 %a)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32)
+
+define <16 x i16> @lasx_xvrepli_h(i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 %a)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32)
+
+define <8 x i32> @lasx_xvrepli_w(i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 %a)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32)
+
+define <4 x i64> @lasx_xvrepli_d(i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 %a)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll
new file mode 100644
index 0000000000000000000000000000000000000000..59f79dd32af367f56f7a1930d51e2b175505d32b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32)
+
+define <4 x i64> @lasx_xvldi() nounwind {
+; CHECK-LABEL: lasx_xvldi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvldi $xr0, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32)
+
+define <32 x i8> @lasx_xvrepli_b() nounwind {
+; CHECK-LABEL: lasx_xvrepli_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvrepli.b $xr0, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32)
+
+define <16 x i16> @lasx_xvrepli_h() nounwind {
+; CHECK-LABEL: lasx_xvrepli_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvrepli.h $xr0, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32)
+
+define <8 x i32> @lasx_xvrepli_w() nounwind {
+; CHECK-LABEL: lasx_xvrepli_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvrepli.w $xr0, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32)
+
+define <4 x i64> @lasx_xvrepli_d() nounwind {
+; CHECK-LABEL: lasx_xvrepli_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvrepli.d $xr0, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..cb62a839985a32c2b3ac0228253fe6af4187166b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8*, i32)
+
+define <32 x i8> @lasx_xvldrepl_b_lo(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvldrepl.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 -2049)
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvldrepl_b_hi(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvldrepl.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 2048)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8*, i32)
+
+define <16 x i16> @lasx_xvldrepl_h_lo(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvldrepl.h: argument out of range or not a multiple of 2.
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 -2050)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvldrepl_h_hi(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvldrepl.h: argument out of range or not a multiple of 2.
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 2048)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8*, i32)
+
+define <8 x i32> @lasx_xvldrepl_w_lo(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvldrepl.w: argument out of range or not a multiple of 4.
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 -2052)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvldrepl_w_hi(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvldrepl.w: argument out of range or not a multiple of 4.
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 2048)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8*, i32)
+
+define <4 x i64> @lasx_xvldrepl_d_lo(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvldrepl.d: argument out of range or not a multiple of 8.
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 -2056)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvldrepl_d_hi(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvldrepl.d: argument out of range or not a multiple of 8.
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 2048)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..075d663b0dd7af52bab80a2c9466a9ef8340322a
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8*, i32)
+
+define <32 x i8> @lasx_xvldrepl_b(i8* %p, i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 %a)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8*, i32)
+
+define <16 x i16> @lasx_xvldrepl_h(i8* %p, i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 %a)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8*, i32)
+
+define <8 x i32> @lasx_xvldrepl_w(i8* %p, i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 %a)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8*, i32)
+
+define <4 x i64> @lasx_xvldrepl_d(i8* %p, i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 %a)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll
new file mode 100644
index 0000000000000000000000000000000000000000..ae6abdf81cbc58dc93fb88c3300cef8d36819d34
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8*, i32)
+
+define <32 x i8> @lasx_xvldrepl_b(i8* %p) nounwind {
+; CHECK-LABEL: lasx_xvldrepl_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvldrepl.b $xr0, $a0, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 1)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8*, i32)
+
+define <16 x i16> @lasx_xvldrepl_h(i8* %p) nounwind {
+; CHECK-LABEL: lasx_xvldrepl_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvldrepl.h $xr0, $a0, 2
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 2)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8*, i32)
+
+define <8 x i32> @lasx_xvldrepl_w(i8* %p) nounwind {
+; CHECK-LABEL: lasx_xvldrepl_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvldrepl.w $xr0, $a0, 4
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 4)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8*, i32)
+
+define <4 x i64> @lasx_xvldrepl_d(i8* %p) nounwind {
+; CHECK-LABEL: lasx_xvldrepl_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvldrepl.d $xr0, $a0, 8
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 8)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll
new file mode 100644
index 0000000000000000000000000000000000000000..d3b09396727e7919007f091034f21fdeed3ff798
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8>, <32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvmadd_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmadd_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmadd.b $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16>, <16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvmadd_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmadd_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmadd.h $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32>, <8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvmadd_w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmadd_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmadd.w $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64>, <4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvmadd_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmadd_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmadd.d $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll
new file mode 100644
index 0000000000000000000000000000000000000000..146624a764a22e325ddf13375250fd034422b7ff
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll
@@ -0,0 +1,290 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16>, <32 x i8>, <32 x i8>)
+
+define <16 x i16> @lasx_xvmaddwev_h_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmaddwev_h_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmaddwev.h.b $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32>, <16 x i16>, <16 x i16>)
+
+define <8 x i32> @lasx_xvmaddwev_w_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmaddwev_w_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmaddwev.w.h $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64>, <8 x i32>, <8 x i32>)
+
+define <4 x i64> @lasx_xvmaddwev_d_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmaddwev_d_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmaddwev.d.w $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64>, <4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvmaddwev_q_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmaddwev_q_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmaddwev.q.d $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc)
+  ret <4 x i64> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16>, <32 x i8>, <32 x i8>)
+
+define <16 x i16> @lasx_xvmaddwev_h_bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmaddwev_h_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmaddwev.h.bu $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32>, <16 x i16>, <16 x i16>)
+
+define <8 x i32> @lasx_xvmaddwev_w_hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmaddwev_w_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmaddwev.w.hu $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64>, <8 x i32>, <8 x i32>)
+
+define <4 x i64> @lasx_xvmaddwev_d_wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmaddwev_d_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmaddwev.d.wu $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64>, <4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvmaddwev_q_du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmaddwev_q_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmaddwev.q.du $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc)
+  ret <4 x i64> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16>, <32 x i8>, <32 x i8>)
+
+define <16 x i16> @lasx_xvmaddwev_h_bu_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmaddwev_h_bu_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmaddwev.h.bu.b $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32>, <16 x i16>, <16 x i16>)
+
+define <8 x i32> @lasx_xvmaddwev_w_hu_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmaddwev_w_hu_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmaddwev.w.hu.h $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64>, <8 x i32>, <8 x i32>)
+
+define <4 x i64> @lasx_xvmaddwev_d_wu_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmaddwev_d_wu_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmaddwev.d.wu.w $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64>, <4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvmaddwev_q_du_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmaddwev_q_du_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmaddwev.q.du.d $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc)
+  ret <4 x i64> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16>, <32 x i8>, <32 x i8>)
+
+define <16 x i16> @lasx_xvmaddwod_h_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmaddwod_h_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmaddwod.h.b $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32>, <16 x i16>, <16 x i16>)
+
+define <8 x i32> @lasx_xvmaddwod_w_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmaddwod_w_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmaddwod.w.h $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64>, <8 x i32>, <8 x i32>)
+
+define <4 x i64> @lasx_xvmaddwod_d_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmaddwod_d_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmaddwod.d.w $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64>, <4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvmaddwod_q_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind {
+; CHECK-LABEL: lasx_xvmaddwod_q_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvmaddwod.q.d $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x
i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16>, <32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmaddwod_h_bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwod_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.h.bu $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32>, <16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvmaddwod_w_hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwod_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.w.hu $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64>, <8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmaddwod_d_wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwod_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.d.wu $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64>, <4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmaddwod_q_du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwod_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.q.du $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16>, <32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmaddwod_h_bu_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwod_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.h.bu.b $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32>, <16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvmaddwod_w_hu_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwod_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.w.hu.h $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64>, <8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmaddwod_d_wu_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwod_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.d.wu.w $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64>, <4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmaddwod_q_du_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { +; 
CHECK-LABEL: lasx_xvmaddwod_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.q.du.d $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..a671e9979b2febf56334b9f6608c03da4f911f23 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmaxi_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 -17) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvmaxi_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmaxi_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 -17) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvmaxi_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 16) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmaxi_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 -17) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvmaxi_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 16) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmaxi_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 -17) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvmaxi_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 16) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmaxi_bu_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.bu: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvmaxi_bu_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.bu: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 32) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmaxi_hu_lo(<16 x i16> %va) nounwind { +; CHECK: 
llvm.loongarch.lasx.xvmaxi.hu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvmaxi_hu_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.hu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmaxi_wu_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.wu: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvmaxi_wu_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.wu: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmaxi_du_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.du: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvmaxi_du_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.du: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 32) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..b85798b53c92d178d6b012b9027902444f337451 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmaxi_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmaxi_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmaxi_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmaxi_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmaxi_bu(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmaxi_hu(<16 x i16> %va, i32 %b) nounwind { +; 
CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmaxi_wu(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmaxi_du(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll new file mode 100644 index 0000000000000000000000000000000000000000..9cf09df4439ad924086ef3388bf60e37b4856430 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll @@ -0,0 +1,194 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmax_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmax_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmax_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmax_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmax_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmax_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmax_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmax_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmax.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmaxi_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvmaxi_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxi.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmaxi_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvmaxi_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxi.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32>, i32) + +define <8 x i32> 
@lasx_xvmaxi_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvmaxi_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxi.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmaxi_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvmaxi_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxi.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmax_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmax_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmax_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmax_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmax_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmax_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmax.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmax_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmax_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmax.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmaxi_bu(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvmaxi_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxi.bu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmaxi_hu(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvmaxi_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxi.hu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmaxi_wu(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvmaxi_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxi.wu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmaxi_du(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvmaxi_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxi.du $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x
i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..5ed4104c295fab07d323ab7c559ec8c97cc0f026 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmini_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 -17) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvmini_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmini_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 -17) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvmini_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 16) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmini_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 -17) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvmini_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 16) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmini_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 -17) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvmini_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 16) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmini_bu_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.bu: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvmini_bu_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.bu: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 32) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmini_hu_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.hu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvmini_hu_hi(<16 x i16> %va) nounwind { 
+; CHECK: llvm.loongarch.lasx.xvmini.hu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmini_wu_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.wu: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvmini_wu_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.wu: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmini_du_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.du: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvmini_du_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.du: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 32) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..b81931977aad43b725d943dda9fd4d7fe3d4ecd2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmini_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmini_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmini_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmini_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmini_bu(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmini_hu(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32>, i32) + 
+define <8 x i32> @lasx_xvmini_wu(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmini_du(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll new file mode 100644 index 0000000000000000000000000000000000000000..c94b1e4ea44cb7712c7a22e22540e8db1b2621ab --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll @@ -0,0 +1,194 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmin_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmin_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmin_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmin_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmin.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmin_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmin_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmin.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmin_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmin_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmin.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmini_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvmini_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmini.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmini_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvmini_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmini.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmini_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvmini_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmini.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 
1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmini_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvmini_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmini.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmin_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmin_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmin_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmin_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmin_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmin_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmin.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmin_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmin_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmin.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmini_bu(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvmini_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmini.bu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmini_hu(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvmini_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmini.hu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmini_wu(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvmini_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmini.wu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmini_du(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvmini_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmini.du $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll new file mode 100644 index 
0000000000000000000000000000000000000000..a177246bb23508a24159fe4fcda46423d50bdad6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmod_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmod_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmod.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmod_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmod_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmod.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmod_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmod_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmod.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmod_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmod_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmod.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmod_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmod_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmod.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmod_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmod_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmod.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmod_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmod_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmod.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmod_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmod_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmod.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll new file mode 100644 index 
0000000000000000000000000000000000000000..da87c20ad6ee0ae0ba3e20c762bb8b6dbb09b9d8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8>) + +define <32 x i8> @lasx_xvmskgez_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvmskgez_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmskgez.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> %va) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll new file mode 100644 index 0000000000000000000000000000000000000000..b2218487535c634371707d763bcba6b54af856d9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8>) + +define <32 x i8> @lasx_xvmskltz_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvmskltz_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmskltz.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> %va) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16>) + +define <16 x i16> @lasx_xvmskltz_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvmskltz_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmskltz.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> %va) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32>) + +define <8 x i32> @lasx_xvmskltz_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvmskltz_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmskltz.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64>) + +define <4 x i64> @lasx_xvmskltz_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvmskltz_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmskltz.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> %va) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll new file mode 100644 index 0000000000000000000000000000000000000000..becd2c883a7ed71d9f232f8430fdb90cdc607f02 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8>) + +define <32 x i8> @lasx_xvmsknz_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvmsknz_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmsknz.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> %va) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll new file mode 100644 index 
0000000000000000000000000000000000000000..c89f9578b77d7fe760d9255ab1039c0f35430af4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8>, <32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmsub_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { +; CHECK-LABEL: lasx_xvmsub_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmsub.b $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16>, <16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmsub_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { +; CHECK-LABEL: lasx_xvmsub_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmsub.h $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32>, <8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmsub_w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { +; CHECK-LABEL: lasx_xvmsub_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmsub.w $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64>, <4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmsub_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { +; CHECK-LABEL: lasx_xvmsub_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmsub.d $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll new file mode 100644 index 0000000000000000000000000000000000000000..97461512ce1665fb38afb5f601a432d3edd950fe --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmuh_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmuh_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmuh.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmuh_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmuh_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmuh.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmuh_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmuh_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmuh.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> 
@llvm.loongarch.lasx.xvmuh.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmuh_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmuh_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmuh.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmuh_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmuh_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmuh.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmuh_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmuh_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmuh.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmuh_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmuh_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmuh.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmuh_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmuh_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmuh.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll new file mode 100644 index 0000000000000000000000000000000000000000..d5d852e58a9f9ce6573b02e41bbc58f0f083b19b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmul_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmul_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmul.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmul_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmul_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmul.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmul_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmul_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> 
@llvm.loongarch.lasx.xvmul.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmul_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmul_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmul.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll new file mode 100644 index 0000000000000000000000000000000000000000..f69e64aa76980e852534c185ced9958301d87731 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll @@ -0,0 +1,290 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmulwev_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.h.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvmulwev_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.w.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmulwev_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmulwev_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.q.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmulwev_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.h.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvmulwev_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.w.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmulwev_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: 
lasx_xvmulwev_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.d.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmulwev_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.q.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmulwev_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.h.bu.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvmulwev_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.w.hu.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmulwev_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.d.wu.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmulwev_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.q.du.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmulwod_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.h.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvmulwod_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.w.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmulwod_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + 
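; ----------------------------------------------------------------------------
; Editor's sketch (illustrative only, not part of the patch under review):
; the xvmulwev.* / xvmulwod.* pairs appear to multiply the even-indexed and
; odd-indexed source lanes respectively, widening each product to the next
; lane size (e.g. .h.b takes i8 sources and yields i16 products; suffixes
; like .bu.b mix an unsigned first source with a signed second source).
; Assuming that reading of the naming scheme, and reusing the declarations
; above, the two halves compose into a pairwise multiply-accumulate step:

define <16 x i16> @pairwise_mulw_sketch(<32 x i8> %va, <32 x i8> %vb) nounwind {
entry:
  ; even lanes: product[i] = sext(va[2*i]) * sext(vb[2*i])
  %ev = call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> %va, <32 x i8> %vb)
  ; odd lanes: product[i] = sext(va[2*i+1]) * sext(vb[2*i+1])
  %od = call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> %va, <32 x i8> %vb)
  ; summing the two halves gives one i16 term per adjacent pair of i8 elements
  %res = add <16 x i16> %ev, %od
  ret <16 x i16> %res
}
; ----------------------------------------------------------------------------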
+declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmulwod_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.q.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmulwod_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.h.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvmulwod_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.w.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmulwod_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.d.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmulwod_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.q.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmulwod_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.h.bu.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvmulwod_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.w.hu.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmulwod_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.d.wu.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmulwod_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_q_du_d: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: xvmulwod.q.du.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll new file mode 100644 index 0000000000000000000000000000000000000000..ecbedf33465787ced0a707dc773e067b86e0c97f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8>) + +define <32 x i8> @lasx_xvneg_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvneg_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvneg.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> %va) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16>) + +define <16 x i16> @lasx_xvneg_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvneg_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvneg.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> %va) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32>) + +define <8 x i32> @lasx_xvneg_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvneg_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvneg.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64>) + +define <4 x i64> @lasx_xvneg_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvneg_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvneg.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> %va) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll new file mode 100644 index 0000000000000000000000000000000000000000..674746b7624ec6c9a76ede0b49f77a4d48e3cc56 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvnor_v(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvnor_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..1130e094bf1f97ef86b0b9f4d743b84ff62f38ef --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvnori_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvnori.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + 
+define <32 x i8> @lasx_xvnori_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvnori.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 256) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..8f2333064d642fd0b4ce96295f9604828f301e2c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvnori_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll new file mode 100644 index 0000000000000000000000000000000000000000..55eebf87ee921e054776d26c5ffa430191441e3a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvnori_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvnori_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvnori.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll new file mode 100644 index 0000000000000000000000000000000000000000..16462cfafc54a1ea1c48253ae8aa6f5cfa654a71 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvor_v(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvor_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..90dec8e55f2d83b60630909ccd5b800d23e7723d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvori_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvori.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvori_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvori.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 256) + ret <32 x i8> %res +} diff 
--git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..ae6571d98f4af186c6bf8a604d2ae0cfdc162276 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvori_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll new file mode 100644 index 0000000000000000000000000000000000000000..8e53d88bac37460f3206a0935fbdb4d87f55cfa6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvori_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvori_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvori.b $xr0, $xr0, 3 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 3) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll new file mode 100644 index 0000000000000000000000000000000000000000..3a335cdd3716705f7e2a34a59fac279f1e51ff13 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvorn_v(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvorn_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll new file mode 100644 index 0000000000000000000000000000000000000000..512b3023491720d6df35d2c626f9daef6fafb742 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvpackev_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvpackev_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpackev.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvpackev_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvpackev_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpackev.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 
x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvpackev_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvpackev_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpackev.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvpackev_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvpackev_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpackev.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvpackod_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvpackod_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpackod.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvpackod_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvpackod_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpackod.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvpackod_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvpackod_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpackod.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvpackod_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvpackod_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpackod.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll new file mode 100644 index 0000000000000000000000000000000000000000..d77f1d2082c8d7209e401a481d9fcceeb546515a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8>) + +define <32 x i8> @lasx_xvpcnt_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvpcnt_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpcnt.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> %va) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16>) + +define <16 x i16> @lasx_xvpcnt_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvpcnt_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpcnt.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> %va) + ret <16 
x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32>) + +define <8 x i32> @lasx_xvpcnt_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvpcnt_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpcnt.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64>) + +define <4 x i64> @lasx_xvpcnt_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvpcnt_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpcnt.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> %va) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll new file mode 100644 index 0000000000000000000000000000000000000000..4ec434edd4ec73af6608bd9912b862aca1e295da --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvperm_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvperm_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..41f4856bd8f71d29e489fcc4efc0ab35d4886dec --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll @@ -0,0 +1,49 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvpermi_w_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvpermi.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvpermi_w_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvpermi.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 256) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvpermi_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpermi.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvpermi_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpermi.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 256) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvpermi_q_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvpermi.q: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvpermi_q_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { 
+; CHECK: llvm.loongarch.lasx.xvpermi.q: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 256) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..afb335c5d6cabd2274695209a3ac48d2484e0edd --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll @@ -0,0 +1,28 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvpermi_w(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvpermi_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvpermi_q(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll new file mode 100644 index 0000000000000000000000000000000000000000..0d9f9daabc44887efdda3375c15dd7e39490315a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvpermi_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvpermi_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvpermi_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvpermi_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvpermi_q(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvpermi_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll new file mode 100644 index 0000000000000000000000000000000000000000..bbd6d693ca0b32f6ca0cd54a0a8ccf62b8bf230e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have 
been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvpickev_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvpickev_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickev.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvpickev_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvpickev_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickev.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvpickev_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvpickev_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickev.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvpickev_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvpickev_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickev.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvpickod_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvpickod_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickod.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvpickod_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvpickod_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickod.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvpickod_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvpickod_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickod.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvpickod_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvpickod_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickod.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..cfc6ec42874e1ff818e3e18af2a1a24f5462d47c --- 
/dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvpickve_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvpickve_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 8) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvpickve_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvpickve_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 4) + ret <4 x i64> %res +} + +declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float>, i32) + +define <8 x float> @lasx_xvpickve_w_f_lo(<8 x float> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve.w.f: argument out of range +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 -1) + ret <8 x float> %res +} + +define <8 x float> @lasx_xvpickve_w_f_hi(<8 x float> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve.w.f: argument out of range +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 8) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) + +define <4 x double> @lasx_xvpickve_d_f_lo(<4 x double> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve.d.f: argument out of range +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 -1) + ret <4 x double> %res +} + +define <4 x double> @lasx_xvpickve_d_f_hi(<4 x double> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve.d.f: argument out of range +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 4) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..be1f19a8973709d76ed9eb0a92b44c8914c5f174 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvpickve_w(<8 x i32> %va, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvpickve_d(<4 x i64> %va, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 %c) + ret <4 x i64> %res +} + +declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x 
float>, i32) + +define <8 x float> @lasx_xvpickve_w_f(<8 x float> %va, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 %c) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) + +define <4 x double> @lasx_xvpickve_d_f(<4 x double> %va, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 %c) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll new file mode 100644 index 0000000000000000000000000000000000000000..546777bc72ab4896893c4273d8fcabcc7b820501 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvpickve_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvpickve_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickve.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvpickve_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvpickve_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickve.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} + +declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float>, i32) + +define <8 x float> @lasx_xvpickve_w_f(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvpickve_w_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickve.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 1) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) + +define <4 x double> @lasx_xvpickve_d_f(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvpickve_d_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickve.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 1) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..93056b272dfc518569766ddb800abfaf16c1d942 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32) + +define i32 @lasx_xvpickve2gr_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve2gr.w: argument out of range +entry: + %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 -1) + ret i32 %res +} + +define i32 @lasx_xvpickve2gr_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve2gr.w: argument out of range +entry: + %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 8) + ret i32 %res +} + +declare i64 
@llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32) + +define i64 @lasx_xvpickve2gr_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve2gr.d: argument out of range +entry: + %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 -1) + ret i64 %res +} + +define i64 @lasx_xvpickve2gr_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve2gr.d: argument out of range +entry: + %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 4) + ret i64 %res +} + +declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32) + +define i32 @lasx_xvpickve2gr_wu_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve2gr.wu: argument out of range +entry: + %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 -1) + ret i32 %res +} + +define i32 @lasx_xvpickve2gr_wu_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve2gr.wu: argument out of range +entry: + %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 8) + ret i32 %res +} + +declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32) + +define i64 @lasx_xvpickve2gr_du_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve2gr.du: argument out of range +entry: + %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 -1) + ret i64 %res +} + +define i64 @lasx_xvpickve2gr_du_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve2gr.du: argument out of range +entry: + %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 4) + ret i64 %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..0fa8c94adc60cd714895925f6c576307f03a3bdc --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32) + +define i32 @lasx_xvpickve2gr_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 %b) + ret i32 %res +} + +declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32) + +define i64 @lasx_xvpickve2gr_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 %b) + ret i64 %res +} + +declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32) + +define i32 @lasx_xvpickve2gr_wu(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 %b) + ret i32 %res +} + +declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32) + +define i64 @lasx_xvpickve2gr_du(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 %b) + ret i64 %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll new file mode 100644 index 0000000000000000000000000000000000000000..0617e7424321bdf435b25ebaf0dc175fbdd073eb --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated 
by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + + + + +declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32) + +define i32 @lasx_xvpickve2gr_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvpickve2gr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 1) + ret i32 %res +} + +declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32) + +define i64 @lasx_xvpickve2gr_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvpickve2gr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 1) + ret i64 %res +} + +declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32) + +define i32 @lasx_xvpickve2gr_wu(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvpickve2gr_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 1) + ret i32 %res +} + +declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32) + +define i64 @lasx_xvpickve2gr_du(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvpickve2gr_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickve2gr.du $a0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 1) + ret i64 %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..a0cb309c54e19fe78faa87792e132fea610d4d25 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvrepl128vei_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrepl128vei.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvrepl128vei_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrepl128vei.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvrepl128vei_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrepl128vei.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvrepl128vei_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrepl128vei.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 8) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvrepl128vei_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrepl128vei.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvrepl128vei_w_hi(<8 x i32> %va) nounwind { +; CHECK: 
llvm.loongarch.lasx.xvrepl128vei.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 4) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvrepl128vei_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrepl128vei.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvrepl128vei_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrepl128vei.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 2) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..c537ffa66ba7f5e88fb6562286b7f6990d7f1ccd --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvrepl128vei_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvrepl128vei_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvrepl128vei_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvrepl128vei_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll new file mode 100644 index 0000000000000000000000000000000000000000..25fab44f461f56db1ef4c7bcd3b5045ec4524a85 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvrepl128vei_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvrepl128vei_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrepl128vei.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvrepl128vei_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvrepl128vei_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
xvrepl128vei.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvrepl128vei_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvrepl128vei_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvrepl128vei_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvrepl128vei_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll new file mode 100644 index 0000000000000000000000000000000000000000..c71abd2205c6712a703c77cac00de76980e88964 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32) + +define <32 x i8> @lasx_xvreplgr2vr_b(i32 %a) nounwind { +; CHECK-LABEL: lasx_xvreplgr2vr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplgr2vr.b $xr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 %a) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32) + +define <16 x i16> @lasx_xvreplgr2vr_h(i32 %a) nounwind { +; CHECK-LABEL: lasx_xvreplgr2vr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 %a) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32) + +define <8 x i32> @lasx_xvreplgr2vr_w(i32 %a) nounwind { +; CHECK-LABEL: lasx_xvreplgr2vr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplgr2vr.w $xr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 %a) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64) + +define <4 x i64> @lasx_xvreplgr2vr_d(i64 %a) nounwind { +; CHECK-LABEL: lasx_xvreplgr2vr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 %a) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll new file mode 100644 index 0000000000000000000000000000000000000000..21d36ff7bb5ee06fb394ab9971dcbd5125d90553 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvreplve_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK-LABEL: lasx_xvreplve_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplve.b $xr0, $xr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> 
@llvm.loongarch.lasx.xvreplve.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvreplve_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK-LABEL: lasx_xvreplve_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplve.h $xr0, $xr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvreplve_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK-LABEL: lasx_xvreplve_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvreplve_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK-LABEL: lasx_xvreplve_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll new file mode 100644 index 0000000000000000000000000000000000000000..7996bb36ef03cb91067721c3626d3710da3009f9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8>) + +define <32 x i8> @lasx_xvreplve0_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvreplve0_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplve0.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> %va) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16>) + +define <16 x i16> @lasx_xvreplve0_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvreplve0_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplve0.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> %va) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32>) + +define <8 x i32> @lasx_xvreplve0_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvreplve0_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplve0.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64>) + +define <4 x i64> @lasx_xvreplve0_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvreplve0_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplve0.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> %va) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8>) + +define <32 x i8> @lasx_xvreplve0_q(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvreplve0_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplve0.q $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> %va) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..40abdf49760500a7aa4de23459a02f4c5f9ce9e9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvrotri_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrotri.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvrotri_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrotri.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 8) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvrotri_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrotri.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvrotri_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrotri.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 16) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvrotri_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrotri.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvrotri_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrotri.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvrotri_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrotri.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvrotri_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrotri.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 64) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..dd38301d0534560ea7bfccecb993d35f2f14aa1c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvrotri_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvrotri_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 %b) + ret <16 x 
i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvrotri_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvrotri_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll new file mode 100644 index 0000000000000000000000000000000000000000..64d2773864e9f016243b921c968f12f17ae380db --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvrotr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvrotr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrotr.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvrotr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvrotr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrotr.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvrotr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvrotr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrotr.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvrotr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvrotr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrotr.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvrotri_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvrotri_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrotri.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvrotri_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvrotri_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrotri.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvrotri_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvrotri_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 1)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvrotri_d(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvrotri_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvrotri.d $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 1)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll
new file mode 100644
index 0000000000000000000000000000000000000000..54a5e2e9c8332d3c955d1f044245e2172cecaec4
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvsadd_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsadd_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsadd.b $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> %va, <32 x i8> %vb)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvsadd_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsadd_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsadd.h $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> %va, <16 x i16> %vb)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvsadd_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsadd_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsadd.w $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> %va, <8 x i32> %vb)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvsadd_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsadd_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsadd.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> %va, <4 x i64> %vb)
+ ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvsadd_bu(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsadd_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsadd.bu $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> %va, <32 x i8> %vb)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvsadd_hu(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsadd_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsadd.hu $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> %va, <16 x i16> %vb)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvsadd_wu(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsadd_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsadd.wu $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> %va, <8 x i32> %vb)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvsadd_du(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsadd_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsadd.du $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> %va, <4 x i64> %vb)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..839fbc9990d34c09fad08aad7ff47fa153882eec
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll
@@ -0,0 +1,129 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsat_b_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.b: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvsat_b_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.b: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 8)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsat_h_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.h: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvsat_h_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.h: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 16)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsat_w_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.w: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvsat_w_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.w: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 32)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsat_d_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.d: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvsat_d_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.d: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 64)
+ ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsat_bu_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.bu: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvsat_bu_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.bu: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 8)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsat_hu_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.hu: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvsat_hu_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.hu: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 16)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsat_wu_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.wu: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvsat_wu_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.wu: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 32)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsat_du_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.du: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvsat_du_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.du: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 64)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..b73b32ebd3b021ff5781ff392a240ee6f7c86b42
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll
@@ -0,0 +1,73 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsat_b(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 %b)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsat_h(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 %b)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsat_w(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 %b)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsat_d(<4 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 %b)
+ ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsat_bu(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 %b)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsat_hu(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 %b)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsat_wu(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 %b)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsat_du(<4 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 %b)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll
new file mode 100644
index 0000000000000000000000000000000000000000..293b9dc9eb4d9f309711b2bb65ce0429ca7d4547
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsat_b(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvsat_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsat.b $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 1)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsat_h(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvsat_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsat.h $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 1)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsat_w(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvsat_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsat.w $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 1)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsat_d(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvsat_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsat.d $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 1)
+ ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsat_bu(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvsat_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsat.bu $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 1)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsat_hu(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvsat_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsat.hu $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 1)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsat_wu(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvsat_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsat.wu $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 1)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsat_du(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvsat_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsat.du $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 1)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..bb6ef0cc6574cce8f25c0124781a9f792fdaf7bf
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvseqi_b_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvseqi.b: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 -17)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvseqi_b_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvseqi.b: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 16)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvseqi_h_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvseqi.h: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 -17)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvseqi_h_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvseqi.h: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 16)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvseqi_w_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvseqi.w: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 -17)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvseqi_w_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvseqi.w: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 16)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvseqi_d_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvseqi.d: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 -17)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvseqi_d_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvseqi.d: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 16)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..fb2c6206da7b9531958bf1d27cead752cc83b58f
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvseqi_b(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 %b)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvseqi_h(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 %b)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvseqi_w(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 %b)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvseqi_d(<4 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 %b)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll
new file mode 100644
index 0000000000000000000000000000000000000000..83bc93c88c73c3bd26fc381e3324a6454b877731
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvseq_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvseq_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvseq.b $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> %va, <32 x i8> %vb)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvseq_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvseq_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvseq.h $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> %va, <16 x i16> %vb)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvseq_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvseq_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvseq.w $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> %va, <8 x i32> %vb)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvseq_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvseq_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvseq.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> %va, <4 x i64> %vb)
+ ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvseqi_b(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvseqi_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvseqi.b $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 1)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvseqi_h(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvseqi_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvseqi.h $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 1)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvseqi_w(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvseqi_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvseqi.w $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 1)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvseqi_d(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvseqi_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvseqi.d $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 1)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll
new file mode 100644
index 0000000000000000000000000000000000000000..6e3e2e0330f525a9566e63cf20dd423ea5fa4146
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare i32 @llvm.loongarch.lasx.xbz.v(<32 x i8>)
+
+define i32 @lasx_xbz_v(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xbz_v:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvseteqz.v $fcc0, $xr0
+; CHECK-NEXT: bcnez $fcc0, .LBB0_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 0
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_2: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> %va)
+ ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8>)
+
+define i32 @lasx_xbnz_v(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xbnz_v:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsetnez.v $fcc0, $xr0
+; CHECK-NEXT: bcnez $fcc0, .LBB1_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 0
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB1_2: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> %va)
+ ret i32 %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a466b78bf8d2d0af2e607fae4ee3fac1ac851873
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8>)
+
+define i32 @lasx_xbnz_b(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xbnz_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsetallnez.b $fcc0, $xr0
+; CHECK-NEXT: bcnez $fcc0, .LBB0_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 0
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_2: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> %va)
+ ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16>)
+
+define i32 @lasx_xbnz_h(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xbnz_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsetallnez.h $fcc0, $xr0
+; CHECK-NEXT: bcnez $fcc0, .LBB1_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 0
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB1_2: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> %va)
+ ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32>)
+
+define i32 @lasx_xbnz_w(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xbnz_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsetallnez.w $fcc0, $xr0
+; CHECK-NEXT: bcnez $fcc0, .LBB2_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 0
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB2_2: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> %va)
+ ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64>)
+
+define i32 @lasx_xbnz_d(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xbnz_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsetallnez.d $fcc0, $xr0
+; CHECK-NEXT: bcnez $fcc0, .LBB3_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 0
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB3_2: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> %va)
+ ret i32 %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll
new file mode 100644
index 0000000000000000000000000000000000000000..36e65fc5b32811fa17f0f1629695800c07aded55
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare i32 @llvm.loongarch.lasx.xbz.b(<32 x i8>)
+
+define i32 @lasx_xbz_b(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xbz_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsetanyeqz.b $fcc0, $xr0
+; CHECK-NEXT: bcnez $fcc0, .LBB0_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 0
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_2: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> %va)
+ ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lasx.xbz.h(<16 x i16>)
+
+define i32 @lasx_xbz_h(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xbz_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsetanyeqz.h $fcc0, $xr0
+; CHECK-NEXT: bcnez $fcc0, .LBB1_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 0
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB1_2: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> %va)
+ ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lasx.xbz.w(<8 x i32>)
+
+define i32 @lasx_xbz_w(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xbz_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsetanyeqz.w $fcc0, $xr0
+; CHECK-NEXT: bcnez $fcc0, .LBB2_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 0
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB2_2: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> %va)
+ ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lasx.xbz.d(<4 x i64>)
+
+define i32 @lasx_xbz_d(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xbz_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsetanyeqz.d $fcc0, $xr0
+; CHECK-NEXT: bcnez $fcc0, .LBB3_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 0
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB3_2: # %entry
+; CHECK-NEXT: addi.w $a0, $zero, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> %va)
+ ret i32 %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll
new file mode 100644
index 0000000000000000000000000000000000000000..9b9140f6ad62178b4255e67b1d3b44502b16d8b9
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8>, <32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvshuf_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind {
+; CHECK-LABEL: lasx_xvshuf_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16>, <16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvshuf_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind {
+; CHECK-LABEL: lasx_xvshuf_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvshuf.h $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32>, <8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvshuf_w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind {
+; CHECK-LABEL: lasx_xvshuf_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvshuf.w $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64>, <4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvshuf_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind {
+; CHECK-LABEL: lasx_xvshuf_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvshuf.d $xr0, $xr1, $xr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..9217d1f6a05da2c7a152bc34d55087585282e434
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvshuf4i_b_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvshuf4i.b: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvshuf4i_b_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvshuf4i.b: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 256)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvshuf4i_h_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvshuf4i.h: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvshuf4i_h_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvshuf4i.h: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 256)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvshuf4i_w_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvshuf4i.w: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvshuf4i_w_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvshuf4i.w: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 256)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvshuf4i_d_lo(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvshuf4i.d: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvshuf4i_d_hi(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvshuf4i.d: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 256)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..8d6d1c69419380e9430586fd20d9434d6f5235d6
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvshuf4i_b(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 %b)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvshuf4i_h(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 %b)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvshuf4i_w(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 %b)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvshuf4i_d(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 %c)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll
new file mode 100644
index 0000000000000000000000000000000000000000..31205086759c404892bd829ed696a9ae5699b507
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvshuf4i_b(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvshuf4i_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 1)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvshuf4i_h(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvshuf4i_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvshuf4i.h $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 1)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvshuf4i_w(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvshuf4i_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvshuf4i.w $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 1)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvshuf4i_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvshuf4i_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 1)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll
new file mode 100644
index 0000000000000000000000000000000000000000..e6c6d8ccd0d3507caaf304d01103e62aa4dc33fd
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvsigncov_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsigncov_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsigncov.b $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> %va, <32 x i8> %vb)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvsigncov_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsigncov_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsigncov.h $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> %va, <16 x i16> %vb)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvsigncov_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsigncov_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsigncov.w $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> %va, <8 x i32> %vb)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvsigncov_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsigncov_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsigncov.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> %va, <4 x i64> %vb)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..5b10aca9801d67fba3ac1f5990baa52e2233c1d6
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll
@@ -0,0 +1,129 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvslei_b_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.b: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 -17)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvslei_b_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.b: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 16)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvslei_h_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.h: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 -17)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvslei_h_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.h: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 16)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvslei_w_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.w: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 -17)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvslei_w_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.w: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 16)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvslei_d_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.d: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 -17)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvslei_d_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.d: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 16)
+ ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvslei_bu_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.bu: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvslei_bu_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.bu: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 32)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvslei_hu_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.hu: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvslei_hu_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.hu: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 32)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvslei_wu_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.wu: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvslei_wu_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.wu: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 32)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvslei_du_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.du: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvslei_du_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.du: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 32)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..903bc10d88b789614b515165e656d745df7ecb1e
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll
@@ -0,0 +1,73 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvslei_b(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 %b)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvslei_h(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 %b)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvslei_w(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 %b)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvslei_d(<4 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 %b)
+ ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvslei_bu(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 %b)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvslei_hu(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 %b)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvslei_wu(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 %b)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvslei_du(<4 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 %b)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll
new file mode 100644
index 0000000000000000000000000000000000000000..8895efc84b845d9b0dc3c445b4650817c168138d
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll
@@ -0,0 +1,194 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvsle_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsle_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsle.b $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> %va, <32 x i8> %vb)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvsle_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsle_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsle.h $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> %va, <16 x i16> %vb)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvsle_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsle_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsle.w $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> %va, <8 x i32> %vb)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvsle_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsle_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsle.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> %va, <4 x i64> %vb)
+ ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvslei_b(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvslei_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvslei.b $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 1)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvslei_h(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvslei_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvslei.h $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 1)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvslei_w(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvslei_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvslei.w $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 1)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvslei_d(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvslei_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvslei.d $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 1)
+ ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvsle_bu(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsle_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsle.bu $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> %va, <32 x i8> %vb)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvsle_hu(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsle_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsle.hu $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> %va, <16 x i16> %vb)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvsle_wu(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsle_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsle.wu $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> %va, <8 x i32> %vb)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvsle_du(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsle_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsle.du $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> %va, <4 x i64> %vb)
+ ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvslei_bu(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvslei_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvslei.bu $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 1)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvslei_hu(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvslei_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvslei.hu $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 1)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvslei_wu(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvslei_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvslei.wu $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 1)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvslei_du(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvslei_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvslei.du $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 1)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..bf8205376a6c2e4837b6d63b8080c62a2df269a6
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvslli_b_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslli.b: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvslli_b_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslli.b: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 8)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvslli_h_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslli.h: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvslli_h_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslli.h: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 16)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvslli_w_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslli.w: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvslli_w_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslli.w: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 32)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvslli_d_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslli.d: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvslli_d_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslli.d: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 64)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..b5368a86b5c3bc06b5742251d7bd8f19d86e2a6c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvslli_b(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 %b)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvslli_h(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 %b)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvslli_w(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 %b)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvslli_d(<4 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 %b)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll
new file mode 100644
index 0000000000000000000000000000000000000000..14110b613dbe3027b82559bf6c1a64540e47dc06
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvsll_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsll_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsll.b $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> %va, <32 x i8> %vb)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvsll_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsll_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsll.h $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> %va, <16 x i16> %vb)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvsll_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsll_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsll.w $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> %va, <8 x i32> %vb)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvsll_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsll_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsll.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> %va, <4 x i64> %vb)
+ ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvslli_b(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvslli_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvslli.b $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 1)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvslli_h(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvslli_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvslli.h $xr0, $xr0, 1
+; CHECK-NEXT: ret
CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvslli_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvslli_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslli.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvslli_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvslli_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslli.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..18803767d6c01c6b407149b75aea53afdb08e108 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll @@ -0,0 +1,97 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32) + +define <16 x i16> @lasx_xvsllwil_h_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.h.b: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvsllwil_h_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.h.b: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 8) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16>, i32) + +define <8 x i32> @lasx_xvsllwil_w_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.w.h: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvsllwil_w_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.w.h: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 16) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32>, i32) + +define <4 x i64> @lasx_xvsllwil_d_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.d.w: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvsllwil_d_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.d.w: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 32) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8>, i32) + +define <16 x i16> @lasx_xvsllwil_hu_bu_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.hu.bu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvsllwil_hu_bu_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.hu.bu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 8) + ret <16 x i16> %res 
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16>, i32)
+
+define <8 x i32> @lasx_xvsllwil_wu_hu_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsllwil.wu.hu: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvsllwil_wu_hu_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsllwil.wu.hu: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 16)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32>, i32)
+
+define <4 x i64> @lasx_xvsllwil_du_wu_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsllwil.du.wu: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 -1)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvsllwil_du_wu_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsllwil.du.wu: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 32)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..3f5d4d6316715216dee55f59693bae7dd9e36fbf
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll
@@ -0,0 +1,55 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32)
+
+define <16 x i16> @lasx_xvsllwil_h_b(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 %b)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16>, i32)
+
+define <8 x i32> @lasx_xvsllwil_w_h(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 %b)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32>, i32)
+
+define <4 x i64> @lasx_xvsllwil_d_w(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 %b)
+  ret <4 x i64> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8>, i32)
+
+define <16 x i16> @lasx_xvsllwil_hu_bu(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 %b)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16>, i32)
+
+define <8 x i32> @lasx_xvsllwil_wu_hu(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 %b)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32>, i32)
+
+define <4 x i64> @lasx_xvsllwil_du_wu(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 %b)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a72b8a6cbb4f4155894fc5dc7b6251acd1aae83b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32)
+
+define <16 x i16> @lasx_xvsllwil_h_b(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvsllwil_h_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsllwil.h.b $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 1)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16>, i32)
+
+define <8 x i32> @lasx_xvsllwil_w_h(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvsllwil_w_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsllwil.w.h $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 1)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32>, i32)
+
+define <4 x i64> @lasx_xvsllwil_d_w(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvsllwil_d_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsllwil.d.w $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 1)
+  ret <4 x i64> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8>, i32)
+
+define <16 x i16> @lasx_xvsllwil_hu_bu(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvsllwil_hu_bu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsllwil.hu.bu $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 1)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16>, i32)
+
+define <8 x i32> @lasx_xvsllwil_wu_hu(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvsllwil_wu_hu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsllwil.wu.hu $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 1)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32>, i32)
+
+define <4 x i64> @lasx_xvsllwil_du_wu(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvsllwil_du_wu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsllwil.du.wu $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 1)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..dc0567da4e47e476de8807f7080c93c0dcf03712
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll
@@ -0,0 +1,129 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvslti_b_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslti.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 -17)
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvslti_b_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslti.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 16)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvslti_h_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslti.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 -17)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvslti_h_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslti.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 16)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvslti_w_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslti.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 -17)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvslti_w_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslti.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 16)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvslti_d_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslti.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 -17)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvslti_d_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslti.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 16)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvslti_bu_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslti.bu: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 -1)
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvslti_bu_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslti.bu: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 32)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvslti_hu_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslti.hu: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 -1)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvslti_hu_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslti.hu: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 32)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvslti_wu_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslti.wu: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvslti_wu_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslti.wu: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 32)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvslti_du_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslti.du: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 -1)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvslti_du_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslti.du: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 32)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a2cedc8d3ef34cff77b1790c620abca473746bac
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll
@@ -0,0 +1,73 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvslti_b(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 %b)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvslti_h(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 %b)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvslti_w(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 %b)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvslti_d(<4 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 %b)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvslti_bu(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 %b)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvslti_hu(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 %b)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvslti_wu(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 %b)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvslti_du(<4 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 %b)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll
new file mode 100644
index 0000000000000000000000000000000000000000..3ea87adff110a264ab0a44dc771ff215a17dbe52
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll
@@ -0,0 +1,194 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvslt_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvslt_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvslt.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> %va, <32 x i8> %vb)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvslt_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvslt_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvslt.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> %va, <16 x i16> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvslt_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvslt_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvslt.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> %va, <8 x i32> %vb)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvslt_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvslt_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvslt.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> %va, <4 x i64> %vb)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvslti_b(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvslti_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvslti.b $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 1)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvslti_h(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvslti_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvslti.h $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 1)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvslti_w(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvslti_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvslti.w $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 1)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvslti_d(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvslti_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvslti.d $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 1)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvslt_bu(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvslt_bu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvslt.bu $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> %va, <32 x i8> %vb)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvslt_hu(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvslt_hu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvslt.hu $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> %va, <16 x i16> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvslt_wu(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvslt_wu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvslt.wu $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> %va, <8 x i32> %vb)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvslt_du(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvslt_du:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvslt.du $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> %va, <4 x i64> %vb)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvslti_bu(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvslti_bu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvslti.bu $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 1)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvslti_hu(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvslti_hu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvslti.hu $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 1)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvslti_wu(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvslti_wu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvslti.wu $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 1)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvslti_du(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvslti_du:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvslti.du $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 1)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..15b522d5e7e3ae91a56cd73619468d3cb3deed5c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsrai_b_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrai.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 -1)
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvsrai_b_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrai.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 8)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsrai_h_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrai.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 -1)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvsrai_h_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrai.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 16)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsrai_w_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrai.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvsrai_w_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrai.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 32)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsrai_d_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrai.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 -1)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvsrai_d_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrai.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 64)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..fefee7246ae6db2585799dc90d90a4dc6cfe0884
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsrai_b(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 %b)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsrai_h(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 %b)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsrai_w(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 %b)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsrai_d(<4 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 %b)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a7498682559bd362e9098991801e813789e44f43
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvsra_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsra_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsra.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> %va, <32 x i8> %vb)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvsra_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsra_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsra.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> %va, <16 x i16> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvsra_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsra_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsra.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> %va, <8 x i32> %vb)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvsra_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsra_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsra.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> %va, <4 x i64> %vb)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsrai_b(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvsrai_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrai.b $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 1)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsrai_h(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvsrai_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrai.h $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 1)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsrai_w(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvsrai_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrai.w $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 1)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsrai_d(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvsrai_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrai.d $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 1)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll
new file mode 100644
index 0000000000000000000000000000000000000000..f59ae4c196621ffe7387099af08df45c0ed72fdc
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16>, <16 x i16>)
+
+define <32 x i8> @lasx_xvsran_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsran_b_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsran.b.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> %va, <16 x i16> %vb)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32>, <8 x i32>)
+
+define <16 x i16> @lasx_xvsran_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsran_h_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsran.h.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> %va, <8 x i32> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64>, <4 x i64>)
+
+define <8 x i32> @lasx_xvsran_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsran_w_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsran.w.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> %va, <4 x i64> %vb)
+  ret <8 x i32> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..bedbfc4889d202df9484280cfb74f1cf9c0bd899
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsrani_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrani.b.h: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1)
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvsrani_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrani.b.h: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsrani_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrani.h.w: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvsrani_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrani.h.w: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsrani_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrani.w.d: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvsrani_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrani.w.d: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsrani_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrani.d.q: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvsrani_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrani.d.q: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..3c17f2b6090a9bd2af5eb0c3859a2579c1eb8804
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsrani_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsrani_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsrani_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsrani_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll
new file mode 100644
index 0000000000000000000000000000000000000000..91fb90da9c525bb0d43c964976895f52a95e6438
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsrani_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrani_b_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrani.b.h $xr0, $xr1, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsrani_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrani_h_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrani.h.w $xr0, $xr1, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsrani_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrani_w_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrani.w.d $xr0, $xr1, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsrani_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrani_d_q:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrani.d.q $xr0, $xr1, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..e417e3cc5bbfef497d4223fb8ea74ee24ae7a451
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsrari_b_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrari.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 -1)
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvsrari_b_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrari.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 8)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsrari_h_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrari.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 -1)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvsrari_h_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrari.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 16)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsrari_w_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrari.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvsrari_w_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrari.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 32)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsrari_d_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrari.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 -1)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvsrari_d_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrari.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 64)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..15fed7966f1c22f11f529184b108bc006269c148
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsrari_b(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 %b)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsrari_h(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 %b)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsrari_w(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 %b)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsrari_d(<4 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 %b)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll
new file mode 100644
index 0000000000000000000000000000000000000000..e2c160557c4dc05d0eae7d6ba23f83c88ac57c04
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvsrar_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrar_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrar.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> %va, <32 x i8> %vb)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvsrar_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrar_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrar.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> %va, <16 x i16> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvsrar_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrar_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrar.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> %va, <8 x i32> %vb)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvsrar_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrar_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrar.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> %va, <4 x i64> %vb)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsrari_b(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvsrari_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrari.b $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 1)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsrari_h(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvsrari_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrari.h $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 1)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsrari_w(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvsrari_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrari.w $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 1)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsrari_d(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvsrari_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrari.d $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 1)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll
new file mode 100644
index 0000000000000000000000000000000000000000..02dd989773ca11ca7c595b3da4f3033a78caf930
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16>, <16 x i16>)
+
+define <32 x i8> @lasx_xvsrarn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrarn_b_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrarn.b.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> %va, <16 x i16> %vb)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32>, <8 x i32>)
+
+define <16 x i16> @lasx_xvsrarn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrarn_h_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrarn.h.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> %va, <8 x i32> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64>, <4 x i64>)
+
+define <8 x i32> @lasx_xvsrarn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrarn_w_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrarn.w.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> %va, <4 x i64> %vb)
+  ret <8 x i32> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..83e977827e2d0eb5d707d7f0f0f437dc89bf7414
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsrarni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrarni.b.h: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1)
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvsrarni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrarni.b.h: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsrarni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrarni.h.w: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvsrarni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrarni.h.w: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsrarni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrarni.w.d: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvsrarni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrarni.w.d: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsrarni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrarni.d.q: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvsrarni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrarni.d.q: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..eb577a29fb33b5dc60af02e9d08d6ab4eac07b91
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsrarni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsrarni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsrarni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsrarni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a7d2c37397936c0d72524d0d929f98eb4c8fb034
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsrarni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrarni_b_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrarni.b.h $xr0, $xr1, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsrarni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrarni_h_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrarni.h.w $xr0, $xr1, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsrarni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrarni_w_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrarni.w.d $xr0, $xr1, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsrarni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrarni_d_q:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrarni.d.q $xr0, $xr1, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..3ab02dcb97edd8eb0077a556243d6ae190494308
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsrli_b_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrli.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 -1)
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvsrli_b_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrli.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 8)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsrli_h_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrli.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 -1)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvsrli_h_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrli.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 16)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsrli_w_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrli.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvsrli_w_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrli.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 32)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsrli_d_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrli.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 -1)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvsrli_d_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrli.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 64)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..bc085aeaa232a06c813fdf3082db505c2c653f74
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsrli_b(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 %b)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsrli_h(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 %b)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsrli_w(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 %b)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsrli_d(<4 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 %b)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll
new file mode 100644
index 0000000000000000000000000000000000000000..7b2992f2ca3bca6e3f52265f5ad7935c26559b1e
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvsrl_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrl_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrl.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> %va, <32 x i8> %vb)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvsrl_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrl_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrl.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> %va, <16 x i16> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvsrl_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrl_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrl.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> %va, <8 x i32> %vb)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvsrl_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrl_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrl.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> %va, <4 x i64> %vb)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsrli_b(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvsrli_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrli.b $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 1)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsrli_h(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvsrli_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrli.h $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 1)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsrli_w(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvsrli_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 1)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsrli_d(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvsrli_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrli.d $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 1)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll
new file mode 100644
index 0000000000000000000000000000000000000000..dc5c0e016ea0a7e83f07115093d4f8447355b779
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16>, <16 x i16>)
+
+define <32 x i8> @lasx_xvsrln_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrln_b_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrln.b.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> %va, <16 x i16> %vb)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32>, <8 x i32>)
+
+define <16 x i16> @lasx_xvsrln_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrln_h_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrln.h.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> %va, <8 x i32> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64>, <4 x i64>)
+
+define <8 x i32> @lasx_xvsrln_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsrln_w_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsrln.w.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> %va, <4 x i64> %vb)
+  ret <8 x i32> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..9e7c94305630b51a371aedde0c40ee9d777e95fd
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsrlni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrlni.b.h: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1)
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvsrlni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrlni.b.h: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsrlni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrlni.h.w: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvsrlni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrlni.h.w: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsrlni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsrlni.w.d: argument out of range
+entry:
+  %res = call <8 x
i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvsrlni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlni.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvsrlni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlni.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvsrlni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlni.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..66d8004700034e41e9430cefd15b6aa24baa89f2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvsrlni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvsrlni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvsrlni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvsrlni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll new file mode 100644 index 0000000000000000000000000000000000000000..0301ebb195e266b177770cce420650b7fec1afb1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvsrlni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlni_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlni.b.h $xr0, $xr1, 1 
+; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvsrlni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlni_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlni.h.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvsrlni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlni_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlni.w.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvsrlni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlni_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlni.d.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..52621ddc6f49a37a91e4cd283e5b1379d50f350c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvsrlri_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlri.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvsrlri_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlri.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 8) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvsrlri_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlri.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvsrlri_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlri.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 16) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvsrlri_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlri.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvsrlri_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlri.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64>, i32) + +define <4 x i64> 
@lasx_xvsrlri_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlri.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvsrlri_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlri.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 64) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..5663e3475b1224a58bb27187d39c602237f9d6c4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvsrlri_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvsrlri_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvsrlri_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvsrlri_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll new file mode 100644 index 0000000000000000000000000000000000000000..e04504158e27463ec8b1cf003d60ea5900e3c116 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvsrlr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlr.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvsrlr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlr.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvsrlr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlr.w 
$xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvsrlr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlr.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvsrlri_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvsrlri_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlri.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvsrlri_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvsrlri_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlri.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvsrlri_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvsrlri_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlri.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvsrlri_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvsrlri_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlri.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll new file mode 100644 index 0000000000000000000000000000000000000000..1e7df379c6e1e007cd8a80e4f5bc3426daef8c3b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16>, <16 x i16>) + +define <32 x i8> @lasx_xvsrlrn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlrn_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlrn.b.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> %va, <16 x i16> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32>, <8 x i32>) + +define <16 x i16> @lasx_xvsrlrn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlrn_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlrn.h.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> %va, <8 x i32> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64>, <4 x i64>) + +define <8 x i32> @lasx_xvsrlrn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlrn_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlrn.w.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x 
i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> %va, <4 x i64> %vb) + ret <8 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..2d65a75b175a3d6cd6292148dae4931da4b61982 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvsrlrni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlrni.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvsrlrni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlrni.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvsrlrni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlrni.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvsrlrni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlrni.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvsrlrni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlrni.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvsrlrni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlrni.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvsrlrni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlrni.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvsrlrni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlrni.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..82da0d21d013e0b9fc5fa49db3f41b68c775a702 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> 
@llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvsrlrni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvsrlrni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvsrlrni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvsrlrni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll new file mode 100644 index 0000000000000000000000000000000000000000..56dbafe8b1ac38d1850d202eef91c3c446a41942 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvsrlrni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlrni_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlrni.b.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvsrlrni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlrni_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlrni.h.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvsrlrni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlrni_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlrni.w.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvsrlrni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlrni_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlrni.d.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll new file mode 100644 index 0000000000000000000000000000000000000000..da1857dad14512d3bd549fa84d431b2975c97987 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16>, <16 x i16>) + +define <32 x i8> @lasx_xvssran_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssran_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssran.b.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> %va, <16 x i16> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32>, <8 x i32>) + +define <16 x i16> @lasx_xvssran_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssran_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssran.h.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> %va, <8 x i32> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64>, <4 x i64>) + +define <8 x i32> @lasx_xvssran_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssran_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssran.w.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> %va, <4 x i64> %vb) + ret <8 x i32> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16>, <16 x i16>) + +define <32 x i8> @lasx_xvssran_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssran_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssran.bu.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> %va, <16 x i16> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32>, <8 x i32>) + +define <16 x i16> @lasx_xvssran_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssran_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssran.hu.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> %va, <8 x i32> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64>, <4 x i64>) + +define <8 x i32> @lasx_xvssran_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssran_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssran.wu.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> %va, <4 x i64> %vb) + ret <8 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..e10d5d7bd4882c35c039118685db85ff22e4c850 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrani_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.b.h: argument out of range 
+entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvssrani_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrani_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvssrani_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrani_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvssrani_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrani_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvssrani_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrani_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.bu.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvssrani_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.bu.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrani_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.hu.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvssrani_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.hu.w: argument out of range +entry: + %res = call <16 x i16> 
@llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrani_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.wu.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvssrani_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.wu.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrani_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.du.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvssrani_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.du.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..a928cc2de8c81818e127aecbb38b4777268ea324 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrani_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrani_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrani_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrani_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrani_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) + 
ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrani_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrani_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrani_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll new file mode 100644 index 0000000000000000000000000000000000000000..9efa659b4a1e0f26b1c62c914fc2fd9fea673234 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrani_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrani_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrani.b.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrani_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrani_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrani.h.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrani_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrani_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrani.w.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrani_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrani_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrani.d.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrani_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrani_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrani.bu.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> 
@llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrani_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrani_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrani.hu.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrani_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrani_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrani.wu.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrani_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrani_du_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrani.du.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll new file mode 100644 index 0000000000000000000000000000000000000000..b5d59ff06f4d140ac86706079ee42b6b93aa1152 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16>, <16 x i16>) + +define <32 x i8> @lasx_xvssrarn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarn_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarn.b.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> %va, <16 x i16> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32>, <8 x i32>) + +define <16 x i16> @lasx_xvssrarn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarn_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarn.h.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> %va, <8 x i32> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64>, <4 x i64>) + +define <8 x i32> @lasx_xvssrarn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarn_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarn.w.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> %va, <4 x i64> %vb) + ret <8 x i32> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16>, <16 x i16>) + +define <32 x i8> @lasx_xvssrarn_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarn_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarn.bu.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> %va, <16 x i16> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32>, <8 x i32>) + +define 
<16 x i16> @lasx_xvssrarn_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarn_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarn.hu.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> %va, <8 x i32> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64>, <4 x i64>) + +define <8 x i32> @lasx_xvssrarn_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarn_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarn.wu.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> %va, <4 x i64> %vb) + ret <8 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..42cd6ac99754e3a21f46d65a14fbae74c30da1c1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrarni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvssrarni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrarni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvssrarni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrarni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvssrarni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrarni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvssrarni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) 
nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrarni_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.bu.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvssrarni_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.bu.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrarni_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.hu.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvssrarni_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.hu.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrarni_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.wu.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvssrarni_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.wu.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrarni_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.du.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvssrarni_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.du.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..f050e7d79b0f50887ab6a9436d7d2a4f55da202b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrarni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> 
%va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrarni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrarni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrarni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrarni_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrarni_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrarni_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrarni_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll new file mode 100644 index 0000000000000000000000000000000000000000..da411dad645bbdd61a4486781866df4a1f6522a3 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrarni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarni_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarni.b.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x 
i16> @lasx_xvssrarni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarni_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarni.h.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrarni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarni_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarni.w.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrarni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarni_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarni.d.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrarni_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarni_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarni.bu.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrarni_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarni_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarni.hu.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrarni_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarni_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarni.wu.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrarni_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarni_du_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarni.du.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll new file mode 100644 index 0000000000000000000000000000000000000000..c60b5bdf81a03a001528d2b970c74cc55befefdb --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16>, <16 x i16>) + +define <32 x i8> @lasx_xvssrln_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrln_b_h: +; CHECK: # %bb.0: # %entry +; 
+; CHECK-NEXT: xvssrln.b.h $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> %va, <16 x i16> %vb)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32>, <8 x i32>)
+
+define <16 x i16> @lasx_xvssrln_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrln_h_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrln.h.w $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> %va, <8 x i32> %vb)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64>, <4 x i64>)
+
+define <8 x i32> @lasx_xvssrln_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrln_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrln.w.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> %va, <4 x i64> %vb)
+ ret <8 x i32> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16>, <16 x i16>)
+
+define <32 x i8> @lasx_xvssrln_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrln_bu_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrln.bu.h $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> %va, <16 x i16> %vb)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32>, <8 x i32>)
+
+define <16 x i16> @lasx_xvssrln_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrln_hu_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrln.hu.w $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> %va, <8 x i32> %vb)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64>, <4 x i64>)
+
+define <8 x i32> @lasx_xvssrln_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrln_wu_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrln.wu.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> %va, <4 x i64> %vb)
+ ret <8 x i32> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..26be21a83aa4da6e7f77094f3bb0d38639251cf8
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll
@@ -0,0 +1,129 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvssrlni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlni.b.h: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvssrlni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlni.b.h: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvssrlni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlni.h.w: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvssrlni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlni.h.w: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvssrlni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlni.w.d: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvssrlni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlni.w.d: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvssrlni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlni.d.q: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvssrlni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlni.d.q: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128)
+ ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvssrlni_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlni.bu.h: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvssrlni_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlni.bu.h: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvssrlni_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlni.hu.w: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvssrlni_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlni.hu.w: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvssrlni_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlni.wu.d: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvssrlni_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlni.wu.d: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvssrlni_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlni.du.q: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvssrlni_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlni.du.q: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..72da2a746dd5d69106b7c33bc5a7d79f3340a3af
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll
@@ -0,0 +1,73 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvssrlni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvssrlni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvssrlni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvssrlni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c)
+ ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvssrlni_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvssrlni_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvssrlni_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvssrlni_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll
new file mode 100644
index 0000000000000000000000000000000000000000..e57dd426bde8ce0eac17b7ee58f20c1e5983a99c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvssrlni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlni_b_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlni.b.h $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvssrlni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlni_h_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlni.h.w $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvssrlni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlni_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlni.w.d $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvssrlni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlni_d_q:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlni.d.q $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1)
+ ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvssrlni_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlni_bu_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlni.bu.h $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvssrlni_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlni_hu_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlni.hu.w $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvssrlni_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlni_wu_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlni.wu.d $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvssrlni_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlni_du_q:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlni.du.q $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll
new file mode 100644
index 0000000000000000000000000000000000000000..774cf1bd5e84977362ee25eb7f7c731c3e68e890
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16>, <16 x i16>)
+
+define <32 x i8> @lasx_xvssrlrn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlrn_b_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlrn.b.h $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> %va, <16 x i16> %vb)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32>, <8 x i32>)
+
+define <16 x i16> @lasx_xvssrlrn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlrn_h_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlrn.h.w $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> %va, <8 x i32> %vb)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64>, <4 x i64>)
+
+define <8 x i32> @lasx_xvssrlrn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlrn_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlrn.w.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> %va, <4 x i64> %vb)
+ ret <8 x i32> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16>, <16 x i16>)
+
+define <32 x i8> @lasx_xvssrlrn_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlrn_bu_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlrn.bu.h $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> %va, <16 x i16> %vb)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32>, <8 x i32>)
+
+define <16 x i16> @lasx_xvssrlrn_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlrn_hu_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlrn.hu.w $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> %va, <8 x i32> %vb)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64>, <4 x i64>)
+
+define <8 x i32> @lasx_xvssrlrn_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlrn_wu_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlrn.wu.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> %va, <4 x i64> %vb)
+ ret <8 x i32> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..cd778e2c0627d2a2c2f1856b82705f4a562b047c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll
@@ -0,0 +1,129 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvssrlrni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlrni.b.h: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvssrlrni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlrni.b.h: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvssrlrni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlrni.h.w: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvssrlrni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlrni.h.w: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvssrlrni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlrni.w.d: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvssrlrni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlrni.w.d: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvssrlrni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlrni.d.q: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvssrlrni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlrni.d.q: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128)
+ ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvssrlrni_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlrni.bu.h: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvssrlrni_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlrni.bu.h: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvssrlrni_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlrni.hu.w: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvssrlrni_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlrni.hu.w: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvssrlrni_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlrni.wu.d: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvssrlrni_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlrni.wu.d: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvssrlrni_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlrni.du.q: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvssrlrni_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvssrlrni.du.q: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a10c543291499f13896ed6903c6b3d97a718551f
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll
@@ -0,0 +1,73 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvssrlrni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvssrlrni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvssrlrni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvssrlrni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c)
+ ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvssrlrni_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvssrlrni_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvssrlrni_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvssrlrni_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll
new file mode 100644
index 0000000000000000000000000000000000000000..9a80516d8d7838e178e1b9eed629283e18dddb1f
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvssrlrni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlrni_b_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlrni.b.h $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvssrlrni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlrni_h_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlrni.h.w $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvssrlrni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlrni_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlrni.w.d $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvssrlrni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlrni_d_q:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlrni.d.q $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1)
+ ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvssrlrni_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlrni_bu_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlrni.bu.h $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvssrlrni_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlrni_hu_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlrni.hu.w $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvssrlrni_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlrni_wu_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlrni.wu.d $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvssrlrni_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssrlrni_du_q:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssrlrni.du.q $xr0, $xr1, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll
new file mode 100644
index 0000000000000000000000000000000000000000..cd3ccd9f52625048fa1e7d1441700dc6e323694f
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvssub_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssub_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssub.b $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> %va, <32 x i8> %vb)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvssub_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssub_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssub.h $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> %va, <16 x i16> %vb)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvssub_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssub_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssub.w $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> %va, <8 x i32> %vb)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvssub_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssub_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssub.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> %va, <4 x i64> %vb)
+ ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvssub_bu(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssub_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssub.bu $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> %va, <32 x i8> %vb)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvssub_hu(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssub_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssub.hu $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> %va, <16 x i16> %vb)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvssub_wu(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssub_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssub.wu $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> %va, <8 x i32> %vb)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvssub_du(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvssub_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvssub.du $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> %va, <4 x i64> %vb)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..0177f2b77b939f5490e20e3b06aa00b36b0b475a
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll
@@ -0,0 +1,17 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare void @llvm.loongarch.lasx.xvst(<32 x i8>, i8*, i32)
+
+define void @lasx_xvst_lo(<32 x i8> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvst: argument out of range
+entry:
+ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 -2049)
+ ret void
+}
+
+define void @lasx_xvst_hi(<32 x i8> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvst: argument out of range
+entry:
+ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 2048)
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..c19207aad6b8cb30dd2dcba85442e1eb49bafa07
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll
@@ -0,0 +1,10 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare void @llvm.loongarch.lasx.xvst(<32 x i8>, i8*, i32)
+
+define void @lasx_xvst(<32 x i8> %va, i8* %p, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 %b)
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll
new file mode 100644
index 0000000000000000000000000000000000000000..b69e7b813f0c1d65d0c7ac5a1323b34e0415b2f7
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare void @llvm.loongarch.lasx.xvst(<32 x i8>, i8*, i32)
+
+define void @lasx_xvst(<32 x i8> %va, i8* %p) nounwind {
+; CHECK-LABEL: lasx_xvst:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvst $xr0, $a0, 1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 1)
+ ret void
+}
+
+declare void @llvm.loongarch.lasx.xvstx(<32 x i8>, i8*, i64)
+
+define void @lasx_xvstx(<32 x i8> %va, i8* %p) nounwind {
+; CHECK-LABEL: lasx_xvstx:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ori $a1, $zero, 1
+; CHECK-NEXT: xvstx $xr0, $a0, $a1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.loongarch.lasx.xvstx(<32 x i8> %va, i8* %p, i64 1)
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..0ea2484e090df094460a5d0fd279384617156b34
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll
@@ -0,0 +1,121 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, i8*, i32, i32)
+
+define void @lasx_xvstelm_b_lo(<32 x i8> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 -129, i32 1)
+ ret void
+}
+
+define void @lasx_xvstelm_b_hi(<32 x i8> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 128, i32 1)
+ ret void
+}
+
+define void @lasx_xvstelm_b_idx_lo(<32 x i8> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 -1)
+ ret void
+}
+
+define void @lasx_xvstelm_b_idx_hi(<32 x i8> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 32)
+ ret void
+}
+
+declare void @llvm.loongarch.lasx.xvstelm.h(<16 x i16>, i8*, i32, i32)
+
+define void @lasx_xvstelm_h_lo(<16 x i16> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2.
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 -258, i32 1)
+ ret void
+}
+
+define void @lasx_xvstelm_h_hi(<16 x i16> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2.
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 256, i32 1)
+ ret void
+}
+
+define void @lasx_xvstelm_h_idx_lo(<16 x i16> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2.
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 -1)
+ ret void
+}
+
+define void @lasx_xvstelm_h_idx_hi(<16 x i16> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2.
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 16)
+ ret void
+}
+
+declare void @llvm.loongarch.lasx.xvstelm.w(<8 x i32>, i8*, i32, i32)
+
+define void @lasx_xvstelm_w_lo(<8 x i32> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4.
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 -516, i32 1)
+ ret void
+}
+
+define void @lasx_xvstelm_w_hi(<8 x i32> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4.
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 512, i32 1)
+ ret void
+}
+
+define void @lasx_xvstelm_w_idx_lo(<8 x i32> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4.
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 -1)
+ ret void
+}
+
+define void @lasx_xvstelm_w_idx_hi(<8 x i32> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4.
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 8)
+ ret void
+}
+
+declare void @llvm.loongarch.lasx.xvstelm.d(<4 x i64>, i8*, i32, i32)
+
+define void @lasx_xvstelm_d_lo(<4 x i64> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8.
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 -1032, i32 1)
+ ret void
+}
+
+define void @lasx_xvstelm_d_hi(<4 x i64> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8.
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 1024, i32 1)
+ ret void
+}
+
+define void @lasx_xvstelm_d_idx_lo(<4 x i64> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8.
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 -1)
+ ret void
+}
+
+define void @lasx_xvstelm_d_idx_hi(<4 x i64> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8.
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 4)
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..42c7c0da174696ca381b88672b2d16036d667076
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, i8*, i32, i32)
+
+define void @lasx_xvstelm_b(<32 x i8> %va, i8* %p, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 %b, i32 1)
+ ret void
+}
+
+define void @lasx_xvstelm_b_idx(<32 x i8> %va, i8* %p, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 %b)
+ ret void
+}
+
+declare void @llvm.loongarch.lasx.xvstelm.h(<16 x i16>, i8*, i32, i32)
+
+define void @lasx_xvstelm_h(<16 x i16> %va, i8* %p, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 %b, i32 1)
+ ret void
+}
+
+define void @lasx_xvstelm_h_idx(<16 x i16> %va, i8* %p, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 %b)
+ ret void
+}
+
+declare void @llvm.loongarch.lasx.xvstelm.w(<8 x i32>, i8*, i32, i32)
+
+define void @lasx_xvstelm_w(<8 x i32> %va, i8* %p, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 %b, i32 1)
+ ret void
+}
+
+define void @lasx_xvstelm_w_idx(<8 x i32> %va, i8* %p, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 %b)
+ ret void
+}
+
+declare void @llvm.loongarch.lasx.xvstelm.d(<4 x i64>, i8*, i32, i32)
+
+define void @lasx_xvstelm_d(<4 x i64> %va, i8* %p, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 %b, i32 1)
+ ret void
+}
+
+define void @lasx_xvstelm_d_idx(<4 x i64> %va, i8* %p, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 %b)
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..52ef3c4714127d4c2d25a61e9ca02caa859645b1
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, i8*, i32, i32)
+
+define void @lasx_xvstelm_b(<32 x i8> %va, i8* %p) nounwind {
+; CHECK-LABEL: lasx_xvstelm_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvstelm.b $xr0, $a0, 1, 1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 1)
+ ret void
+}
+
+declare void @llvm.loongarch.lasx.xvstelm.h(<16 x i16>, i8*, i32, i32)
+
+define void @lasx_xvstelm_h(<16 x i16> %va, i8* %p) nounwind {
+; CHECK-LABEL: lasx_xvstelm_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvstelm.h $xr0, $a0, 2, 1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 1)
+ ret void
+}
+
+declare void @llvm.loongarch.lasx.xvstelm.w(<8 x i32>, i8*, i32, i32)
+
+define void @lasx_xvstelm_w(<8 x i32> %va, i8* %p) nounwind {
+; CHECK-LABEL: lasx_xvstelm_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvstelm.w $xr0, $a0, 4, 1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 1)
+ ret void
+}
+
+declare void @llvm.loongarch.lasx.xvstelm.d(<4 x i64>, i8*, i32, i32)
+
+define void @lasx_xvstelm_d(<4 x i64> %va, i8* %p) nounwind {
+; CHECK-LABEL: lasx_xvstelm_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvstelm.d $xr0, $a0, 8, 1
+; CHECK-NEXT: ret
+entry:
+ call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 1)
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll
new file mode 100644
index 0000000000000000000000000000000000000000..4d69dd83dcde7fdfc7e11cedce17d059f238fecc
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvsub_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsub_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsub.b $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> %va, <32 x i8> %vb)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvsub_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsub_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsub.h $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> %va, <16 x i16> %vb)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvsub_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsub_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsub.w $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> %va, <8 x i32> %vb)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvsub_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsub_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsub.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> %va, <4 x i64> %vb)
+ ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvsub_q(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsub_q:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsub.q $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> %va, <4 x i64> %vb)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..810008c17f7e5b424e59f2eeb5c15ac2b9f36099
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsubi_bu_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsubi.bu: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvsubi_bu_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsubi.bu: argument out of range
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 32)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsubi_hu_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsubi.hu: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvsubi_hu_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsubi.hu: argument out of range
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 32)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsubi_wu_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsubi.wu: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvsubi_wu_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsubi.wu: argument out of range
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 32)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsubi_du_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsubi.du: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvsubi_du_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsubi.du: argument out of range
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 32)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..924b89ce9d6c423b7b84781157efd9122933800b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsubi_bu(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 %b)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsubi_hu(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 %b)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsubi_wu(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 %b)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsubi_du(<4 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 %b)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll
new file mode 100644
index 0000000000000000000000000000000000000000..cc3235ff4657d4e484eff3ef85763cbeaf933815
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsubi_bu(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvsubi_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsubi.bu $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 1)
+ ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsubi_hu(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvsubi_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsubi.hu $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 1)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsubi_wu(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvsubi_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsubi.wu $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 1)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsubi_du(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvsubi_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsubi.du $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 1)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll
new file mode 100644
index 0000000000000000000000000000000000000000..6f203e8949900beaa3d91cb8991609db99c4ac45
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll
@@ -0,0 +1,194 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8>, <32 x i8>)
+
+define <16 x i16> @lasx_xvsubwev_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsubwev_h_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsubwev.h.b $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> %va, <32 x i8> %vb)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16>, <16 x i16>)
+
+define <8 x i32> @lasx_xvsubwev_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsubwev_w_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsubwev.w.h $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> %va, <16 x i16> %vb)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32>, <8 x i32>)
+
+define <4 x i64> @lasx_xvsubwev_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsubwev_d_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsubwev.d.w $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> %va, <8 x i32> %vb)
+ ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvsubwev_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsubwev_q_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsubwev.q.d $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> %va, <4 x i64> %vb)
+ ret <4 x i64> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8>, <32 x i8>)
+
+define <16 x i16> @lasx_xvsubwev_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsubwev_h_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsubwev.h.bu $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> %va, <32 x i8> %vb)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16>, <16 x i16>)
+
+define <8 x i32> @lasx_xvsubwev_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsubwev_w_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsubwev.w.hu $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> %va, <16 x i16> %vb)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32>, <8 x i32>)
+
+define <4 x i64> @lasx_xvsubwev_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsubwev_d_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsubwev.d.wu $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> %va, <8 x i32> %vb)
+ ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvsubwev_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsubwev_q_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsubwev.q.du $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> %va, <4 x i64> %vb)
+ ret <4 x i64> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8>, <32 x i8>)
+
+define <16 x i16> @lasx_xvsubwod_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsubwod_h_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsubwod.h.b $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> %va, <32 x i8> %vb)
+ ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16>, <16 x i16>)
+
+define <8 x i32> @lasx_xvsubwod_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsubwod_w_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsubwod.w.h $xr0, $xr0, $xr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> %va, <16 x i16> %vb)
+ ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32>, <8 x i32>)
+
i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwod_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwod.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvsubwod_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwod_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwod.q.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvsubwod_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwod_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwod.h.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvsubwod_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwod_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwod.w.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvsubwod_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwod_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwod.d.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvsubwod_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwod_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwod.q.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll new file mode 100644 index 0000000000000000000000000000000000000000..6395b3d6f2e7a8c0aafb2eed863314789126b9c3 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvxor_v(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvxor_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..0170d204cf425bbe61cd8a6f8197bb0afdc814a4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 
--mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvxori_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvxori.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvxori_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvxori.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 256) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..1478f691a1cc6e3c8a3e0e65859fc9198f880b65 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvxori_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll new file mode 100644 index 0000000000000000000000000000000000000000..c71d7e7311656c988f8baa27e92a6b0f78cbf3f6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvxori_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvxori_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvxori.b $xr0, $xr0, 3 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 3) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll new file mode 100644 index 0000000000000000000000000000000000000000..8e4d0dc6f1c380f9163c6895a2f96781b58e35ff --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @add_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: add_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvadd.b $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v2 = add <32 x i8> %v0, %v1 + store <32 x i8> %v2, ptr %res + ret void +} + +define void @add_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: add_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvadd.h $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %v2 = add <16 x i16> %v0, %v1 + store <16 x i16> %v2, ptr %res + ret void +} + +define void @add_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: add_v8i32: 
+; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvadd.w $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v2 = add <8 x i32> %v0, %v1 + store <8 x i32> %v2, ptr %res + ret void +} + +define void @add_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: add_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %v2 = add <4 x i64> %v0, %v1 + store <4 x i64> %v2, ptr %res + ret void +} + +define void @add_v32i8_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: add_v32i8_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = add <32 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31> + store <32 x i8> %v1, ptr %res + ret void +} + +define void @add_v16i16_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: add_v16i16_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = add <16 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31> + store <16 x i16> %v1, ptr %res + ret void +} + +define void @add_v8i32_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: add_v8i32_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = add <8 x i32> %v0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @add_v4i64_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: add_v4i64_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvaddi.du $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = add <4 x i64> %v0, <i64 31, i64 31, i64 31, i64 31> + store <4 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll new file mode 100644 index 0000000000000000000000000000000000000000..98c87cadeeb5a0cc4e6efd56a071819e78a455f7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @and_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: and_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v2 = and <32 x i8> %v0, %v1 + store <32 x i8> %v2, ptr %res + ret void +} + +define void @and_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: and_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %v2 = and <16 x i16> %v0, %v1 + store <16 x i16> %v2, ptr %res + ret void +} + +define
void @and_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: and_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v2 = and <8 x i32> %v0, %v1 + store <8 x i32> %v2, ptr %res + ret void +} + +define void @and_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: and_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %v2 = and <4 x i64> %v0, %v1 + store <4 x i64> %v2, ptr %res + ret void +} + +define void @and_u_v32i8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_u_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvandi.b $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = and <32 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31> + store <32 x i8> %v1, ptr %res + ret void +} + +define void @and_u_v16i16(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_u_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.h $xr1, 31 +; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = and <16 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31> + store <16 x i16> %v1, ptr %res + ret void +} + +define void @and_u_v8i32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_u_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.w $xr1, 31 +; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = and <8 x i32> %v0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @and_u_v4i64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_u_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.d $xr1, 31 +; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = and <4 x i64> %v0, <i64 31, i64 31, i64 31, i64 31> + store <4 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll new file mode 100644 index 0000000000000000000000000000000000000000..fcbf0f1400fe61fcde0ca8908ae89ab654ed19f9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll @@ -0,0 +1,178 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @ashr_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: ashr_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsra.b $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v2 = ashr <32 x i8> %v0, %v1 + store <32 x i8> %v2, ptr %res + ret void +} + +define void @ashr_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: ashr_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsra.h $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 
+; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %v2 = ashr <16 x i16> %v0, %v1 + store <16 x i16> %v2, ptr %res + ret void +} + +define void @ashr_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: ashr_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsra.w $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v2 = ashr <8 x i32> %v0, %v1 + store <8 x i32> %v2, ptr %res + ret void +} + +define void @ashr_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: ashr_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsra.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %v2 = ashr <4 x i64> %v0, %v1 + store <4 x i64> %v2, ptr %res + ret void +} + +define void @ashr_v32i8_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v32i8_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = ashr <32 x i8> %v0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + store <32 x i8> %v1, ptr %res + ret void +} + +define void @ashr_v32i8_7(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v32i8_7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.b $xr0, $xr0, 7 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = ashr <32 x i8> %v0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + store <32 x i8> %v1, ptr %res + ret void +} + +define void @ashr_v16i16_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v16i16_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = ashr <16 x i16> %v0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + store <16 x i16> %v1, ptr %res + ret void +} + +define void @ashr_v16i16_15(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v16i16_15: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.h $xr0, $xr0, 15 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = ashr <16 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> + store <16 x i16> %v1, ptr %res + ret void +} + +define void @ashr_v8i32_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v8i32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = ashr <8 x i32> %v0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @ashr_v8i32_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v8i32_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.w $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = ashr <8 x i32> %v0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @ashr_v4i64_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v4i64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = ashr <4 x i64> %v0, <i64 1, i64 1, i64 1, i64 1> + store <4 x i64> %v1, ptr %res + ret void +} + +define void 
@ashr_v4i64_63(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v4i64_63: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.d $xr0, $xr0, 63 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = ashr <4 x i64> %v0, <i64 63, i64 63, i64 63, i64 63> + store <4 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll new file mode 100644 index 0000000000000000000000000000000000000000..02b76bf75b75393b496eca32a935744fdcc0a543 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll @@ -0,0 +1,238 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @extract_32xi8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_32xi8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 +; CHECK-NEXT: st.b $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <32 x i8>, ptr %src + %e = extractelement <32 x i8> %v, i32 1 + store i8 %e, ptr %dst + ret void +} + +define void @extract_16xi16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_16xi16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 +; CHECK-NEXT: st.h $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <16 x i16>, ptr %src + %e = extractelement <16 x i16> %v, i32 1 + store i16 %e, ptr %dst + ret void +} + +define void @extract_8xi32(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_8xi32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 +; CHECK-NEXT: st.w $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <8 x i32>, ptr %src + %e = extractelement <8 x i32> %v, i32 1 + store i32 %e, ptr %dst + ret void +} + +define void @extract_4xi64(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_4xi64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 +; CHECK-NEXT: st.d $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <4 x i64>, ptr %src + %e = extractelement <4 x i64> %v, i32 1 + store i64 %e, ptr %dst + ret void +} + +define void @extract_8xfloat(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_8xfloat: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: fst.s $fa0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <8 x float>, ptr %src + %e = extractelement <8 x float> %v, i32 7 + store float %e, ptr %dst + ret void +} + +define void @extract_4xdouble(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_4xdouble: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 +; CHECK-NEXT: movgr2fr.d $fa0, $a0 +; CHECK-NEXT: fst.d $fa0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <4 x double>, ptr %src + %e = extractelement <4 x double> %v, i32 3 + store double %e, ptr %dst + ret void +} + +define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_32xi8_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: srli.d $a3, $sp, 5 +; CHECK-NEXT: slli.d $sp, $a3, 5 +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: 
xvst $xr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 4, 0 +; CHECK-NEXT: ld.b $a0, $a0, 0 +; CHECK-NEXT: st.b $a0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret + %v = load volatile <32 x i8>, ptr %src + %e = extractelement <32 x i8> %v, i32 %idx + store i8 %e, ptr %dst + ret void +} + +define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_16xi16_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: srli.d $a3, $sp, 5 +; CHECK-NEXT: slli.d $sp, $a3, 5 +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 4, 1 +; CHECK-NEXT: ld.h $a0, $a0, 0 +; CHECK-NEXT: st.h $a0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret + %v = load volatile <16 x i16>, ptr %src + %e = extractelement <16 x i16> %v, i32 %idx + store i16 %e, ptr %dst + ret void +} + +define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_8xi32_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: srli.d $a3, $sp, 5 +; CHECK-NEXT: slli.d $sp, $a3, 5 +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 +; CHECK-NEXT: ld.w $a0, $a0, 0 +; CHECK-NEXT: st.w $a0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret + %v = load volatile <8 x i32>, ptr %src + %e = extractelement <8 x i32> %v, i32 %idx + store i32 %e, ptr %dst + ret void +} + +define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_4xi64_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: srli.d $a3, $sp, 5 +; CHECK-NEXT: slli.d $sp, $a3, 5 +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 +; CHECK-NEXT: ld.d $a0, $a0, 0 +; CHECK-NEXT: st.d $a0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret + %v = load volatile <4 x i64>, ptr %src + %e = extractelement <4 x i64> %v, i32 %idx + store i64 %e, ptr %dst + ret void +} + +define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_8xfloat_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: srli.d $a3, 
$sp, 5 +; CHECK-NEXT: slli.d $sp, $a3, 5 +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 +; CHECK-NEXT: fld.s $fa0, $a0, 0 +; CHECK-NEXT: fst.s $fa0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret + %v = load volatile <8 x float>, ptr %src + %e = extractelement <8 x float> %v, i32 %idx + store float %e, ptr %dst + ret void +} + +define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_4xdouble_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: srli.d $a3, $sp, 5 +; CHECK-NEXT: slli.d $sp, $a3, 5 +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 +; CHECK-NEXT: fld.d $fa0, $a0, 0 +; CHECK-NEXT: fst.d $fa0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret + %v = load volatile <4 x double>, ptr %src + %e = extractelement <4 x double> %v, i32 %idx + store double %e, ptr %dst + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll new file mode 100644 index 0000000000000000000000000000000000000000..365bb305fc5aaa30c7ae9444257a13211680ce5a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @fadd_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fadd_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfadd.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = fadd <8 x float> %v0, %v1 + store <8 x float> %v2, ptr %res + ret void +} + +define void @fadd_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fadd_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfadd.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = fadd <4 x double> %v0, %v1 + store <4 x double> %v2, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll new file mode 100644 index 0000000000000000000000000000000000000000..ef67dbc100c04519e364fb9bf4e8d0b83c8ebb8c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll @@ -0,0 +1,692 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +;; TRUE +define void @v8f32_fcmp_true(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_true: +; CHECK: # %bb.0: +; CHECK-NEXT: xvrepli.b $xr0, -1 +; CHECK-NEXT: xvst 
$xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp true <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +;; FALSE +define void @v4f64_fcmp_false(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_false: +; CHECK: # %bb.0: +; CHECK-NEXT: xvrepli.b $xr0, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp false <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETOEQ +define void @v8f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_oeq: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.ceq.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp oeq <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_oeq: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.ceq.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp oeq <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETUEQ +define void @v8f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_ueq: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cueq.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp ueq <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_ueq: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cueq.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp ueq <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETEQ +define void @v8f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_eq: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.ceq.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp fast oeq <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_eq: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.ceq.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp fast ueq <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr 
%res + ret void +} + +;; SETOLE +define void @v8f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_ole: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp ole <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_ole: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp ole <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETULE +define void @v8f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_ule: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cule.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp ule <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_ule: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cule.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp ule <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETLE +define void @v8f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_le: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp fast ole <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_le: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp fast ule <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETOLT +define void @v8f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_olt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp olt <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_olt: +; CHECK: # %bb.0: 
+; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp olt <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETULT +define void @v8f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp ult <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cult.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp ult <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETLT +define void @v8f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_lt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp fast olt <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_lt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp fast ult <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETONE +define void @v8f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_one: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cne.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp one <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_one: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cne.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp one <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETUNE +define void @v8f32_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_une: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cune.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, 
$a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp une <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_une: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cune.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp une <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETNE +define void @v8f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cne.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp fast one <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cne.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp fast une <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETO +define void @v8f32_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_ord: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cor.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp ord <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_ord: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cor.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp ord <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETUO +define void @v8f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_uno: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cun.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp uno <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_uno: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cun.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp uno <4 x double> %v0, %v1 + %ext = sext <4 x 
i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; Expand SETOGT +define void @v8f32_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_ogt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp ogt <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_ogt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp ogt <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; Expand SETUGT +define void @v8f32_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp ugt <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfcmp.cult.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp ugt <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; Expand SETGT +define void @v8f32_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_gt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp fast ogt <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_gt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp fast ugt <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; Expand SETOGE +define void @v8f32_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_oge: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp oge <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_oge(ptr %res, ptr 
%a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_oge: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp oge <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; Expand SETUGE +define void @v8f32_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfcmp.cule.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp uge <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfcmp.cule.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp uge <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; Expand SETGE +define void @v8f32_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_ge: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp fast oge <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_ge: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp fast uge <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll new file mode 100644 index 0000000000000000000000000000000000000000..6004565b0b784e5db52b6449d1febae0e9e24ea6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @fdiv_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fdiv_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfdiv.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = fdiv <8 x float> %v0, %v1 + store <8 x float> %v2, ptr %res + ret void +} + +define void @fdiv_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fdiv_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfdiv.d 
$xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = fdiv <4 x double> %v0, %v1 + store <4 x double> %v2, ptr %res + ret void +} + +;; 1.0 / vec +define void @one_fdiv_v8f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: one_fdiv_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvfrecip.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %div = fdiv <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %v0 + store <8 x float> %div, ptr %res + ret void +} + +define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: one_fdiv_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvfrecip.d $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %div = fdiv <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %v0 + store <4 x double> %div, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll new file mode 100644 index 0000000000000000000000000000000000000000..a48dca8d284704b28ae1a9e125b76fc07bcc6074 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @fmul_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fmul_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = fmul <8 x float> %v0, %v1 + store <8 x float> %v2, ptr %res + ret void +} + +define void @fmul_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fmul_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = fmul <4 x double> %v0, %v1 + store <4 x double> %v2, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll new file mode 100644 index 0000000000000000000000000000000000000000..5eb468fc55a0ec6685ad9524d87615a1b6305bb4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @fneg_v8f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: fneg_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = fneg <8 x float> %v0 + store <8 x float> %v1, ptr %res + ret void +} +define void @fneg_v4f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: fneg_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = fneg <4 x double> %v0 + store <4 x double> %v1, ptr %res + ret void 
+} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll new file mode 100644 index 0000000000000000000000000000000000000000..0d9f57b57ffae3cf6046c20ffa7d804f44975dca --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @fptosi_v8f32_v8i32(ptr %res, ptr %in){ +; CHECK-LABEL: fptosi_v8f32_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %in + %v1 = fptosi <8 x float> %v0 to <8 x i32> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @fptosi_v4f64_v4i64(ptr %res, ptr %in){ +; CHECK-LABEL: fptosi_v4f64_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvftintrz.l.d $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %in + %v1 = fptosi <4 x double> %v0 to <4 x i64> + store <4 x i64> %v1, ptr %res + ret void +} + +define void @fptosi_v4f64_v4i32(ptr %res, ptr %in){ +; CHECK-LABEL: fptosi_v4f64_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 +; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %in + %v1 = fptosi <4 x double> %v0 to <4 x i32> + store <4 x i32> %v1, ptr %res + ret void +} + +define void @fptosi_v4f32_v4i64(ptr %res, ptr %in){ +; CHECK-LABEL: fptosi_v4f32_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vftintrz.w.s $vr0, $vr0 +; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %in + %v1 = fptosi <4 x float> %v0 to <4 x i64> + store <4 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll new file mode 100644 index 0000000000000000000000000000000000000000..27d70f33cd34e61fe332a52e31390049ec05beb9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @fptoui_v8f32_v8i32(ptr %res, ptr %in){ +; CHECK-LABEL: fptoui_v8f32_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvftintrz.wu.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %in + %v1 = fptoui <8 x float> %v0 to <8 x i32> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @fptoui_v4f64_v4i64(ptr %res, ptr %in){ +; CHECK-LABEL: fptoui_v4f64_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvftintrz.lu.d $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %in + %v1 = fptoui <4 x double> %v0 to <4 x i64> + store <4 x i64> %v1, ptr %res + ret void +} + +define void @fptoui_v4f64_v4i32(ptr %res, ptr %in){ +; CHECK-LABEL: fptoui_v4f64_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 +; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvftintrz.w.s 
$xr0, $xr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %in + %v1 = fptoui <4 x double> %v0 to <4 x i32> + store <4 x i32> %v1, ptr %res + ret void +} + +define void @fptoui_v4f32_v4i64(ptr %res, ptr %in){ +; CHECK-LABEL: fptoui_v4f32_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vftintrz.wu.s $vr0, $vr0 +; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %in + %v1 = fptoui <4 x float> %v0 to <4 x i64> + store <4 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll new file mode 100644 index 0000000000000000000000000000000000000000..6164aa5a55c7e40e1b2c635bb2bc2ccb49854dc1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @fsub_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fsub_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfsub.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = fsub <8 x float> %v0, %v1 + store <8 x float> %v2, ptr %res + ret void +} + +define void @fsub_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fsub_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfsub.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = fsub <4 x double> %v0, %v1 + store <4 x double> %v2, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll new file mode 100644 index 0000000000000000000000000000000000000000..6693fe0f6ec7cec0b0a7bedea15611e4e644081b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll @@ -0,0 +1,939 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +;; SETEQ +define void @v32i8_icmp_eq_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v32i8_icmp_eq_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvseqi.b $xr0, $xr0, 15 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %cmp = icmp eq <32 x i8> %v0, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15> + %ext = sext <32 x i1> %cmp to <32 x i8> + store <32 x i8> %ext, ptr %res + ret void +} + +define void @v32i8_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v32i8_icmp_eq: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvseq.b $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %cmp = icmp eq <32 x i8> %v0, %v1 + %ext = sext <32 x i1> %cmp to <32 x i8> + store <32 x i8> %ext, ptr %res + ret void +} + +define void @v16i16_icmp_eq_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v16i16_icmp_eq_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvseqi.h $xr0, $xr0, 15 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; 
CHECK-NEXT: ret
+  %v0 = load <16 x i16>, ptr %a0
+  %cmp = icmp eq <16 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %ext = sext <16 x i1> %cmp to <16 x i16>
+  store <16 x i16> %ext, ptr %res
+  ret void
+}
+
+define void @v16i16_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v16i16_icmp_eq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvseq.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <16 x i16>, ptr %a0
+  %v1 = load <16 x i16>, ptr %a1
+  %cmp = icmp eq <16 x i16> %v0, %v1
+  %ext = sext <16 x i1> %cmp to <16 x i16>
+  store <16 x i16> %ext, ptr %res
+  ret void
+}
+
+define void @v8i32_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v8i32_icmp_eq_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvseqi.w $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <8 x i32>, ptr %a0
+  %cmp = icmp eq <8 x i32> %v0, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+  %ext = sext <8 x i1> %cmp to <8 x i32>
+  store <8 x i32> %ext, ptr %res
+  ret void
+}
+
+define void @v8i32_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v8i32_icmp_eq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvseq.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <8 x i32>, ptr %a0
+  %v1 = load <8 x i32>, ptr %a1
+  %cmp = icmp eq <8 x i32> %v0, %v1
+  %ext = sext <8 x i1> %cmp to <8 x i32>
+  store <8 x i32> %ext, ptr %res
+  ret void
+}
+
+define void @v4i64_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v4i64_icmp_eq_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvseqi.d $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <4 x i64>, ptr %a0
+  %cmp = icmp eq <4 x i64> %v0, <i64 15, i64 15, i64 15, i64 15>
+  %ext = sext <4 x i1> %cmp to <4 x i64>
+  store <4 x i64> %ext, ptr %res
+  ret void
+}
+
+define void @v4i64_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4i64_icmp_eq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvseq.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <4 x i64>, ptr %a0
+  %v1 = load <4 x i64>, ptr %a1
+  %cmp = icmp eq <4 x i64> %v0, %v1
+  %ext = sext <4 x i1> %cmp to <4 x i64>
+  store <4 x i64> %ext, ptr %res
+  ret void
+}
+
+;; SETLE
+define void @v32i8_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v32i8_icmp_sle_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslei.b $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <32 x i8>, ptr %a0
+  %cmp = icmp sle <32 x i8> %v0, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
+  %ext = sext <32 x i1> %cmp to <32 x i8>
+  store <32 x i8> %ext, ptr %res
+  ret void
+}
+
+define void @v32i8_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v32i8_icmp_sle:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsle.b $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <32 x i8>, ptr %a0
+  %v1 = load <32 x i8>, ptr %a1
+  %cmp = icmp sle <32 x i8> %v0, %v1
+  %ext = sext <32 x i1> %cmp to <32 x i8>
+  store <32 x i8> %ext, ptr %res
+  ret void
+}
+
+define void @v16i16_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v16i16_icmp_sle_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslei.h $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <16 x i16>, ptr %a0
+  %cmp = icmp sle <16 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %ext = sext <16 x i1> %cmp to <16 x i16>
+  store <16 x i16> %ext, ptr %res
+  ret void
+}
+
+define void @v16i16_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v16i16_icmp_sle:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsle.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <16 x i16>, ptr %a0
+  %v1 = load <16 x i16>, ptr %a1
+  %cmp = icmp sle <16 x i16> %v0, %v1
+  %ext = sext <16 x i1> %cmp to <16 x i16>
+  store <16 x i16> %ext, ptr %res
+  ret void
+}
+
+define void @v8i32_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v8i32_icmp_sle_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslei.w $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <8 x i32>, ptr %a0
+  %cmp = icmp sle <8 x i32> %v0, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+  %ext = sext <8 x i1> %cmp to <8 x i32>
+  store <8 x i32> %ext, ptr %res
+  ret void
+}
+
+define void @v8i32_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v8i32_icmp_sle:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsle.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <8 x i32>, ptr %a0
+  %v1 = load <8 x i32>, ptr %a1
+  %cmp = icmp sle <8 x i32> %v0, %v1
+  %ext = sext <8 x i1> %cmp to <8 x i32>
+  store <8 x i32> %ext, ptr %res
+  ret void
+}
+
+define void @v4i64_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v4i64_icmp_sle_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslei.d $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <4 x i64>, ptr %a0
+  %cmp = icmp sle <4 x i64> %v0, <i64 15, i64 15, i64 15, i64 15>
+  %ext = sext <4 x i1> %cmp to <4 x i64>
+  store <4 x i64> %ext, ptr %res
+  ret void
+}
+
+define void @v4i64_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4i64_icmp_sle:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsle.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <4 x i64>, ptr %a0
+  %v1 = load <4 x i64>, ptr %a1
+  %cmp = icmp sle <4 x i64> %v0, %v1
+  %ext = sext <4 x i1> %cmp to <4 x i64>
+  store <4 x i64> %ext, ptr %res
+  ret void
+}
+
+;; SETULE
+define void @v32i8_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v32i8_icmp_ule_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslei.bu $xr0, $xr0, 31
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <32 x i8>, ptr %a0
+  %cmp = icmp ule <32 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31>
+  %ext = sext <32 x i1> %cmp to <32 x i8>
+  store <32 x i8> %ext, ptr %res
+  ret void
+}
+
+define void @v32i8_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v32i8_icmp_ule:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsle.bu $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <32 x i8>, ptr %a0
+  %v1 = load <32 x i8>, ptr %a1
+  %cmp = icmp ule <32 x i8> %v0, %v1
+  %ext = sext <32 x i1> %cmp to <32 x i8>
+  store <32 x i8> %ext, ptr %res
+  ret void
+}
+
+define void @v16i16_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v16i16_icmp_ule_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslei.hu $xr0, $xr0, 31
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <16 x i16>, ptr %a0
+  %cmp = icmp ule <16 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
+  %ext = sext <16 x i1> %cmp to <16 x i16>
+  store <16 x i16> %ext, ptr %res
+  ret void
+}
+
+define void @v16i16_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v16i16_icmp_ule:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsle.hu $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <16 x i16>, ptr %a0
+  %v1 = load <16 x i16>, ptr %a1
+  %cmp = icmp ule <16 x i16> %v0, %v1
+  %ext = sext <16 x i1> %cmp to <16 x i16>
+  store <16 x i16> %ext, ptr %res
+  ret void
+}
+
+define void @v8i32_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v8i32_icmp_ule_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslei.wu $xr0, $xr0, 31
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <8 x i32>, ptr %a0
+  %cmp = icmp ule <8 x i32> %v0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+  %ext = sext <8 x i1> %cmp to <8 x i32>
+  store <8 x i32> %ext, ptr %res
+  ret void
+}
+
+define void @v8i32_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v8i32_icmp_ule:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsle.wu $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <8 x i32>, ptr %a0
+  %v1 = load <8 x i32>, ptr %a1
+  %cmp = icmp ule <8 x i32> %v0, %v1
+  %ext = sext <8 x i1> %cmp to <8 x i32>
+  store <8 x i32> %ext, ptr %res
+  ret void
+}
+
+define void @v4i64_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v4i64_icmp_ule_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslei.du $xr0, $xr0, 31
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <4 x i64>, ptr %a0
+  %cmp = icmp ule <4 x i64> %v0, <i64 31, i64 31, i64 31, i64 31>
+  %ext = sext <4 x i1> %cmp to <4 x i64>
+  store <4 x i64> %ext, ptr %res
+  ret void
+}
+
+define void @v4i64_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4i64_icmp_ule:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsle.du $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <4 x i64>, ptr %a0
+  %v1 = load <4 x i64>, ptr %a1
+  %cmp = icmp ule <4 x i64> %v0, %v1
+  %ext = sext <4 x i1> %cmp to <4 x i64>
+  store <4 x i64> %ext, ptr %res
+  ret void
+}
+
+;; SETLT
+define void @v32i8_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v32i8_icmp_slt_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslti.b $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <32 x i8>, ptr %a0
+  %cmp = icmp slt <32 x i8> %v0, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
+  %ext = sext <32 x i1> %cmp to <32 x i8>
+  store <32 x i8> %ext, ptr %res
+  ret void
+}
+
+define void @v32i8_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v32i8_icmp_slt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <32 x i8>, ptr %a0
+  %v1 = load <32 x i8>, ptr %a1
+  %cmp = icmp slt <32 x i8> %v0, %v1
+  %ext = sext <32 x i1> %cmp to <32 x i8>
+  store <32 x i8> %ext, ptr %res
+  ret void
+}
+
+define void @v16i16_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v16i16_icmp_slt_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslti.h $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <16 x i16>, ptr %a0
+  %cmp = icmp slt <16 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %ext = sext <16 x i1> %cmp to <16 x i16>
+  store <16 x i16> %ext, ptr %res
+  ret void
+}
+
+define void @v16i16_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v16i16_icmp_slt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvslt.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <16 x i16>, ptr %a0
+  %v1 = load <16 x i16>, ptr %a1
+  %cmp = icmp slt <16 x i16> %v0, %v1
+  %ext = sext <16 x i1> %cmp to <16 x i16>
+  store <16 x i16> %ext, ptr %res
+  ret void
+}
+
+define void @v8i32_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v8i32_icmp_slt_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslti.w $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <8 x i32>, ptr %a0
+  %cmp = icmp slt <8 x i32> %v0, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+  %ext = sext <8 x i1> %cmp to <8 x i32>
+  store <8 x i32> %ext, ptr %res
+  ret void
+}
+
+define void @v8i32_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v8i32_icmp_slt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <8 x i32>, ptr %a0
+  %v1 = load <8 x i32>, ptr %a1
+  %cmp = icmp slt <8 x i32> %v0, %v1
+  %ext = sext <8 x i1> %cmp to <8 x i32>
+  store <8 x i32> %ext, ptr %res
+  ret void
+}
+
+define void @v4i64_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v4i64_icmp_slt_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslti.d $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <4 x i64>, ptr %a0
+  %cmp = icmp slt <4 x i64> %v0, <i64 15, i64 15, i64 15, i64 15>
+  %ext = sext <4 x i1> %cmp to <4 x i64>
+  store <4 x i64> %ext, ptr %res
+  ret void
+}
+
+define void @v4i64_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4i64_icmp_slt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvslt.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <4 x i64>, ptr %a0
+  %v1 = load <4 x i64>, ptr %a1
+  %cmp = icmp slt <4 x i64> %v0, %v1
+  %ext = sext <4 x i1> %cmp to <4 x i64>
+  store <4 x i64> %ext, ptr %res
+  ret void
+}
+
+;; SETULT
+define void @v32i8_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v32i8_icmp_ult_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslti.bu $xr0, $xr0, 31
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <32 x i8>, ptr %a0
+  %cmp = icmp ult <32 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31>
+  %ext = sext <32 x i1> %cmp to <32 x i8>
+  store <32 x i8> %ext, ptr %res
+  ret void
+}
+
+define void @v32i8_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v32i8_icmp_ult:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvslt.bu $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <32 x i8>, ptr %a0
+  %v1 = load <32 x i8>, ptr %a1
+  %cmp = icmp ult <32 x i8> %v0, %v1
+  %ext = sext <32 x i1> %cmp to <32 x i8>
+  store <32 x i8> %ext, ptr %res
+  ret void
+}
+
+define void @v16i16_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v16i16_icmp_ult_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslti.hu $xr0, $xr0, 31
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+  %v0 = load <16 x i16>, ptr %a0
+  %cmp = icmp ult <16 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
+  %ext = sext <16 x i1> %cmp to <16 x i16>
+  store <16 x i16> %ext, ptr %res
+  ret void
+}
+
+define void @v16i16_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
+; 
CHECK-LABEL: v16i16_icmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvslt.hu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %cmp = icmp ult <16 x i16> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i16> + store <16 x i16> %ext, ptr %res + ret void +} + +define void @v8i32_icmp_ult_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v8i32_icmp_ult_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvslti.wu $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %a0 + %cmp = icmp ult <8 x i32> %v0, + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v8i32_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i32_icmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvslt.wu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %cmp = icmp ult <8 x i32> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4i64_icmp_ult_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v4i64_icmp_ult_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvslti.du $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i64>, ptr %a0 + %cmp = icmp ult <4 x i64> %v0, + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +define void @v4i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i64_icmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvslt.du $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %cmp = icmp ult <4 x i64> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; Expand SETNE +define void @v32i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v32i8_icmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvseq.b $xr0, $xr1, $xr0 +; CHECK-NEXT: xvxori.b $xr0, $xr0, 255 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %cmp = icmp ne <32 x i8> %v0, %v1 + %ext = sext <32 x i1> %cmp to <32 x i8> + store <32 x i8> %ext, ptr %res + ret void +} + +define void @v16i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i16_icmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvseq.h $xr0, $xr1, $xr0 +; CHECK-NEXT: xvrepli.b $xr1, -1 +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %cmp = icmp ne <16 x i16> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i16> + store <16 x i16> %ext, ptr %res + ret void +} + +define void @v8i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i32_icmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvseq.w $xr0, $xr1, $xr0 +; CHECK-NEXT: xvrepli.b $xr1, -1 +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: 
ret + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %cmp = icmp ne <8 x i32> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i64_icmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvseq.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvrepli.b $xr1, -1 +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %cmp = icmp ne <4 x i64> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; Expand SETGE +define void @v32i8_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v32i8_icmp_sge: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvsle.b $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %cmp = icmp sge <32 x i8> %v0, %v1 + %ext = sext <32 x i1> %cmp to <32 x i8> + store <32 x i8> %ext, ptr %res + ret void +} + +define void @v16i16_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i16_icmp_sge: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvsle.h $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %cmp = icmp sge <16 x i16> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i16> + store <16 x i16> %ext, ptr %res + ret void +} + +define void @v8i32_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i32_icmp_sge: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvsle.w $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %cmp = icmp sge <8 x i32> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4i64_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i64_icmp_sge: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvsle.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %cmp = icmp sge <4 x i64> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; Expand SETUGE +define void @v32i8_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v32i8_icmp_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvsle.bu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %cmp = icmp uge <32 x i8> %v0, %v1 + %ext = sext <32 x i1> %cmp to <32 x i8> + store <32 x i8> %ext, ptr %res + ret void +} + +define void @v16i16_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i16_icmp_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvsle.hu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %cmp = icmp uge <16 x i16> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i16> + store 
<16 x i16> %ext, ptr %res + ret void +} + +define void @v8i32_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i32_icmp_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvsle.wu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %cmp = icmp uge <8 x i32> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4i64_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i64_icmp_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvsle.du $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %cmp = icmp uge <4 x i64> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; Expand SETGT +define void @v32i8_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v32i8_icmp_sgt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %cmp = icmp sgt <32 x i8> %v0, %v1 + %ext = sext <32 x i1> %cmp to <32 x i8> + store <32 x i8> %ext, ptr %res + ret void +} + +define void @v16i16_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i16_icmp_sgt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvslt.h $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %cmp = icmp sgt <16 x i16> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i16> + store <16 x i16> %ext, ptr %res + ret void +} + +define void @v8i32_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i32_icmp_sgt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %cmp = icmp sgt <8 x i32> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4i64_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i64_icmp_sgt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvslt.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %cmp = icmp sgt <4 x i64> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; Expand SETUGT +define void @v32i8_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v32i8_icmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvslt.bu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %cmp = icmp ugt <32 x i8> %v0, %v1 + %ext = sext <32 x i1> %cmp to <32 x i8> + store <32 x i8> %ext, ptr %res + ret void +} + +define void @v16i16_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i16_icmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 
+; CHECK-NEXT: xvslt.hu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %cmp = icmp ugt <16 x i16> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i16> + store <16 x i16> %ext, ptr %res + ret void +} + +define void @v8i32_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i32_icmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvslt.wu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %cmp = icmp ugt <8 x i32> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4i64_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i64_icmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvslt.du $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %cmp = icmp ugt <4 x i64> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll new file mode 100644 index 0000000000000000000000000000000000000000..e571a5d2e4cf9767992c03f1706f995f78b1e5f7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll @@ -0,0 +1,276 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @insert_32xi8(ptr %src, ptr %dst, i8 %in) nounwind { +; CHECK-LABEL: insert_32xi8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <32 x i8>, ptr %src + %v_new = insertelement <32 x i8> %v, i8 %in, i32 1 + store <32 x i8> %v_new, ptr %dst + ret void +} + +define void @insert_32xi8_upper(ptr %src, ptr %dst, i8 %in) nounwind { +; CHECK-LABEL: insert_32xi8_upper: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <32 x i8>, ptr %src + %v_new = insertelement <32 x i8> %v, i8 %in, i32 16 + store <32 x i8> %v_new, ptr %dst + ret void +} + +define void @insert_16xi16(ptr %src, ptr %dst, i16 %in) nounwind { +; CHECK-LABEL: insert_16xi16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <16 x i16>, ptr %src + %v_new = insertelement <16 x i16> %v, i16 %in, i32 1 + store <16 x i16> %v_new, ptr %dst + ret void +} + +define void @insert_16xi16_upper(ptr %src, ptr %dst, i16 %in) nounwind { +; CHECK-LABEL: insert_16xi16_upper: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <16 x i16>, ptr %src + %v_new = insertelement <16 x i16> %v, i16 %in, i32 8 + store <16 x i16> %v_new, ptr %dst + ret 
void +} + +define void @insert_8xi32(ptr %src, ptr %dst, i32 %in) nounwind { +; CHECK-LABEL: insert_8xi32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <8 x i32>, ptr %src + %v_new = insertelement <8 x i32> %v, i32 %in, i32 1 + store <8 x i32> %v_new, ptr %dst + ret void +} + +define void @insert_4xi64(ptr %src, ptr %dst, i64 %in) nounwind { +; CHECK-LABEL: insert_4xi64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <4 x i64>, ptr %src + %v_new = insertelement <4 x i64> %v, i64 %in, i32 1 + store <4 x i64> %v_new, ptr %dst + ret void +} + +define void @insert_8xfloat(ptr %src, ptr %dst, float %in) nounwind { +; CHECK-LABEL: insert_8xfloat: +; CHECK: # %bb.0: +; CHECK-NEXT: movfr2gr.s $a2, $fa0 +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <8 x float>, ptr %src + %v_new = insertelement <8 x float> %v, float %in, i32 1 + store <8 x float> %v_new, ptr %dst + ret void +} + +define void @insert_4xdouble(ptr %src, ptr %dst, double %in) nounwind { +; CHECK-LABEL: insert_4xdouble: +; CHECK: # %bb.0: +; CHECK-NEXT: movfr2gr.d $a2, $fa0 +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <4 x double>, ptr %src + %v_new = insertelement <4 x double> %v, double %in, i32 1 + store <4 x double> %v_new, ptr %dst + ret void +} + +define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind { +; CHECK-LABEL: insert_32xi8_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: srli.d $a4, $sp, 5 +; CHECK-NEXT: slli.d $sp, $a4, 5 +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 4, 0 +; CHECK-NEXT: st.b $a2, $a0, 0 +; CHECK-NEXT: xvld $xr0, $sp, 0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret + %v = load volatile <32 x i8>, ptr %src + %v_new = insertelement <32 x i8> %v, i8 %in, i32 %idx + store <32 x i8> %v_new, ptr %dst + ret void +} + +define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind { +; CHECK-LABEL: insert_16xi16_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: srli.d $a4, $sp, 5 +; CHECK-NEXT: slli.d $sp, $a4, 5 +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 4, 1 +; CHECK-NEXT: st.h $a2, $a0, 0 +; CHECK-NEXT: xvld $xr0, $sp, 0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret + %v = load volatile <16 x i16>, ptr %src + %v_new = insertelement <16 x i16> %v, i16 %in, 
i32 %idx + store <16 x i16> %v_new, ptr %dst + ret void +} + +define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind { +; CHECK-LABEL: insert_8xi32_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: srli.d $a4, $sp, 5 +; CHECK-NEXT: slli.d $sp, $a4, 5 +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 4, 2 +; CHECK-NEXT: st.w $a2, $a0, 0 +; CHECK-NEXT: xvld $xr0, $sp, 0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret + %v = load volatile <8 x i32>, ptr %src + %v_new = insertelement <8 x i32> %v, i32 %in, i32 %idx + store <8 x i32> %v_new, ptr %dst + ret void +} + +define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind { +; CHECK-LABEL: insert_4xi64_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: srli.d $a4, $sp, 5 +; CHECK-NEXT: slli.d $sp, $a4, 5 +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 4, 3 +; CHECK-NEXT: st.d $a2, $a0, 0 +; CHECK-NEXT: xvld $xr0, $sp, 0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret + %v = load volatile <4 x i64>, ptr %src + %v_new = insertelement <4 x i64> %v, i64 %in, i32 %idx + store <4 x i64> %v_new, ptr %dst + ret void +} + +define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwind { +; CHECK-LABEL: insert_8xfloat_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: srli.d $a3, $sp, 5 +; CHECK-NEXT: slli.d $sp, $a3, 5 +; CHECK-NEXT: xvld $xr1, $a0, 0 +; CHECK-NEXT: xvst $xr1, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 +; CHECK-NEXT: fst.s $fa0, $a0, 0 +; CHECK-NEXT: xvld $xr0, $sp, 0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret + %v = load volatile <8 x float>, ptr %src + %v_new = insertelement <8 x float> %v, float %in, i32 %idx + store <8 x float> %v_new, ptr %dst + ret void +} + +define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounwind { +; CHECK-LABEL: insert_4xdouble_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: srli.d $a3, $sp, 5 +; CHECK-NEXT: slli.d $sp, $a3, 5 +; CHECK-NEXT: xvld $xr1, $a0, 0 +; CHECK-NEXT: xvst $xr1, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 +; CHECK-NEXT: fst.d $fa0, 
$a0, 0 +; CHECK-NEXT: xvld $xr0, $sp, 0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret + %v = load volatile <4 x double>, ptr %src + %v_new = insertelement <4 x double> %v, double %in, i32 %idx + store <4 x double> %v_new, ptr %dst + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll new file mode 100644 index 0000000000000000000000000000000000000000..24be69d8032a82ad5667fb89c92ce2349168e105 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll @@ -0,0 +1,178 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @lshr_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: lshr_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsrl.b $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v2 = lshr <32 x i8> %v0, %v1 + store <32 x i8> %v2, ptr %res + ret void +} + +define void @lshr_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: lshr_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsrl.h $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %v2 = lshr <16 x i16> %v0, %v1 + store <16 x i16> %v2, ptr %res + ret void +} + +define void @lshr_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: lshr_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsrl.w $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v2 = lshr <8 x i32> %v0, %v1 + store <8 x i32> %v2, ptr %res + ret void +} + +define void @lshr_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: lshr_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsrl.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %v2 = lshr <4 x i64> %v0, %v1 + store <4 x i64> %v2, ptr %res + ret void +} + +define void @lshr_v32i8_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: lshr_v32i8_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = lshr <32 x i8> %v0, + store <32 x i8> %v1, ptr %res + ret void +} + +define void @lshr_v32i8_7(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: lshr_v32i8_7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrli.b $xr0, $xr0, 7 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = lshr <32 x i8> %v0, + store <32 x i8> %v1, ptr %res + ret void +} + +define void @lshr_v16i16_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: lshr_v16i16_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrli.h $xr0, 
$xr0, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <16 x i16>, ptr %a0
+  %v1 = lshr <16 x i16> %v0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  store <16 x i16> %v1, ptr %res
+  ret void
+}
+
+define void @lshr_v16i16_15(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v16i16_15:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <16 x i16>, ptr %a0
+  %v1 = lshr <16 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  store <16 x i16> %v1, ptr %res
+  ret void
+}
+
+define void @lshr_v8i32_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v8i32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <8 x i32>, ptr %a0
+  %v1 = lshr <8 x i32> %v0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  store <8 x i32> %v1, ptr %res
+  ret void
+}
+
+define void @lshr_v8i32_31(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v8i32_31:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 31
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <8 x i32>, ptr %a0
+  %v1 = lshr <8 x i32> %v0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+  store <8 x i32> %v1, ptr %res
+  ret void
+}
+
+define void @lshr_v4i64_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v4i64_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <4 x i64>, ptr %a0
+  %v1 = lshr <4 x i64> %v0, <i64 1, i64 1, i64 1, i64 1>
+  store <4 x i64> %v1, ptr %res
+  ret void
+}
+
+define void @lshr_v4i64_63(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v4i64_63:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 63
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <4 x i64>, ptr %a0
+  %v1 = lshr <4 x i64> %v0, <i64 63, i64 63, i64 63, i64 63>
+  store <4 x i64> %v1, ptr %res
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll
new file mode 100644
index 0000000000000000000000000000000000000000..dcb893caa2555a27ee709c888b0724ba6c00fcdc
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll
@@ -0,0 +1,238 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @mul_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mul_v32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvmul.b $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <32 x i8>, ptr %a0
+  %v1 = load <32 x i8>, ptr %a1
+  %v2 = mul <32 x i8> %v0, %v1
+  store <32 x i8> %v2, ptr %res
+  ret void
+}
+
+define void @mul_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mul_v16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <16 x i16>, ptr %a0
+  %v1 = load <16 x i16>, ptr %a1
+  %v2 = mul <16 x i16> %v0, %v1
+  store <16 x i16> %v2, ptr %res
+  ret void
+}
+
+define void @mul_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mul_v8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvmul.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <8 x i32>, ptr %a0
+  %v1 = load <8 x i32>, ptr %a1
+  %v2 = mul <8 x i32> %v0, %v1
+  store <8 x i32> %v2, ptr %res
+  ret void
+}
+
+define void @mul_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mul_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvmul.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <4 x i64>, ptr %a0
+  %v1 = load <4 x i64>, ptr %a1
+  %v2 = mul <4 x i64> %v0, %v1
+  store <4 x i64> %v2, ptr %res
+  ret void
+}
+
+define void @mul_square_v32i8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_square_v32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvmul.b $xr0, $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <32 x i8>, ptr %a0
+  %v1 = mul <32 x i8> %v0, %v0
+  store <32 x i8> %v1, ptr %res
+  ret void
+}
+
+define void @mul_square_v16i16(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_square_v16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvmul.h $xr0, $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <16 x i16>, ptr %a0
+  %v1 = mul <16 x i16> %v0, %v0
+  store <16 x i16> %v1, ptr %res
+  ret void
+}
+
+define void @mul_square_v8i32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_square_v8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <8 x i32>, ptr %a0
+  %v1 = mul <8 x i32> %v0, %v0
+  store <8 x i32> %v1, ptr %res
+  ret void
+}
+
+define void @mul_square_v4i64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_square_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvmul.d $xr0, $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <4 x i64>, ptr %a0
+  %v1 = mul <4 x i64> %v0, %v0
+  store <4 x i64> %v1, ptr %res
+  ret void
+}
+
+define void @mul_v32i8_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v32i8_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslli.b $xr0, $xr0, 3
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <32 x i8>, ptr %a0
+  %v1 = mul <32 x i8> %v0, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+  store <32 x i8> %v1, ptr %res
+  ret void
+}
+
+define void @mul_v16i16_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v16i16_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslli.h $xr0, $xr0, 3
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <16 x i16>, ptr %a0
+  %v1 = mul <16 x i16> %v0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  store <16 x i16> %v1, ptr %res
+  ret void
+}
+
+define void @mul_v8i32_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v8i32_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslli.w $xr0, $xr0, 3
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <8 x i32>, ptr %a0
+  %v1 = mul <8 x i32> %v0, <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+  store <8 x i32> %v1, ptr %res
+  ret void
+}
+
+define void @mul_v4i64_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v4i64_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslli.d $xr0, $xr0, 3
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <4 x i64>, ptr %a0
+  %v1 = mul <4 x i64> %v0, <i64 8, i64 8, i64 8, i64 8>
+  store <4 x i64> %v1, ptr %res
+  ret void
+}
+
+define void @mul_v32i8_17(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v32i8_17:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvrepli.b $xr1, 17
+; CHECK-NEXT: xvmul.b $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <32 x i8>, ptr %a0
+  %v1 = mul <32 x i8> %v0, <i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17>
+  store <32 x i8> %v1, ptr %res
+  ret void
+}
+
+define void @mul_v16i16_17(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v16i16_17:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvrepli.h $xr1, 17
+; CHECK-NEXT: xvmul.h $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <16 x i16>, ptr %a0
+  %v1 = mul <16 x i16> %v0, <i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17>
+  store <16 x i16> %v1, ptr %res
+  ret void
+}
+
+define void @mul_v8i32_17(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v8i32_17:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvrepli.w $xr1, 17
+; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <8 x i32>, ptr %a0
+  %v1 = mul <8 x i32> %v0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
+  store <8 x i32> %v1, ptr %res
+  ret void
+}
+
+define void @mul_v4i64_17(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v4i64_17:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvrepli.d $xr1, 17
+; CHECK-NEXT: xvmul.d $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <4 x i64>, ptr %a0
+  %v1 = mul <4 x i64> %v0, <i64 17, i64 17, i64 17, i64 17>
+  store <4 x i64> %v1, ptr %res
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll
new file mode 100644
index 0000000000000000000000000000000000000000..f37cbf1cefedc468595801f0bc0527ba859f5202
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @or_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: or_v32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <32 x i8>, ptr %a0
+  %v1 = load <32 x i8>, ptr %a1
+  %v2 = or <32 x i8> %v0, %v1
+  store <32 x i8> %v2, ptr %res
+  ret void
+}
+
+define void @or_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: or_v16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <16 x i16>, ptr %a0
+  %v1 = load <16 x i16>, ptr %a1
+  %v2 = or <16 x i16> %v0, %v1
+  store <16 x i16> %v2, ptr %res
+  ret void
+}
+
+define void @or_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: or_v8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <8 x i32>, ptr %a0
+  %v1 = load <8 x i32>, ptr %a1
+  %v2 = or <8 x i32> %v0, %v1
+  store <8 x i32> %v2, ptr %res
+  ret void
+}
+
+define void @or_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: or_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <4 x i64>, ptr %a0
+  %v1 = load <4 x i64>, ptr %a1
+  %v2 = or <4 x i64> %v0, %v1
+  store <4 x i64> %v2, ptr %res
+  ret void
+}
+
+define void @or_u_v32i8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: or_u_v32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvori.b $xr0, $xr0, 31
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <32 x i8>, ptr %a0
+  %v1 = or <32 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31>
+  store <32 x i8> %v1, ptr %res
+  ret void
+}
+
+define void @or_u_v16i16(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: or_u_v16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvrepli.h $xr1, 31
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <16 x i16>, ptr %a0
+  %v1 = or <16 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
+  store <16 x i16> %v1, ptr %res
+  ret void
+}
+
+define void @or_u_v8i32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: or_u_v8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvrepli.w $xr1, 31
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <8 x i32>, ptr %a0
+  %v1 = or <8 x i32> %v0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+  store <8 x i32> %v1, ptr %res
+  ret void
+}
+
+define void @or_u_v4i64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: or_u_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvrepli.d $xr1, 31
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <4 x i64>, ptr %a0
+  %v1 = or <4 x i64> %v0, <i64 31, i64 31, i64 31, i64 31>
+  store <4 x i64> %v1, ptr %res
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll
new file mode 100644
index 0000000000000000000000000000000000000000..e3635a5f14a2bac49bb33ec7a9b7b0a04d7e9cca
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @sdiv_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: sdiv_v32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvdiv.b $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <32 x i8>, ptr %a0
+  %v1 = load <32 x i8>, ptr %a1
+  %v2 = sdiv <32 x i8> %v0, %v1
+  store <32 x i8> %v2, ptr %res
+  ret void
+}
+
+define void @sdiv_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: sdiv_v16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvdiv.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <16 x i16>, ptr %a0
+  %v1 = load <16 x i16>, ptr %a1
+  %v2 = sdiv <16 x i16> %v0, %v1
+  store <16 x i16> %v2, ptr %res
+  ret void
+}
+
+define void @sdiv_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: sdiv_v8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvdiv.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <8 x i32>, ptr %a0
+  %v1 = load <8 x i32>, ptr %a1
+  %v2 = sdiv <8 x i32> %v0, %v1
+  store <8 x i32> %v2, ptr %res
+  ret void
+}
+
+define void @sdiv_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: sdiv_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvdiv.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <4 x i64>, ptr %a0
+  %v1 = load <4 x i64>, ptr %a1
+  %v2 = sdiv <4 x i64> %v0, %v1
+  store <4 x i64> %v2, ptr %res
+  ret void
+}
+
+define void @sdiv_v32i8_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sdiv_v32i8_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrai.b $xr1, $xr0, 7
+; CHECK-NEXT: xvsrli.b $xr1, $xr1, 5
+; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.b $xr0, $xr0, 3
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <32 x i8>, ptr %a0
+  %v1 = sdiv <32 x i8> %v0, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+  store <32 x i8> %v1, ptr %res
+  ret void
+}
+
+define void @sdiv_v16i16_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sdiv_v16i16_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrai.h $xr1, $xr0, 15
+; CHECK-NEXT: xvsrli.h $xr1, $xr1, 13
+; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.h $xr0, $xr0, 3
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <16 x i16>, ptr %a0
+  %v1 = sdiv <16 x i16> %v0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  store <16 x i16> %v1, ptr %res
+  ret void
+}
+
+define void @sdiv_v8i32_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sdiv_v8i32_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrai.w $xr1, $xr0, 31
+; CHECK-NEXT: xvsrli.w $xr1, $xr1, 29
+; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.w $xr0, $xr0, 3
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <8 x i32>, ptr %a0
+  %v1 = sdiv <8 x i32> %v0, <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+  store <8 x i32> %v1, ptr %res
+  ret void
+}
+
+define void @sdiv_v4i64_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sdiv_v4i64_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrai.d $xr1, $xr0, 63
+; CHECK-NEXT: xvsrli.d $xr1, $xr1, 61
+; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsrai.d $xr0, $xr0, 3
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <4 x i64>, ptr %a0
+  %v1 = sdiv <4 x i64> %v0, <i64 8, i64 8, i64 8, i64 8>
+  store <4 x i64> %v1, ptr %res
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll
new file mode 100644
index 0000000000000000000000000000000000000000..8a02c7e3ac975a9a1cddba166a2b6cbb79d9ddd5
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll
@@ -0,0 +1,178 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @shl_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: shl_v32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsll.b $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <32 x i8>, ptr %a0
+  %v1 = load <32 x i8>, ptr %a1
+  %v2 = shl <32 x i8> %v0, %v1
+  store <32 x i8> %v2, ptr %res
+  ret void
+}
+
+define void @shl_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: shl_v16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsll.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <16 x i16>, ptr %a0
+  %v1 = load <16 x i16>, ptr %a1
+  %v2 = shl <16 x i16> %v0, %v1
+  store <16 x i16> %v2, ptr %res
+  ret void
+}
+
+define void @shl_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: shl_v8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsll.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <8 x i32>, ptr %a0
+  %v1 = load <8 x i32>, ptr %a1
+  %v2 = shl <8 x i32> %v0, %v1
+  store <8 x i32> %v2, ptr %res
+  ret void
+}
+
+define void @shl_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: shl_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsll.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <4 x i64>, ptr %a0
+  %v1 = load <4 x i64>, ptr %a1
+  %v2 = shl <4 x i64> %v0, %v1
+  store <4 x i64> %v2, ptr %res
+  ret void
+}
+
+define void @shl_v32i8_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: shl_v32i8_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslli.b $xr0, $xr0, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <32 x i8>, ptr %a0
+  %v1 = shl <32 x i8> %v0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  store <32 x i8> %v1, ptr %res
+  ret void
+}
+
+define void @shl_v32i8_7(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: shl_v32i8_7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslli.b $xr0, $xr0, 7
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <32 x i8>, ptr %a0
+  %v1 = shl <32 x i8> %v0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  store <32 x i8> %v1, ptr %res
+  ret void
+}
+
+define void @shl_v16i16_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: shl_v16i16_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslli.h $xr0, $xr0, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <16 x i16>, ptr %a0
+  %v1 = shl <16 x i16> %v0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  store <16 x i16> %v1, ptr %res
+  ret void
+}
+
+define void @shl_v16i16_15(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: shl_v16i16_15:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslli.h $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <16 x i16>, ptr %a0
+  %v1 = shl <16 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  store <16 x i16> %v1, ptr %res
+  ret void
+}
+
+define void @shl_v8i32_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: shl_v8i32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslli.w $xr0, $xr0, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <8 x i32>, ptr %a0
+  %v1 = shl <8 x i32> %v0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  store <8 x i32> %v1, ptr %res
+  ret void
+}
+
+define void @shl_v8i32_31(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: shl_v8i32_31:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslli.w $xr0, $xr0, 31
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <8 x i32>, ptr %a0
+  %v1 = shl <8 x i32> %v0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+  store <8 x i32> %v1, ptr %res
+  ret void
+}
+
+define void @shl_v4i64_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: shl_v4i64_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslli.d $xr0, $xr0, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+  %v0 = load <4 x i64>, ptr %a0
+  %v1 = shl <4 x i64> %v0, <i64 1, i64 1, i64 1, i64 1>
+  store <4 x i64> %v1, ptr %res
+  ret void
+}
+
+define void @shl_v4i64_63(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: shl_v4i64_63:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslli.d $xr0, $xr0, 63
+; CHECK-NEXT: xvst 
$xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = shl <4 x i64> %v0, <i64 63, i64 63, i64 63, i64 63> + store <4 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll new file mode 100644 index 0000000000000000000000000000000000000000..208a758ea4e9a2212c8b44531a9070370d58c2a1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @sitofp_v8i32_v8f32(ptr %res, ptr %in){ +; CHECK-LABEL: sitofp_v8i32_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvffint.s.w $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %in + %v1 = sitofp <8 x i32> %v0 to <8 x float> + store <8 x float> %v1, ptr %res + ret void +} + +define void @sitofp_v4f64_v4f64(ptr %res, ptr %in){ +; CHECK-LABEL: sitofp_v4f64_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvffint.d.l $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i64>, ptr %in + %v1 = sitofp <4 x i64> %v0 to <4 x double> + store <4 x double> %v1, ptr %res + ret void +} + +define void @sitofp_v4i64_v4f32(ptr %res, ptr %in){ +; CHECK-LABEL: sitofp_v4i64_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvffint.d.l $xr0, $xr0 +; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 +; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i64>, ptr %in + %v1 = sitofp <4 x i64> %v0 to <4 x float> + store <4 x float> %v1, ptr %res + ret void +} + +define void @sitofp_v4i32_v4f64(ptr %res, ptr %in){ +; CHECK-LABEL: sitofp_v4i32_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 +; CHECK-NEXT: xvffint.d.l $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %in + %v1 = sitofp <4 x i32> %v0 to <4 x double> + store <4 x double> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll new file mode 100644 index 0000000000000000000000000000000000000000..bcfff16514770f357a7d2502ffb406af8fe371df --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @sub_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: sub_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsub.b $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v2 = sub <32 x i8> %v0, %v1 + store <32 x i8> %v2, ptr %res + ret void +} + +define void @sub_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: sub_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsub.h $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %v2 = sub <16 x i16> %v0, %v1 + store <16 x i16> %v2, ptr %res + ret
void +} + +define void @sub_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: sub_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsub.w $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v2 = sub <8 x i32> %v0, %v1 + store <8 x i32> %v2, ptr %res + ret void +} + +define void @sub_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: sub_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsub.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %v2 = sub <4 x i64> %v0, %v1 + store <4 x i64> %v2, ptr %res + ret void +} + +define void @sub_v32i8_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sub_v32i8_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsubi.bu $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = sub <32 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31> + store <32 x i8> %v1, ptr %res + ret void +} + +define void @sub_v16i16_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sub_v16i16_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsubi.hu $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = sub <16 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31> + store <16 x i16> %v1, ptr %res + ret void +} + +define void @sub_v8i32_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sub_v8i32_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsubi.wu $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = sub <8 x i32> %v0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @sub_v4i64_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sub_v4i64_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsubi.du $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = sub <4 x i64> %v0, <i64 31, i64 31, i64 31, i64 31> + store <4 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll new file mode 100644 index 0000000000000000000000000000000000000000..e78084c7186d338f4c8108bc482f974bd97345e4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @udiv_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: udiv_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvdiv.bu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v2 = udiv <32 x i8> %v0, %v1 + store <32 x i8> %v2, ptr %res + ret void +} + +define void @udiv_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: udiv_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvdiv.hu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load
<16 x i16>, ptr %a1 + %v2 = udiv <16 x i16> %v0, %v1 + store <16 x i16> %v2, ptr %res + ret void +} + +define void @udiv_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: udiv_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvdiv.wu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v2 = udiv <8 x i32> %v0, %v1 + store <8 x i32> %v2, ptr %res + ret void +} + +define void @udiv_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: udiv_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvdiv.du $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %v2 = udiv <4 x i64> %v0, %v1 + store <4 x i64> %v2, ptr %res + ret void +} + +define void @udiv_v32i8_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: udiv_v32i8_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrli.b $xr0, $xr0, 3 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = udiv <32 x i8> %v0, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> + store <32 x i8> %v1, ptr %res + ret void +} + +define void @udiv_v16i16_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: udiv_v16i16_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrli.h $xr0, $xr0, 3 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = udiv <16 x i16> %v0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + store <16 x i16> %v1, ptr %res + ret void +} + +define void @udiv_v8i32_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: udiv_v8i32_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrli.w $xr0, $xr0, 3 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = udiv <8 x i32> %v0, <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @udiv_v4i64_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: udiv_v4i64_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrli.d $xr0, $xr0, 3 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = udiv <4 x i64> %v0, <i64 8, i64 8, i64 8, i64 8> + store <4 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll new file mode 100644 index 0000000000000000000000000000000000000000..70cf71c4cec218c2b8f892573431ac6add4ddf59 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @uitofp_v8i32_v8f32(ptr %res, ptr %in){ +; CHECK-LABEL: uitofp_v8i32_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvffint.s.wu $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %in + %v1 = uitofp <8 x i32> %v0 to <8 x float> + store <8 x float> %v1, ptr %res + ret void +} + +define void @uitofp_v4f64_v4f64(ptr %res, ptr %in){ +; CHECK-LABEL: uitofp_v4f64_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i64>, ptr %in + %v1 = uitofp <4 x i64> %v0 to <4 x
double> + store <4 x double> %v1, ptr %res + ret void +} + +define void @uitofp_v4i64_v4f32(ptr %res, ptr %in){ +; CHECK-LABEL: uitofp_v4i64_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 +; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 +; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i64>, ptr %in + %v1 = uitofp <4 x i64> %v0 to <4 x float> + store <4 x float> %v1, ptr %res + ret void +} + +define void @uitofp_v4i32_v4f64(ptr %res, ptr %in){ +; CHECK-LABEL: uitofp_v4i32_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 +; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %in + %v1 = uitofp <4 x i32> %v0 to <4 x double> + store <4 x double> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll new file mode 100644 index 0000000000000000000000000000000000000000..c2fb1462b7a2501a4cf632bdf352cdd76a81d950 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @xor_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: xor_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v2 = xor <32 x i8> %v0, %v1 + store <32 x i8> %v2, ptr %res + ret void +} + +define void @xor_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: xor_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %v2 = xor <16 x i16> %v0, %v1 + store <16 x i16> %v2, ptr %res + ret void +} + +define void @xor_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: xor_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v2 = xor <8 x i32> %v0, %v1 + store <8 x i32> %v2, ptr %res + ret void +} + +define void @xor_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: xor_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %v2 = xor <4 x i64> %v0, %v1 + store <4 x i64> %v2, ptr %res + ret void +} + +define void @xor_u_v32i8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: xor_u_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvxori.b $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = xor <32 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31> + store <32 x i8> %v1, ptr %res + ret void +} + +define void @xor_u_v16i16(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: xor_u_v16i16: +;
CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.h $xr1, 31 +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = xor <16 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31> + store <16 x i16> %v1, ptr %res + ret void +} + +define void @xor_u_v8i32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: xor_u_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.w $xr1, 31 +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = xor <8 x i32> %v0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @xor_u_v4i64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: xor_u_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.d $xr1, 31 +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = xor <4 x i64> %v0, <i64 31, i64 31, i64 31, i64 31> + store <4 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/mulh.ll b/llvm/test/CodeGen/LoongArch/lasx/mulh.ll new file mode 100644 index 0000000000000000000000000000000000000000..aac711a4a371ca9cc50c4ffc5be43ac860df0e51 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/mulh.ll @@ -0,0 +1,162 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @mulhs_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mulhs_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvmuh.b $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v0s = sext <32 x i8> %v0 to <32 x i16> + %v1s = sext <32 x i8> %v1 to <32 x i16> + %m = mul <32 x i16> %v0s, %v1s + %s = ashr <32 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %v2 = trunc <32 x i16> %s to <32 x i8> + store <32 x i8> %v2, ptr %res + ret void +} + +define void @mulhu_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mulhu_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvmuh.bu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v0z = zext <32 x i8> %v0 to <32 x i16> + %v1z = zext <32 x i8> %v1 to <32 x i16> + %m = mul <32 x i16> %v0z, %v1z + %s = lshr <32 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %v2 = trunc <32 x i16> %s to <32 x i8> + store <32 x i8> %v2, ptr %res + ret void +} + +define void @mulhs_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mulhs_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvmuh.h $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %v0s = sext <16 x i16> %v0 to <16 x i32> + %v1s = sext <16 x i16> %v1 to <16 x i32> + %m = mul <16 x i32> %v0s, %v1s + %s = ashr <16 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> + %v2 = trunc <16 x i32> %s to <16 x i16> + store <16 x i16> %v2, ptr %res + ret void +} + +define void @mulhu_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mulhu_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvmuh.hu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT:
ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %v0z = zext <16 x i16> %v0 to <16 x i32> + %v1z = zext <16 x i16> %v1 to <16 x i32> + %m = mul <16 x i32> %v0z, %v1z + %s = lshr <16 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> + %v2 = trunc <16 x i32> %s to <16 x i16> + store <16 x i16> %v2, ptr %res + ret void +} + +define void @mulhs_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mulhs_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvmuh.w $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v0s = sext <8 x i32> %v0 to <8 x i64> + %v1s = sext <8 x i32> %v1 to <8 x i64> + %m = mul <8 x i64> %v0s, %v1s + %s = ashr <8 x i64> %m, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32> + %v2 = trunc <8 x i64> %s to <8 x i32> + store <8 x i32> %v2, ptr %res + ret void +} + +define void @mulhu_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mulhu_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvmuh.wu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v0z = zext <8 x i32> %v0 to <8 x i64> + %v1z = zext <8 x i32> %v1 to <8 x i64> + %m = mul <8 x i64> %v0z, %v1z + %s = lshr <8 x i64> %m, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32> + %v2 = trunc <8 x i64> %s to <8 x i32> + store <8 x i32> %v2, ptr %res + ret void +} + +define void @mulhs_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mulhs_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvmuh.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %v0s = sext <4 x i64> %v0 to <4 x i128> + %v1s = sext <4 x i64> %v1 to <4 x i128> + %m = mul <4 x i128> %v0s, %v1s + %s = ashr <4 x i128> %m, <i128 64, i128 64, i128 64, i128 64> + %v2 = trunc <4 x i128> %s to <4 x i64> + store <4 x i64> %v2, ptr %res + ret void +} + +define void @mulhu_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mulhu_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvmuh.du $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %v0z = zext <4 x i64> %v0 to <4 x i128> + %v1z = zext <4 x i64> %v1 to <4 x i128> + %m = mul <4 x i128> %v0z, %v1z + %s = lshr <4 x i128> %m, <i128 64, i128 64, i128 64, i128 64> + %v2 = trunc <4 x i128> %s to <4 x i64> + store <4 x i64> %v2, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll new file mode 100644 index 0000000000000000000000000000000000000000..ec2fc28db33cc7ae13ab565fdd060a1f14ba7d58 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll @@ -0,0 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @select_v32i8_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: select_v32i8_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.h $xr1, -256 +; CHECK-NEXT: xvbitseli.b $xr1, $xr0, 1 +; CHECK-NEXT: xvst $xr1, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %sel = select <32 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <32 x i8> %v0 + store <32 x i8> %sel, ptr %res + ret void +} + +define void @select_v32i8(ptr %res, ptr %a0, ptr %a1)
nounwind { +; CHECK-LABEL: select_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvrepli.h $xr2, -256 +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %sel = select <32 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <32 x i8> %v0, <32 x i8> %v1 + store <32 x i8> %sel, ptr %res + ret void +} + +define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: lu12i.w $a1, -16 +; CHECK-NEXT: lu32i.d $a1, 0 +; CHECK-NEXT: xvreplgr2vr.w $xr2, $a1 +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i16> %v0, <16 x i16> %v1 + store <16 x i16> %sel, ptr %res + ret void +} + +define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: ori $a1, $zero, 0 +; CHECK-NEXT: lu32i.d $a1, -1 +; CHECK-NEXT: xvreplgr2vr.d $xr2, $a1 +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %sel = select <8 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <8 x i32> %v0, <8 x i32> %v1 + store <8 x i32> %sel, ptr %res + ret void +} + +define void @select_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0) +; CHECK-NEXT: addi.d $a3, $a3, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: xvld $xr0, $a3, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvld $xr2, $a2, 0 +; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %sel = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i64> %v0, <4 x i64> %v1 + store <4 x i64> %sel, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll new file mode 100644 index 0000000000000000000000000000000000000000..ed1f610a5fa61515f64eafd01230ebda1a65c1c9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll @@ -0,0 +1,398 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @buildvector_v16i8_splat(ptr %dst, i8 %a0) nounwind { +; CHECK-LABEL: buildvector_v16i8_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplgr2vr.b $vr0, $a1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <16 x i8> undef, i8 %a0, i8 0 + %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer + store <16 x i8> %splat, ptr %dst + ret void +} + +define void @buildvector_v8i16_splat(ptr %dst, i16 %a0) nounwind { +; CHECK-LABEL: buildvector_v8i16_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplgr2vr.h $vr0, $a1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <8 x i16> undef, i16 %a0, i8 0 + %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer + store <8 x i16> %splat, ptr %dst + ret void +} + +define void @buildvector_v4i32_splat(ptr %dst, i32 %a0) nounwind { +; CHECK-LABEL:
buildvector_v4i32_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplgr2vr.w $vr0, $a1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <4 x i32> undef, i32 %a0, i8 0 + %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer + store <4 x i32> %splat, ptr %dst + ret void +} + +define void @buildvector_v2i64_splat(ptr %dst, i64 %a0) nounwind { +; CHECK-LABEL: buildvector_v2i64_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplgr2vr.d $vr0, $a1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <2 x i64> undef, i64 %a0, i8 0 + %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer + store <2 x i64> %splat, ptr %dst + ret void +} + +define void @buildvector_v4f32_splat(ptr %dst, float %a0) nounwind { +; CHECK-LABEL: buildvector_v4f32_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <4 x float> undef, float %a0, i8 0 + %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer + store <4 x float> %splat, ptr %dst + ret void +} + +define void @buildvector_v2f64_splat(ptr %dst, double %a0) nounwind { +; CHECK-LABEL: buildvector_v2f64_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 +; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <2 x double> undef, double %a0, i8 0 + %splat = shufflevector <2 x double> %insert, <2 x double> undef, <2 x i32> zeroinitializer + store <2 x double> %splat, ptr %dst + ret void +} + +define void @buildvector_v16i8_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v16i8_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrepli.b $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, ptr %dst + ret void +} + +define void @buildvector_v8i16_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v8i16_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrepli.h $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, ptr %dst + ret void +} + +define void @buildvector_v4i32_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v4i32_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrepli.w $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %dst + ret void +} + +define void @buildvector_v2i64_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v2i64_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrepli.d $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <2 x i64> <i64 1, i64 1>, ptr %dst + ret void +} + +define void @buildvector_v2f32_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v2f32_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu12i.w $a1, 260096 +; CHECK-NEXT: vreplgr2vr.w $vr0, $a1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, ptr %dst + ret void +} + +define void @buildvector_v2f64_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v2f64_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu52i.d $a1, $zero, 1023 +; CHECK-NEXT: vreplgr2vr.d $vr0, $a1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <2 x double> <double 1.000000e+00, double 1.000000e+00>, ptr %dst + ret void +} + +define void
@buildvector_v16i8_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v16i8_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI12_0) +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, ptr %dst + ret void +} + +define void @buildvector_v8i16_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v8i16_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI13_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI13_0) +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, ptr %dst + ret void +} + +define void @buildvector_v4i32_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v4i32_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI14_0) +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, ptr %dst + ret void +} + +define void @buildvector_v2i64_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v2i64_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI15_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI15_0) +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <2 x i64> <i64 0, i64 1>, ptr %dst + ret void +} + +define void @buildvector_v2f32_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v2f32_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI16_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI16_0) +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, ptr %dst + ret void +} + +define void @buildvector_v2f64_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v2f64_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0) +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <2 x double> <double 0.000000e+00, double 1.000000e+00>, ptr %dst + ret void +} + +define void @buildvector_v16i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind { +; CHECK-LABEL: buildvector_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6 +; CHECK-NEXT: ld.b $a1, $sp, 0 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 7 +; CHECK-NEXT: ld.b $a1, $sp, 8 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 8 +; CHECK-NEXT: ld.b $a1, $sp, 16 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 9 +; CHECK-NEXT: ld.b $a1, $sp, 24 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 10 +; CHECK-NEXT: ld.b $a1, $sp, 32 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 11 +; CHECK-NEXT: ld.b $a1, $sp, 40 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 12 +; CHECK-NEXT: ld.b $a1, $sp, 48 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 13 +; CHECK-NEXT: ld.b $a1, $sp, 56 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 14 +; CHECK-NEXT: ld.b $a1, $sp, 64 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <16 x i8> undef, i8 %a0, i32 0 + %ins1 = insertelement <16 x i8> %ins0, i8 %a1, i32 1 +
%ins2 = insertelement <16 x i8> %ins1, i8 %a2, i32 2 + %ins3 = insertelement <16 x i8> %ins2, i8 %a3, i32 3 + %ins4 = insertelement <16 x i8> %ins3, i8 %a4, i32 4 + %ins5 = insertelement <16 x i8> %ins4, i8 %a5, i32 5 + %ins6 = insertelement <16 x i8> %ins5, i8 %a6, i32 6 + %ins7 = insertelement <16 x i8> %ins6, i8 %a7, i32 7 + %ins8 = insertelement <16 x i8> %ins7, i8 %a8, i32 8 + %ins9 = insertelement <16 x i8> %ins8, i8 %a9, i32 9 + %ins10 = insertelement <16 x i8> %ins9, i8 %a10, i32 10 + %ins11 = insertelement <16 x i8> %ins10, i8 %a11, i32 11 + %ins12 = insertelement <16 x i8> %ins11, i8 %a12, i32 12 + %ins13 = insertelement <16 x i8> %ins12, i8 %a13, i32 13 + %ins14 = insertelement <16 x i8> %ins13, i8 %a14, i32 14 + %ins15 = insertelement <16 x i8> %ins14, i8 %a15, i32 15 + store <16 x i8> %ins15, ptr %dst + ret void +} + +define void @buildvector_v8i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { +; CHECK-LABEL: buildvector_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6 +; CHECK-NEXT: ld.h $a1, $sp, 0 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0 + %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1 + %ins2 = insertelement <8 x i16> %ins1, i16 %a2, i32 2 + %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3 + %ins4 = insertelement <8 x i16> %ins3, i16 %a4, i32 4 + %ins5 = insertelement <8 x i16> %ins4, i16 %a5, i32 5 + %ins6 = insertelement <8 x i16> %ins5, i16 %a6, i32 6 + %ins7 = insertelement <8 x i16> %ins6, i16 %a7, i32 7 + store <8 x i16> %ins7, ptr %dst + ret void +} + +define void @buildvector_v4i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { +; CHECK-LABEL: buildvector_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a3, 2 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a4, 3 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0 + %ins1 = insertelement <4 x i32> %ins0, i32 %a1, i32 1 + %ins2 = insertelement <4 x i32> %ins1, i32 %a2, i32 2 + %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3 + store <4 x i32> %ins3, ptr %dst + ret void +} + +define void @buildvector_v2i64(ptr %dst, i64 %a0, i64 %a1) nounwind { +; CHECK-LABEL: buildvector_v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <2 x i64> undef, i64 %a0, i32 0 + %ins1 = insertelement <2 x i64> %ins0, i64 %a1, i32 1 + store <2 x i64> %ins1, ptr %dst + ret void +} + +define void @buildvector_v4f32(ptr %dst, float %a0, float %a1, float %a2, float %a3) nounwind { +; CHECK-LABEL: buildvector_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movfr2gr.s $a1, $fa0 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0 +; CHECK-NEXT: movfr2gr.s $a1, $fa1 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 1 +; CHECK-NEXT: movfr2gr.s $a1, $fa2 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 2 +; CHECK-NEXT: movfr2gr.s $a1, $fa3 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 3 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = 
insertelement <4 x float> undef, float %a0, i32 0 + %ins1 = insertelement <4 x float> %ins0, float %a1, i32 1 + %ins2 = insertelement <4 x float> %ins1, float %a2, i32 2 + %ins3 = insertelement <4 x float> %ins2, float %a3, i32 3 + store <4 x float> %ins3, ptr %dst + ret void +} + +define void @buildvector_v2f64(ptr %dst, double %a0, double %a1) nounwind { +; CHECK-LABEL: buildvector_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movfr2gr.d $a1, $fa0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0 +; CHECK-NEXT: movfr2gr.d $a1, $fa1 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <2 x double> undef, double %a0, i32 0 + %ins1 = insertelement <2 x double> %ins0, double %a1, i32 1 + store <2 x double> %ins1, ptr %dst + ret void +} + +;; BUILD_VECTOR through stack. +;; If `isShuffleMaskLegal` returns true, it will lead to an infinite loop. +define void @extract1_i32_zext_insert0_i64_undef(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 +; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 +; CHECK-NEXT: st.d $a0, $sp, 0 +; CHECK-NEXT: vld $vr0, $sp, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %v = load volatile <4 x i32>, ptr %src + %e = extractelement <4 x i32> %v, i32 1 + %z = zext i32 %e to i64 + %r = insertelement <2 x i64> undef, i64 %z, i32 0 + store <2 x i64> %r, ptr %dst + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll new file mode 100644 index 0000000000000000000000000000000000000000..5df553fba7ef773f9eac09cd2e4d5e686c719384 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll @@ -0,0 +1,115 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @ctpop_v16i8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctpop_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vpcnt.b $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <16 x i8>, ptr %src + %res = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %v) + store <16 x i8> %res, ptr %dst + ret void +} + +define void @ctpop_v8i16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctpop_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vpcnt.h $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <8 x i16>, ptr %src + %res = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %v) + store <8 x i16> %res, ptr %dst + ret void +} + +define void @ctpop_v4i32(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctpop_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vpcnt.w $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <4 x i32>, ptr %src + %res = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %v) + store <4 x i32> %res, ptr %dst + ret void +} + +define void @ctpop_v2i64(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctpop_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vpcnt.d $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <2 x i64>, ptr %src + %res = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %v) + store <2 x i64> %res, ptr %dst + ret void +} + +define void @ctlz_v16i8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlz_v16i8: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vclz.b $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <16 x i8>, ptr %src + %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %v, i1 false) + store <16 x i8> %res, ptr %dst + ret void +} + +define void @ctlz_v8i16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlz_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vclz.h $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <8 x i16>, ptr %src + %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %v, i1 false) + store <8 x i16> %res, ptr %dst + ret void +} + +define void @ctlz_v4i32(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlz_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vclz.w $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <4 x i32>, ptr %src + %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %v, i1 false) + store <4 x i32> %res, ptr %dst + ret void +} + +define void @ctlz_v2i64(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlz_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vclz.d $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <2 x i64>, ptr %src + %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %v, i1 false) + store <2 x i64> %res, ptr %dst + ret void +} + +declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) +declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) +declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) +declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) +declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) +declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) +declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) diff --git a/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll b/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll new file mode 100644 index 0000000000000000000000000000000000000000..8e0459b4afabef3499b852d623365360231ebbb8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll @@ -0,0 +1,804 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=fast < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST +; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=on < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-ON +; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=off < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF + +define void @vfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfadd.d $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfadd.d $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vst $vr0, 
$a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul<2 x double> %v0, %v1 + %add = fadd<2 x double> %mul, %v2 + store <2 x double> %add, ptr %res + ret void +} + +define void @vfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul<2 x double> %v0, %v1 + %sub = fsub<2 x double> %mul, %v2 + store <2 x double> %sub, ptr %res + ret void +} + +define void @vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfadd.d $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vbitrevi.d $vr0, $vr0, 63 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfadd.d $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vbitrevi.d $vr0, $vr0, 63 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul<2 x double> %v0, %v1 + %add = fadd<2 x double> %mul, %v2 + %negadd = fneg<2 x double> %add + store <2 x double> %negadd, ptr %res + ret void +} + +define void @vfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmadd_d_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmadd_d_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, 
$a1, 0 +; CONTRACT-ON-NEXT: vbitrevi.d $vr1, $vr1, 63 +; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmadd_d_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vbitrevi.d $vr1, $vr1, 63 +; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv0 = fneg nsz<2 x double> %v0 + %negv2 = fneg nsz<2 x double> %v2 + %mul = fmul nsz<2 x double> %negv0, %v1 + %add = fadd nsz<2 x double> %mul, %negv2 + store <2 x double> %add, ptr %res + ret void +} + +;; Check that vfnmadd.d is not emitted. +define void @not_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_vfnmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 +; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_vfnmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vbitrevi.d $vr1, $vr1, 63 +; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_vfnmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vbitrevi.d $vr1, $vr1, 63 +; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv0 = fneg<2 x double> %v0 + %negv2 = fneg<2 x double> %v2 + %mul = fmul<2 x double> %negv0, %v1 + %add = fadd<2 x double> %mul, %negv2 + store <2 x double> %add, ptr %res + ret void +} + +define void @vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vbitrevi.d $vr0, $vr0, 63 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 
0 +; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vbitrevi.d $vr0, $vr0, 63 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv2 = fneg<2 x double> %v2 + %mul = fmul<2 x double> %v0, %v1 + %add = fadd<2 x double> %mul, %negv2 + %neg = fneg<2 x double> %add + store <2 x double> %neg, ptr %res + ret void +} + +define void @vfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmsub_d_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmsub_d_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmsub_d_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv0 = fneg nsz<2 x double> %v0 + %mul = fmul nsz<2 x double> %negv0, %v1 + %add = fadd nsz<2 x double> %mul, %v2 + store <2 x double> %add, ptr %res + ret void +} + +;; Check that vfnmsub.d is not emitted. 
+define void @not_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_vfnmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 +; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_vfnmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_vfnmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv0 = fneg<2 x double> %v0 + %mul = fmul<2 x double> %negv0, %v1 + %add = fadd<2 x double> %mul, %v2 + store <2 x double> %add, ptr %res + ret void +} + +define void @contract_vfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul contract <2 x double> %v0, %v1 + %add = fadd contract <2 x double> %mul, %v2 + store <2 x double> %add, ptr %res + ret void +} + +define void @contract_vfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 
0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul contract <2 x double> %v0, %v1 + %sub = fsub contract <2 x double> %mul, %v2 + store <2 x double> %sub, ptr %res + ret void +} + +define void @contract_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfnmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfnmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfnmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul contract <2 x double> %v0, %v1 + %add = fadd contract <2 x double> %mul, %v2 + %negadd = fneg contract <2 x double> %add + store <2 x double> %negadd, ptr %res + ret void +} + +define void @contract_vfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfnmadd_d_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfnmadd_d_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfnmadd_d_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv0 = fneg contract nsz<2 x double> %v0 + %negv2 = fneg contract nsz<2 x double> %v2 + %mul = fmul contract nsz<2 x double> %negv0, %v1 + %add = fadd contract nsz<2 x double> %mul, %negv2 + store <2 x double> %add, ptr %res + ret void +} + +;; Check that vfnmadd.d is not emitted. 
+define void @not_contract_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_contract_vfnmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 +; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_contract_vfnmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vbitrevi.d $vr2, $vr2, 63 +; CONTRACT-ON-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_contract_vfnmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vbitrevi.d $vr2, $vr2, 63 +; CONTRACT-OFF-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv0 = fneg contract <2 x double> %v0 + %negv2 = fneg contract <2 x double> %v2 + %mul = fmul contract <2 x double> %negv0, %v1 + %add = fadd contract <2 x double> %mul, %negv2 + store <2 x double> %add, ptr %res + ret void +} + +define void @contract_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfnmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfnmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfnmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv2 = fneg contract <2 x double> %v2 + %mul = fmul contract <2 x double> %v0, %v1 + %add = fadd contract <2 x double> %mul, %negv2 + %neg = fneg contract <2 x double> %add + store <2 x double> %neg, ptr %res + ret void +} + +define void @contract_vfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfnmsub_d_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfnmsub_d_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmsub.d $vr0, 
$vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfnmsub_d_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv0 = fneg contract nsz<2 x double> %v0 + %mul = fmul contract nsz<2 x double> %negv0, %v1 + %add = fadd contract nsz<2 x double> %mul, %v2 + store <2 x double> %add, ptr %res + ret void +} + +;; Check that vfnmsub.d is not emitted. +define void @not_contract_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_contract_vfnmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 +; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_contract_vfnmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vbitrevi.d $vr2, $vr2, 63 +; CONTRACT-ON-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_contract_vfnmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vbitrevi.d $vr2, $vr2, 63 +; CONTRACT-OFF-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv0 = fneg contract <2 x double> %v0 + %mul = fmul contract <2 x double> %negv0, %v1 + %add = fadd contract <2 x double> %mul, %v2 + store <2 x double> %add, ptr %res + ret void +} + +define void @vfmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfmadd_d_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfmadd_d_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfmadd_d_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul contract <2 x double> %v0, %v1 + %add = fadd contract <2 x double> %mul, %v2 + store <2 x double> %add, ptr %res + ret void +} + +define void 
@vfmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfmsub_d_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfmsub_d_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfmsub_d_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul contract <2 x double> %v0, %v1 + %sub = fsub contract <2 x double> %mul, %v2 + store <2 x double> %sub, ptr %res + ret void +} + +define void @vfnmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmadd_d_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmadd_d_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmadd_d_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul contract <2 x double> %v0, %v1 + %add = fadd contract <2 x double> %mul, %v2 + %negadd = fneg contract <2 x double> %add + store <2 x double> %negadd, ptr %res + ret void +} + +define void @vfnmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmsub_d_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmsub_d_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmsub_d_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmsub.d $vr0, 
$vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul contract <2 x double> %v0, %v1 + %negv2 = fneg contract <2 x double> %v2 + %add = fadd contract <2 x double> %negv2, %mul + %negadd = fneg contract <2 x double> %add + store <2 x double> %negadd, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll b/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll new file mode 100644 index 0000000000000000000000000000000000000000..7efbd61c0c4f7b5fac52ad297de76030b1835e6f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll @@ -0,0 +1,804 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=fast < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST +; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=on < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-ON +; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=off < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF + +define void @vfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfadd.s $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfadd.s $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul<4 x float> %v0, %v1 + %add = fadd<4 x float> %mul, %v2 + store <4 x float> %add, ptr %res + ret void +} + +define void @vfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + 
%v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul<4 x float> %v0, %v1 + %sub = fsub<4 x float> %mul, %v2 + store <4 x float> %sub, ptr %res + ret void +} + +define void @vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfadd.s $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vbitrevi.w $vr0, $vr0, 31 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfadd.s $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vbitrevi.w $vr0, $vr0, 31 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul<4 x float> %v0, %v1 + %add = fadd<4 x float> %mul, %v2 + %negadd = fneg<4 x float> %add + store <4 x float> %negadd, ptr %res + ret void +} + +define void @vfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmadd_s_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmadd_s_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vbitrevi.w $vr1, $vr1, 31 +; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmadd_s_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vbitrevi.w $vr1, $vr1, 31 +; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv0 = fneg nsz<4 x float> %v0 + %negv2 = fneg nsz<4 x float> %v2 + %mul = fmul nsz<4 x float> %negv0, %v1 + %add = fadd nsz<4 x float> %mul, %negv2 + store <4 x float> %add, ptr %res + ret void +} + +;; Check that vfnmadd.s is not emitted. 
+define void @not_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_vfnmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 +; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_vfnmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vbitrevi.w $vr1, $vr1, 31 +; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_vfnmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vbitrevi.w $vr1, $vr1, 31 +; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv0 = fneg<4 x float> %v0 + %negv2 = fneg<4 x float> %v2 + %mul = fmul<4 x float> %negv0, %v1 + %add = fadd<4 x float> %mul, %negv2 + store <4 x float> %add, ptr %res + ret void +} + +define void @vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vbitrevi.w $vr0, $vr0, 31 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vbitrevi.w $vr0, $vr0, 31 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv2 = fneg<4 x float> %v2 + %mul = fmul<4 x float> %v0, %v1 + %add = fadd<4 x float> %mul, %negv2 + %neg = fneg<4 x float> %add + store <4 x float> %neg, ptr %res + ret void +} + +define void @vfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmsub_s_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmsub_s_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; 
CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmsub_s_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv0 = fneg nsz<4 x float> %v0 + %mul = fmul nsz<4 x float> %negv0, %v1 + %add = fadd nsz<4 x float> %mul, %v2 + store <4 x float> %add, ptr %res + ret void +} + +;; Check that vfnmsub.s is not emitted. +define void @not_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_vfnmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 +; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_vfnmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_vfnmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv0 = fneg<4 x float> %v0 + %mul = fmul<4 x float> %negv0, %v1 + %add = fadd<4 x float> %mul, %v2 + store <4 x float> %add, ptr %res + ret void +} + +define void @contract_vfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul contract <4 x float> %v0, %v1 + %add = fadd contract <4 x float> %mul, %v2 + store <4 x float> %add, ptr %res + ret void +} + +define void 
@contract_vfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul contract <4 x float> %v0, %v1 + %sub = fsub contract <4 x float> %mul, %v2 + store <4 x float> %sub, ptr %res + ret void +} + +define void @contract_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfnmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfnmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfnmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul contract <4 x float> %v0, %v1 + %add = fadd contract <4 x float> %mul, %v2 + %negadd = fneg contract <4 x float> %add + store <4 x float> %negadd, ptr %res + ret void +} + +define void @contract_vfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfnmadd_s_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfnmadd_s_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfnmadd_s_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmadd.s 
$vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv0 = fneg contract nsz<4 x float> %v0 + %negv2 = fneg contract nsz<4 x float> %v2 + %mul = fmul contract nsz<4 x float> %negv0, %v1 + %add = fadd contract nsz<4 x float> %mul, %negv2 + store <4 x float> %add, ptr %res + ret void +} + +;; Check that vfnmadd.s is not emitted. +define void @not_contract_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_contract_vfnmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 +; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_contract_vfnmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vbitrevi.w $vr2, $vr2, 31 +; CONTRACT-ON-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_contract_vfnmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vbitrevi.w $vr2, $vr2, 31 +; CONTRACT-OFF-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv0 = fneg contract <4 x float> %v0 + %negv2 = fneg contract <4 x float> %v2 + %mul = fmul contract <4 x float> %negv0, %v1 + %add = fadd contract <4 x float> %mul, %negv2 + store <4 x float> %add, ptr %res + ret void +} + +define void @contract_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfnmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfnmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfnmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv2 = fneg contract <4 x float> %v2 + %mul = fmul contract <4 x float> %v0, %v1 + %add = fadd contract <4 x float> %mul, %negv2 + %neg = fneg contract <4 x float> %add + store <4 x float> %neg, ptr %res + ret void +} + +define void @contract_vfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfnmsub_s_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; 
CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfnmsub_s_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfnmsub_s_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv0 = fneg contract nsz<4 x float> %v0 + %mul = fmul contract nsz<4 x float> %negv0, %v1 + %add = fadd contract nsz<4 x float> %mul, %v2 + store <4 x float> %add, ptr %res + ret void +} + +;; Check that vfnmsub.s is not emitted. +define void @not_contract_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_contract_vfnmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 +; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_contract_vfnmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vbitrevi.w $vr2, $vr2, 31 +; CONTRACT-ON-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_contract_vfnmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vbitrevi.w $vr2, $vr2, 31 +; CONTRACT-OFF-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv0 = fneg contract <4 x float> %v0 + %mul = fmul contract <4 x float> %negv0, %v1 + %add = fadd contract <4 x float> %mul, %v2 + store <4 x float> %add, ptr %res + ret void +} + +define void @vfmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfmadd_s_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfmadd_s_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfmadd_s_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: 
vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul contract <4 x float> %v0, %v1 + %add = fadd contract <4 x float> %mul, %v2 + store <4 x float> %add, ptr %res + ret void +} + +define void @vfmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfmsub_s_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfmsub_s_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfmsub_s_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul contract <4 x float> %v0, %v1 + %sub = fsub contract <4 x float> %mul, %v2 + store <4 x float> %sub, ptr %res + ret void +} + +define void @vfnmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmadd_s_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmadd_s_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmadd_s_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul contract <4 x float> %v0, %v1 + %add = fadd contract <4 x float> %mul, %v2 + %negadd = fneg contract <4 x float> %add + store <4 x float> %negadd, ptr %res + ret void +} + +define void @vfnmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmsub_s_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmsub_s_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; 
CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmsub_s_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul contract <4 x float> %v0, %v1 + %negv2 = fneg contract <4 x float> %v2 + %add = fadd contract <4 x float> %negv2, %mul + %negadd = fneg contract <4 x float> %add + store <4 x float> %negadd, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll new file mode 100644 index 0000000000000000000000000000000000000000..a57bc1ca0e94885dd0a1a7e94a7748f5b38ee796 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +;; fsqrt +define void @sqrt_v4f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sqrt_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vfsqrt.s $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0, align 16 + %sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %v0) + store <4 x float> %sqrt, ptr %res, align 16 + ret void +} + +define void @sqrt_v2f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sqrt_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vfsqrt.d $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0, align 16 + %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0) + store <2 x double> %sqrt, ptr %res, align 16 + ret void +} + +;; 1.0 / (fsqrt vec) +define void @one_div_sqrt_v4f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: one_div_sqrt_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vfrsqrt.s $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0, align 16 + %sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %v0) + %div = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %sqrt + store <4 x float> %div, ptr %res, align 16 + ret void +} + +define void @one_div_sqrt_v2f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: one_div_sqrt_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vfrsqrt.d $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0, align 16 + %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0) + %div = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, %sqrt + store <2 x double> %div, ptr %res, align 16 + ret void +} + +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll new file mode 100644 index 0000000000000000000000000000000000000000..811d9d712de4e671544722d363bedc792d051969 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc 
--mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vabsd_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vabsd_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vabsd_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vabsd_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vabsd_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vabsd_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vabsd_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vabsd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vabsd_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vabsd_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vabsd_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vabsd_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vabsd_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vabsd_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vabsd_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vabsd_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll new file mode 100644 index 0000000000000000000000000000000000000000..fac16c8308dafbb79a93d3c9ddfbe1e99994091c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> 
@llvm.loongarch.lsx.vadd.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vadd_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vadd_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vadd_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vadd_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vadd_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vadd_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vadd_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vadd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vadd_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vadd_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vadd.q $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll new file mode 100644 index 0000000000000000000000000000000000000000..79be0a184bfb18a5b22a40ad7db8f7f4390d9a3c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vadda_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vadda_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vadda.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vadda_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vadda_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vadda.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vadda_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vadda_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vadda.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vadda_d(<2 x i64> %va, <2 x i64> %vb) nounwind 
{ +; CHECK-LABEL: lsx_vadda_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vadda.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..6875872b6f83b685b01df71aa218c70ba66afb21 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vaddi_bu_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vaddi.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vaddi_bu_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vaddi.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 32) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vaddi_hu_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vaddi.hu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vaddi_hu_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vaddi.hu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vaddi_wu_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vaddi.wu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vaddi_wu_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vaddi.wu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vaddi_du_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vaddi.du: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vaddi_du_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vaddi.du: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 32) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..87d32b3ce02a8ffeddb562c6fea4e33d5d435d48 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vaddi_bu(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16>, i32) + 
+define <8 x i16> @lsx_vaddi_hu(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vaddi_wu(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vaddi_du(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll new file mode 100644 index 0000000000000000000000000000000000000000..b9134e0724fe4c4079d037e60c7cf5ae2dc04548 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vaddi_bu(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vaddi_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddi.bu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 31) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vaddi_hu(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vaddi_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddi.hu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 31) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vaddi_wu(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vaddi_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddi.wu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vaddi_du(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vaddi_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddi.du $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 31) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll new file mode 100644 index 0000000000000000000000000000000000000000..086e3bec12d2365b44bfb3cae2b45fa7933db946 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll @@ -0,0 +1,290 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vaddwev_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.h.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> 
@llvm.loongarch.lsx.vaddwev.w.h(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vaddwev_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.w.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vaddwev_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.d.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vaddwev_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.q.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vaddwev_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.h.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vaddwev_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.w.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vaddwev_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.d.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vaddwev_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.q.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vaddwev_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.h.bu.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vaddwev_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.w.hu.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> %va, <8 x i16> 
%vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vaddwev_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.d.wu.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vaddwev_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.q.du.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vaddwod_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.h.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vaddwod_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.w.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vaddwod_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.d.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vaddwod_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.q.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vaddwod_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.h.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vaddwod_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.w.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vaddwod_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.d.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> 
@llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vaddwod_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.q.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vaddwod_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.h.bu.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vaddwod_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.w.hu.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vaddwod_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.d.wu.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vaddwod_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.q.du.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll new file mode 100644 index 0000000000000000000000000000000000000000..77496239c3a9f75eba70acbd910999ff1fba888f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vand_v(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vand_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..82a117b2aba577a11d8653b87807c0428806024d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vandi_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vandi.b: argument out 
of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vandi_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vandi.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 256) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..c0c35c775266d6270ea1e68d4c222d51e93f76fb --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vandi_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll new file mode 100644 index 0000000000000000000000000000000000000000..9a1c38a641d056a48f373aaba5431fd8f3608629 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vandi_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vandi_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vandi.b $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 1) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll new file mode 100644 index 0000000000000000000000000000000000000000..b08c759ecc322bd5139a736d8aefe856a43629ed --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vandn_v(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vandn_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll new file mode 100644 index 0000000000000000000000000000000000000000..fb0861f4cd5eec7b76dea2bfaa3e0b9aec09e1b9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vavg_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vavg_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavg.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> 
@llvm.loongarch.lsx.vavg.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vavg_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vavg_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavg.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vavg_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vavg_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavg.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vavg_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vavg_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavg.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vavg_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vavg_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavg.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vavg_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vavg_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavg.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vavg_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vavg_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavg.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vavg_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vavg_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavg.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll new file mode 100644 index 0000000000000000000000000000000000000000..8bf7d0ed8817325232cd0def475d996a77ad64ad --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vavgr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vavgr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavgr.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vavgr_h(<8 x i16> %va, <8 x i16> 
%vb) nounwind { +; CHECK-LABEL: lsx_vavgr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavgr.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vavgr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vavgr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavgr.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vavgr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vavgr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavgr.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vavgr_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vavgr_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavgr.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vavgr_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vavgr_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavgr.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vavgr_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vavgr_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavgr.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vavgr_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vavgr_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavgr.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..b020806cd86cbdb309dd76e05233be7d1b89b6dc --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vbitclri_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitclri.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vbitclri_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitclri.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16>, 
i32) + +define <8 x i16> @lsx_vbitclri_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitclri.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vbitclri_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitclri.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vbitclri_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitclri.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vbitclri_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitclri.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vbitclri_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitclri.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vbitclri_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitclri.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..df6cdb99cdbcb94d48747026694c152e683fc390 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vbitclri_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vbitclri_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vbitclri_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vbitclri_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll new file mode 100644 index 0000000000000000000000000000000000000000..f5fba6dbb1414376ff22db9127ac71b111082b25 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated 
by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vbitclr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vbitclr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclr.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vbitclr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vbitclr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclr.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vbitclr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vbitclr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclr.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vbitclr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vbitclr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclr.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vbitclri_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vbitclri_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclri.b $vr0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 7) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vbitclri_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vbitclri_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclri.h $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vbitclri_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vbitclri_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclri.w $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vbitclri_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vbitclri_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclri.d $vr0, $vr0, 63 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 63) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..24b6ec3284cb8d9a07013a6dd43f9eaab57ca540 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> 
@llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vbitrevi_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitrevi.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vbitrevi_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitrevi.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vbitrevi_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitrevi.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vbitrevi_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitrevi.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vbitrevi_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitrevi.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vbitrevi_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitrevi.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vbitrevi_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitrevi.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vbitrevi_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitrevi.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..3ffb494c9907a71edfb0fc5b0fc13c53cd2ed0c0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vbitrevi_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vbitrevi_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vbitrevi_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64>, i32) + +define <2
x i64> @lsx_vbitrevi_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll new file mode 100644 index 0000000000000000000000000000000000000000..ad56e88fdb8828ce60ab9d60092c052fc5d13020 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vbitrev_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vbitrev_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitrev.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vbitrev_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vbitrev_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitrev.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vbitrev_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vbitrev_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitrev.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vbitrev_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vbitrev_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitrev.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vbitrevi_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vbitrevi_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitrevi.b $vr0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 7) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vbitrevi_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vbitrevi_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitrevi.h $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vbitrevi_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vbitrevi_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitrevi.w $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vbitrevi_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vbitrevi_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitrevi.d $vr0, $vr0, 63 +; CHECK-NEXT: ret 
+entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 63) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll new file mode 100644 index 0000000000000000000000000000000000000000..4b4b5ff1fc8cdb501154835188ebdda5847786b5 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8>, <16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vbitsel_v(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { +; CHECK-LABEL: lsx_vbitsel_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitsel.v $vr0, $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..bc63b40e9fca76d607d46faee3aa3cbb5653d605 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vbitseli_b_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vbitseli.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vbitseli_b_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vbitseli.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 256) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..52c1eb7d202438f32454da2742faa3929f12fcbe --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vbitseli_b(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll new file mode 100644 index 0000000000000000000000000000000000000000..28d342b5c378fb85a23ab28fcfd5148a9663edc6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vbitseli_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vbitseli_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitseli.b $vr0, $vr1, 255 +; 
CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 255) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..e57e14d8cb077f596bd71fb7dabcef6f0c6c1dde --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vbitseti_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitseti.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vbitseti_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitseti.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vbitseti_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitseti.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vbitseti_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitseti.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vbitseti_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitseti.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vbitseti_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitseti.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vbitseti_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitseti.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vbitseti_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitseti.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..9b2bde015ed93f625ed06ebd6f4af05bf43dd09d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vbitseti_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16>, i32) + +define <8 x i16> 
@lsx_vbitseti_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vbitseti_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vbitseti_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll new file mode 100644 index 0000000000000000000000000000000000000000..75d98e6f8bce1acea086cbd028b387599d264ff3 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vbitset_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vbitset_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitset.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vbitset_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vbitset_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitset.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vbitset_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vbitset_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitset.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vbitset_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vbitset_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitset.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vbitseti_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vbitseti_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitseti.b $vr0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 7) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vbitseti_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vbitseti_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitseti.h $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x 
i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vbitseti_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vbitseti_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitseti.w $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vbitseti_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vbitseti_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitseti.d $vr0, $vr0, 63 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 63) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..eb49af49c9bee2925f88be60e515f3bf5e968186 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) + +define <16 x i8> @lsx_vbsll_v_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbsll.v: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vbsll_v_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbsll.v: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 32) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..5b10c9e91a4f41ceb01911be0eb5b84068d97f1d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) + +define <16 x i8> @lsx_vbsll_v(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll new file mode 100644 index 0000000000000000000000000000000000000000..e7eb1cfcb4074706cb044e9349e57503f9b1811b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) + +define <16 x i8> @lsx_vbsll_v(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vbsll_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbsll.v $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 31) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..bf56822e2ef59d399e9b8725e20d3c11d697151b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 
--mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) + +define <16 x i8> @lsx_vbsrl_v_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbsrl.v: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vbsrl_v_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbsrl.v: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 32) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..0bc038c869ced502530018f7190e664507f28f8f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) + +define <16 x i8> @lsx_vbsrl_v(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll new file mode 100644 index 0000000000000000000000000000000000000000..fe0565297641bc3f859aec10ee0f3cb8d8f50a22 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) + +define <16 x i8> @lsx_vbsrl_v(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vbsrl_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbsrl.v $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 31) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll new file mode 100644 index 0000000000000000000000000000000000000000..c581109f3fd0b5f81645f8bbeea20bd10ff8548a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8>) + +define <16 x i8> @lsx_vclo_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vclo_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vclo.b $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> %va) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16>) + +define <8 x i16> @lsx_vclo_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vclo_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vclo.h $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> %va) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32>) + +define <4 x i32> @lsx_vclo_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vclo_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vclo.w $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> %va) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64>) + 
+define <2 x i64> @lsx_vclo_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vclo_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vclo.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> %va) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll new file mode 100644 index 0000000000000000000000000000000000000000..25c37b64349b35dd4304537074a6d89de68537e0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8>) + +define <16 x i8> @lsx_vclz_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vclz_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vclz.b $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> %va) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16>) + +define <8 x i16> @lsx_vclz_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vclz_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vclz.h $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> %va) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32>) + +define <4 x i32> @lsx_vclz_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vclz_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vclz.w $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> %va) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64>) + +define <2 x i64> @lsx_vclz_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vclz_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vclz.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> %va) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll new file mode 100644 index 0000000000000000000000000000000000000000..53166e84d269a3fb16f83d7fbff38251fa1972cd --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vdiv_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vdiv_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdiv.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vdiv_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vdiv_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdiv.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vdiv_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vdiv_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdiv.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> %va, <4 x i32> %vb) + 
ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vdiv_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vdiv_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdiv.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vdiv_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vdiv_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdiv.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vdiv_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vdiv_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdiv.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vdiv_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vdiv_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdiv.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vdiv_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vdiv_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdiv.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll new file mode 100644 index 0000000000000000000000000000000000000000..2f3e891a9eef2a68ca854463b615e2dd3e16f1a2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8>) + +define <8 x i16> @lsx_vexth_h_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vexth_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexth.h.b $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> %va) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16>) + +define <4 x i32> @lsx_vexth_w_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vexth_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexth.w.h $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> %va) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32>) + +define <2 x i64> @lsx_vexth_d_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vexth_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexth.d.w $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> %va) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64>) + +define <2 x i64> @lsx_vexth_q_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vexth_q_d: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: vexth.q.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> %va) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8>) + +define <8 x i16> @lsx_vexth_hu_bu(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vexth_hu_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexth.hu.bu $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> %va) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16>) + +define <4 x i32> @lsx_vexth_wu_hu(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vexth_wu_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexth.wu.hu $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> %va) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32>) + +define <2 x i64> @lsx_vexth_du_wu(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vexth_du_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexth.du.wu $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> %va) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64>) + +define <2 x i64> @lsx_vexth_qu_du(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vexth_qu_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexth.qu.du $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> %va) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll new file mode 100644 index 0000000000000000000000000000000000000000..cbf19e2a391905d783233526650366fae4415301 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64>) + +define <2 x i64> @lsx_vextl_q_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vextl_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vextl.q.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> %va) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64>) + +define <2 x i64> @lsx_vextl_qu_du(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vextl_qu_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vextl.qu.du $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> %va) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..7f94234ed603b666171d41887c7c53c1dc658a2e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vextrins_b_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vextrins.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vextrins_b_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; 
CHECK: llvm.loongarch.lsx.vextrins.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 256) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vextrins_h_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vextrins.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vextrins_h_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vextrins.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 256) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vextrins_w_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vextrins.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vextrins_w_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vextrins.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 256) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vextrins_d_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vextrins.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vextrins_d_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vextrins.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 256) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..e834002bb60b8992bc05de1ac88b340a9a3c8ef7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vextrins_b(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vextrins_h(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 %c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vextrins_w(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64>, <2 x i64>, 
i32) + +define <2 x i64> @lsx_vextrins_d(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 %c) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll new file mode 100644 index 0000000000000000000000000000000000000000..8f03a2b812917ebb206821acd1efab7e283807c8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vextrins_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vextrins_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vextrins.b $vr0, $vr1, 255 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 255) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vextrins_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vextrins_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vextrins.h $vr0, $vr1, 255 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 255) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vextrins_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vextrins_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vextrins.w $vr0, $vr1, 255 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 255) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vextrins_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vextrins_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vextrins.d $vr0, $vr1, 255 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 255) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll new file mode 100644 index 0000000000000000000000000000000000000000..569002314c92923c07b58ea2aca5c4a65d60b817 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float>, <4 x float>) + +define <4 x float> @lsx_vfadd_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfadd_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfadd.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> %va, <4 x float> %vb) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double>, <2 x double>) + +define <2 x double> @lsx_vfadd_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfadd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> 
%va, <2 x double> %vb) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll new file mode 100644 index 0000000000000000000000000000000000000000..0c668218710174f07e9096826c074a48b78c68a5 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float>) + +define <4 x i32> @lsx_vfclass_s(<4 x float> %va) nounwind { +; CHECK-LABEL: lsx_vfclass_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfclass.s $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> %va) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double>) + +define <2 x i64> @lsx_vfclass_d(<2 x double> %va) nounwind { +; CHECK-LABEL: lsx_vfclass_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfclass.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> %va) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll new file mode 100644 index 0000000000000000000000000000000000000000..669c53b73b16fe49e6810be5e55745575b68ff71 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll @@ -0,0 +1,530 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_caf_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_caf_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.caf.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_caf_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_caf_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.caf.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_cun_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cun_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cun.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_cun_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cun_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cun.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_ceq_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_ceq_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.ceq.s 
$vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_ceq_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_ceq_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.ceq.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_cueq_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cueq_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cueq.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_cueq_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cueq_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cueq.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_clt_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_clt_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.clt.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_clt_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_clt_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.clt.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_cult_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cult_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cult.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_cult_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cult_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cult.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_cle_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cle_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cle.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double>, <2 x double>) + +define <2 x 
i64> @lsx_vfcmp_cle_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cle_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cle.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_cule_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cule_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cule.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_cule_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cule_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cule.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_cne_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cne_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cne.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_cne_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cne_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cne.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_cor_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cor_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cor.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_cor_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cor_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cor.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_cune_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cune_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cune.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_cune_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cune_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cune.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> 
@llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_saf_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_saf_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.saf.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_saf_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_saf_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.saf.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_sun_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sun_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sun.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_sun_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sun_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sun.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_seq_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_seq_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.seq.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_seq_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_seq_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.seq.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_sueq_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sueq_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sueq.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_sueq_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sueq_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sueq.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_slt_s(<4 x float> %va, <4 x float> %vb) nounwind { +; 
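+; Annotation: the vfcmp condition mnemonics follow the usual FCMP naming
+; scheme: a leading 'c' marks a quiet comparison and 's' (the series starting
+; here) a signaling one that raises Invalid Operation on quiet-NaN operands;
+; af = always false, un = unordered, or = ordered, and ueq/ult/ule/une are
+; the unordered-or-* forms.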
CHECK-LABEL: lsx_vfcmp_slt_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.slt.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_slt_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_slt_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.slt.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_sult_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sult_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sult.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_sult_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sult_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sult.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_sle_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sle_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sle.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_sle_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sle_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sle.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_sule_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sule_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sule.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_sule_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sule_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sule.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_sne_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sne_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sne.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare 
<2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_sne_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sne_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sne.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_sor_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sor_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sor.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_sor_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sor_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sor.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_sune_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sune_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sune.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_sune_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sune_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sune.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll new file mode 100644 index 0000000000000000000000000000000000000000..a6a151a96d84e7f7496032c09fc313ab48b1e074 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float>, <4 x float>) + +define <8 x i16> @lsx_vfcvt_h_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcvt_h_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcvt.h.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> %va, <4 x float> %vb) + ret <8 x i16> %res +} + +declare <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double>, <2 x double>) + +define <4 x float> @lsx_vfcvt_s_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcvt_s_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcvt.s.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> %va, <2 x double> %vb) + ret <4 x float> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll new file mode 100644 index 0000000000000000000000000000000000000000..a9e4328bd011dbd7d9a06f2d1a7d26ecac816190 --- 
/dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16>) + +define <4 x float> @lsx_vfcvth_s_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vfcvth_s_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcvth.s.h $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> %va) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float>) + +define <2 x double> @lsx_vfcvth_d_s(<4 x float> %va) nounwind { +; CHECK-LABEL: lsx_vfcvth_d_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcvth.d.s $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> %va) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll new file mode 100644 index 0000000000000000000000000000000000000000..9a69964bb22741ffc32f00299dff78473caba8be --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16>) + +define <4 x float> @lsx_vfcvtl_s_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vfcvtl_s_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcvtl.s.h $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> %va) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float>) + +define <2 x double> @lsx_vfcvtl_d_s(<4 x float> %va) nounwind { +; CHECK-LABEL: lsx_vfcvtl_d_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcvtl.d.s $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> %va) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll new file mode 100644 index 0000000000000000000000000000000000000000..1ca8e5e2c0e9c435ba71ecd3d11360ad7dbd84db --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float>, <4 x float>) + +define <4 x float> @lsx_vfdiv_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfdiv_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfdiv.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> %va, <4 x float> %vb) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double>, <2 x double>) + +define <2 x double> @lsx_vfdiv_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfdiv_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfdiv.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> %va, <2 x double> %vb) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll new file mode 100644 index 
0000000000000000000000000000000000000000..62fbcfa339cda62d8e4b5dfb9f00e6f1b303f81a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll @@ -0,0 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32>) + +define <4 x float> @lsx_vffint_s_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vffint_s_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vffint.s.w $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> %va) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64>) + +define <2 x double> @lsx_vffint_d_l(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vffint_d_l: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vffint.d.l $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> %va) + ret <2 x double> %res +} + +declare <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32>) + +define <4 x float> @lsx_vffint_s_wu(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vffint_s_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vffint.s.wu $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> %va) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64>) + +define <2 x double> @lsx_vffint_d_lu(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vffint_d_lu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vffint.d.lu $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> %va) + ret <2 x double> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32>) + +define <2 x double> @lsx_vffintl_d_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vffintl_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vffintl.d.w $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> %va) + ret <2 x double> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32>) + +define <2 x double> @lsx_vffinth_d_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vffinth_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vffinth.d.w $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> %va) + ret <2 x double> %res +} + +declare <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64>, <2 x i64>) + +define <4 x float> @lsx_vffint_s_l(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vffint_s_l: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vffint.s.l $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> %va, <2 x i64> %vb) + ret <4 x float> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll new file mode 100644 index 0000000000000000000000000000000000000000..d8382acc70ed68a49c52b247295975788a5f7e1a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float>) + +define <4 x float> @lsx_vflogb_s(<4 x float> %va) nounwind { +; CHECK-LABEL: lsx_vflogb_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vflogb.s $vr0, $vr0 +; CHECK-NEXT: 
ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> %va) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double>) + +define <2 x double> @lsx_vflogb_d(<2 x double> %va) nounwind { +; CHECK-LABEL: lsx_vflogb_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vflogb.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> %va) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll new file mode 100644 index 0000000000000000000000000000000000000000..adbaf6c76b1b6a73919a02e8374b4160cb86f1ee --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float>, <4 x float>, <4 x float>) + +define <4 x float> @lsx_vfmadd_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { +; CHECK-LABEL: lsx_vfmadd_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmadd.s $vr0, $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double>, <2 x double>, <2 x double>) + +define <2 x double> @lsx_vfmadd_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { +; CHECK-LABEL: lsx_vfmadd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmadd.d $vr0, $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll new file mode 100644 index 0000000000000000000000000000000000000000..89f757c4e456792f802e0e2d610149020c773763 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float>, <4 x float>) + +define <4 x float> @lsx_vfmax_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfmax_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmax.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> %va, <4 x float> %vb) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double>, <2 x double>) + +define <2 x double> @lsx_vfmax_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfmax_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmax.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> %va, <2 x double> %vb) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll new file mode 100644 index 0000000000000000000000000000000000000000..5662acc0b9a143b7711b736d716aa8c12fa895f1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | 
FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float>, <4 x float>) + +define <4 x float> @lsx_vfmaxa_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfmaxa_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmaxa.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> %va, <4 x float> %vb) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double>, <2 x double>) + +define <2 x double> @lsx_vfmaxa_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfmaxa_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmaxa.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> %va, <2 x double> %vb) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll new file mode 100644 index 0000000000000000000000000000000000000000..0f844240277fb0cf71216d5f26ac1240f35e0c53 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float>, <4 x float>) + +define <4 x float> @lsx_vfmin_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfmin_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmin.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> %va, <4 x float> %vb) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double>, <2 x double>) + +define <2 x double> @lsx_vfmin_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfmin_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmin.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> %va, <2 x double> %vb) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll new file mode 100644 index 0000000000000000000000000000000000000000..27f70b5fba3229f65aca81aaee5f8dedfcf2ccde --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float>, <4 x float>) + +define <4 x float> @lsx_vfmina_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfmina_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmina.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> %va, <4 x float> %vb) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double>, <2 x double>) + +define <2 x double> @lsx_vfmina_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfmina_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmina.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> %va, <2 x double> %vb) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll new file mode 100644 index 
0000000000000000000000000000000000000000..856ca9cadbd90583000e00a556924a22fc7f76ad --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float>, <4 x float>, <4 x float>) + +define <4 x float> @lsx_vfmsub_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { +; CHECK-LABEL: lsx_vfmsub_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmsub.s $vr0, $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double>, <2 x double>, <2 x double>) + +define <2 x double> @lsx_vfmsub_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { +; CHECK-LABEL: lsx_vfmsub_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmsub.d $vr0, $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll new file mode 100644 index 0000000000000000000000000000000000000000..1e6c4c77d536b4a8584db52120bd56ac535c0ad7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float>, <4 x float>) + +define <4 x float> @lsx_vfmul_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfmul_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmul.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> %va, <4 x float> %vb) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double>, <2 x double>) + +define <2 x double> @lsx_vfmul_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfmul_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmul.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> %va, <2 x double> %vb) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll new file mode 100644 index 0000000000000000000000000000000000000000..e1a9ea78ef9db50ac4a36c375d0935bd5a113796 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float>, <4 x float>, <4 x float>) + +define <4 x float> @lsx_vfnmadd_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { +; CHECK-LABEL: lsx_vfnmadd_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfnmadd.s $vr0, $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double>, <2 x double>, <2 x double>) + +define <2 x double> @lsx_vfnmadd_d(<2 x 
double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { +; CHECK-LABEL: lsx_vfnmadd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfnmadd.d $vr0, $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll new file mode 100644 index 0000000000000000000000000000000000000000..46db0f4a50613abb9db42eff39c8a6b766c7b2df --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float>, <4 x float>, <4 x float>) + +define <4 x float> @lsx_vfnmsub_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind { +; CHECK-LABEL: lsx_vfnmsub_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfnmsub.s $vr0, $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double>, <2 x double>, <2 x double>) + +define <2 x double> @lsx_vfnmsub_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind { +; CHECK-LABEL: lsx_vfnmsub_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfnmsub.d $vr0, $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll new file mode 100644 index 0000000000000000000000000000000000000000..669fde5912d4b995e4491d1d2128562c68603dff --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float>) + +define <4 x float> @lsx_vfrecip_s(<4 x float> %va) nounwind { +; CHECK-LABEL: lsx_vfrecip_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfrecip.s $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> %va) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double>) + +define <2 x double> @lsx_vfrecip_d(<2 x double> %va) nounwind { +; CHECK-LABEL: lsx_vfrecip_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfrecip.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> %va) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll new file mode 100644 index 0000000000000000000000000000000000000000..8d872fc7296255166545edb7cdd06d530790a835 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float>) + +define <4 x float> @lsx_vfrintrne_s(<4 x float> %va) nounwind { +; CHECK-LABEL: lsx_vfrintrne_s: +; CHECK: # %bb.0: # 
%entry +; CHECK-NEXT: vfrintrne.s $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> %va) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double>) + +define <2 x double> @lsx_vfrintrne_d(<2 x double> %va) nounwind { +; CHECK-LABEL: lsx_vfrintrne_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfrintrne.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> %va) + ret <2 x double> %res +} + +declare <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float>) + +define <4 x float> @lsx_vfrintrz_s(<4 x float> %va) nounwind { +; CHECK-LABEL: lsx_vfrintrz_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfrintrz.s $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> %va) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double>) + +define <2 x double> @lsx_vfrintrz_d(<2 x double> %va) nounwind { +; CHECK-LABEL: lsx_vfrintrz_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfrintrz.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> %va) + ret <2 x double> %res +} + +declare <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float>) + +define <4 x float> @lsx_vfrintrp_s(<4 x float> %va) nounwind { +; CHECK-LABEL: lsx_vfrintrp_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfrintrp.s $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> %va) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double>) + +define <2 x double> @lsx_vfrintrp_d(<2 x double> %va) nounwind { +; CHECK-LABEL: lsx_vfrintrp_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfrintrp.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> %va) + ret <2 x double> %res +} + +declare <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float>) + +define <4 x float> @lsx_vfrintrm_s(<4 x float> %va) nounwind { +; CHECK-LABEL: lsx_vfrintrm_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfrintrm.s $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> %va) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double>) + +define <2 x double> @lsx_vfrintrm_d(<2 x double> %va) nounwind { +; CHECK-LABEL: lsx_vfrintrm_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfrintrm.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> %va) + ret <2 x double> %res +} + +declare <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float>) + +define <4 x float> @lsx_vfrint_s(<4 x float> %va) nounwind { +; CHECK-LABEL: lsx_vfrint_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfrint.s $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> %va) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double>) + +define <2 x double> @lsx_vfrint_d(<2 x double> %va) nounwind { +; CHECK-LABEL: lsx_vfrint_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfrint.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> %va) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll new file mode 100644 index 
0000000000000000000000000000000000000000..326d87308b0ba61b20e5831eaf6e8c6e7f40cc26
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float>)
+
+define <4 x float> @lsx_vfrsqrt_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vfrsqrt_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfrsqrt.s $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> %va)
+ ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double>)
+
+define <2 x double> @lsx_vfrsqrt_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vfrsqrt_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfrsqrt.d $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> %va)
+ ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..0184c855c9c100d4cfb109b05a62547502753c29
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll
@@ -0,0 +1,33 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vfrstpi_b_lo(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vfrstpi.b: argument out of range
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 -1)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vfrstpi_b_hi(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vfrstpi.b: argument out of range
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 32)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vfrstpi_h_lo(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vfrstpi.h: argument out of range
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vfrstpi_h_hi(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vfrstpi.h: argument out of range
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 32)
+ ret <8 x i16> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..9583f672a305e64adc4d4dec59264faa2cd0f19d
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll
@@ -0,0 +1,19 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vfrstpi_b(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 %c)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vfrstpi_h(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 %c)
+ ret <8 x i16> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll
new file mode 100644
index 0000000000000000000000000000000000000000..5c072b194d4fef066f9e13993f2055ecdc5d7531
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8>, <16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vfrstp_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind {
+; CHECK-LABEL: lsx_vfrstp_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfrstp.b $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16>, <8 x i16>, <8 x i16>)
+
+define <8 x i16> @lsx_vfrstp_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind {
+; CHECK-LABEL: lsx_vfrstp_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfrstp.h $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc)
+ ret <8 x i16> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vfrstpi_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vfrstpi_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfrstpi.b $vr0, $vr1, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 1)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vfrstpi_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vfrstpi_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfrstpi.h $vr0, $vr1, 31
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 31)
+ ret <8 x i16> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll
new file mode 100644
index 0000000000000000000000000000000000000000..55bffba9e99e9c1c3b5517f34b4c3d47729cb681
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float>)
+
+define <4 x float> @lsx_vfsqrt_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vfsqrt_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfsqrt.s $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> %va)
+ ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double>)
+
+define <2 x double> @lsx_vfsqrt_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vfsqrt_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfsqrt.d $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> %va)
+ ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll
new file mode 100644
index 0000000000000000000000000000000000000000..2beba4a70dc960ef8de6072fd560cc8d0554e8dd
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float>, <4 x float>)
+
+define <4 x float> @lsx_vfsub_s(<4 x float> %va, <4 x float> %vb) nounwind {
+; CHECK-LABEL: lsx_vfsub_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfsub.s $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> %va, <4 x float> %vb)
+ ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double>, <2 x double>)
+
+define <2 x double> @lsx_vfsub_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vfsub_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfsub.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> %va, <2 x double> %vb)
+ ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll
new file mode 100644
index 0000000000000000000000000000000000000000..2a494cd7fa874b0d3589bf0bdeb1e5bf9bd062be
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll
@@ -0,0 +1,350 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float>)
+
+define <4 x i32> @lsx_vftintrne_w_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrne_w_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrne.w.s $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> %va)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double>)
+
+define <2 x i64> @lsx_vftintrne_l_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrne_l_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrne.l.d $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> %va)
+ ret <2 x i64> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float>)
+
+define <4 x i32> @lsx_vftintrz_w_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrz_w_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrz.w.s $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> %va)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double>)
+
+define <2 x i64> @lsx_vftintrz_l_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrz_l_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrz.l.d $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> %va)
+ ret <2 x i64> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float>)
+
+define <4 x i32> @lsx_vftintrp_w_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrp_w_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrp.w.s $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> %va)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double>)
+
+define <2 x i64> @lsx_vftintrp_l_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrp_l_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrp.l.d $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> %va)
+ ret <2 x i64> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float>)
+
+define <4 x i32> @lsx_vftintrm_w_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrm_w_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrm.w.s $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> %va)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double>)
+
+define <2 x i64> @lsx_vftintrm_l_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrm_l_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrm.l.d $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> %va)
+ ret <2 x i64> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float>)
+
+define <4 x i32> @lsx_vftint_w_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftint_w_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftint.w.s $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> %va)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double>)
+
+define <2 x i64> @lsx_vftint_l_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vftint_l_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftint.l.d $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> %va)
+ ret <2 x i64> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float>)
+
+define <4 x i32> @lsx_vftintrz_wu_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrz_wu_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrz.wu.s $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> %va)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double>)
+
+define <2 x i64> @lsx_vftintrz_lu_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrz_lu_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrz.lu.d $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> %va)
+ ret <2 x i64> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float>)
+
+define <4 x i32> @lsx_vftint_wu_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftint_wu_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftint.wu.s $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> %va)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double>)
+
+define <2 x i64> @lsx_vftint_lu_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vftint_lu_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftint.lu.d $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> %va)
+ ret <2 x i64> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double>, <2 x double>)
+
+define <4 x i32> @lsx_vftintrne_w_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vftintrne_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrne.w.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> %va, <2 x double> %vb)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double>, <2 x double>)
+
+define <4 x i32> @lsx_vftintrz_w_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vftintrz_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrz.w.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> %va, <2 x double> %vb)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double>, <2 x double>)
+
+define <4 x i32> @lsx_vftintrp_w_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vftintrp_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrp.w.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> %va, <2 x double> %vb)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double>, <2 x double>)
+
+define <4 x i32> @lsx_vftintrm_w_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vftintrm_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrm.w.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> %va, <2 x double> %vb)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double>, <2 x double>)
+
+define <4 x i32> @lsx_vftint_w_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vftint_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftint.w.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> %va, <2 x double> %vb)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftintrnel_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrnel_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrnel.l.s $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> %va)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftintrneh_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrneh_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrneh.l.s $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> %va)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftintrzl_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrzl_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrzl.l.s $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> %va)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftintrzh_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrzh_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrzh.l.s $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> %va)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftintrpl_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrpl_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrpl.l.s $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> %va)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftintrph_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrph_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrph.l.s $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> %va)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftintrml_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrml_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrml.l.s $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> %va)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftintrmh_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrmh_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintrmh.l.s $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> %va)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftintl_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintl_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftintl.l.s $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> %va)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftinth_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftinth_l_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vftinth.l.s $vr0, $vr0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> %va)
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll
new file mode 100644
index 0000000000000000000000000000000000000000..05725582334ae383518965e678dac9bc602e49cb
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8>, <16 x i8>)
+
+define <8 x i16> @lsx_vhaddw_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vhaddw_h_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vhaddw.h.b $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> %va, <16 x i8> %vb)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16>, <8 x i16>)
+
+define <4 x i32> @lsx_vhaddw_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vhaddw_w_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vhaddw.w.h $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> %va, <8 x i16> %vb)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32>, <4 x i32>)
+
+define <2 x i64> @lsx_vhaddw_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vhaddw_d_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vhaddw.d.w $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> %va, <4 x i32> %vb)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vhaddw_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vhaddw_q_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vhaddw.q.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> %va, <2 x i64> %vb)
+ ret <2 x i64> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8>, <16 x i8>)
+
+define <8 x i16> @lsx_vhaddw_hu_bu(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vhaddw_hu_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vhaddw.hu.bu $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> %va, <16 x i8> %vb)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16>, <8 x i16>)
+
+define <4 x i32> @lsx_vhaddw_wu_hu(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vhaddw_wu_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vhaddw.wu.hu $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> %va, <8 x i16> %vb)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32>, <4 x i32>)
+
+define <2 x i64> @lsx_vhaddw_du_wu(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vhaddw_du_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vhaddw.du.wu $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> %va, <4 x i32> %vb)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vhaddw_qu_du(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vhaddw_qu_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vhaddw.qu.du $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> %va, <2 x i64> %vb)
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll
new file mode 100644
index 0000000000000000000000000000000000000000..dd5815b2ea85a6aa5ec2fd6abdc6971e536f64c3
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8>, <16 x i8>)
+
+define <8 x i16> @lsx_vhsubw_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vhsubw_h_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vhsubw.h.b $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> %va, <16 x i8> %vb)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16>, <8 x i16>)
+
+define <4 x i32> @lsx_vhsubw_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vhsubw_w_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vhsubw.w.h $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> %va, <8 x i16> %vb)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32>, <4 x i32>)
+
+define <2 x i64> @lsx_vhsubw_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vhsubw_d_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vhsubw.d.w $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> %va, <4 x i32> %vb)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vhsubw_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vhsubw_q_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vhsubw.q.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> %va, <2 x i64> %vb)
+ ret <2 x i64> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8>, <16 x i8>)
+
+define <8 x i16> @lsx_vhsubw_hu_bu(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vhsubw_hu_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vhsubw.hu.bu $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> %va, <16 x i8> %vb)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16>, <8 x i16>)
+
+define <4 x i32> @lsx_vhsubw_wu_hu(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vhsubw_wu_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vhsubw.wu.hu $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> %va, <8 x i16> %vb)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32>, <4 x i32>)
+
+define <2 x i64> @lsx_vhsubw_du_wu(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vhsubw_du_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vhsubw.du.wu $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> %va, <4 x i32> %vb)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vhsubw_qu_du(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vhsubw_qu_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vhsubw.qu.du $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> %va, <2 x i64> %vb)
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll
new file mode 100644
index 0000000000000000000000000000000000000000..77b0b3484df8c47a2594746a705c8bddbbe5591f
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vilvl_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vilvl_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> %va, <16 x i8> %vb)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @lsx_vilvl_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vilvl_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> %va, <8 x i16> %vb)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @lsx_vilvl_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vilvl_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> %va, <4 x i32> %vb)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vilvl_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vilvl_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vilvl.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> %va, <2 x i64> %vb)
+ ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vilvh_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vilvh_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vilvh.b $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> %va, <16 x i8> %vb)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @lsx_vilvh_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vilvh_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> %va, <8 x i16> %vb)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @lsx_vilvh_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vilvh_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vilvh.w $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> %va, <4 x i32> %vb)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vilvh_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vilvh_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vilvh.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> %va, <2 x i64> %vb)
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..3d4f84fb6e0388ec81495d0f5ecafb5e28918548
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32)
+
+define <16 x i8> @lsx_vinsgr2vr_b_lo(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vinsgr2vr.b: argument out of range
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 -1)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vinsgr2vr_b_hi(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vinsgr2vr.b: argument out of range
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 16)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16>, i32, i32)
+
+define <8 x i16> @lsx_vinsgr2vr_h_lo(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vinsgr2vr.h: argument out of range
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vinsgr2vr_h_hi(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vinsgr2vr.h: argument out of range
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 8)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32>, i32, i32)
+
+define <4 x i32> @lsx_vinsgr2vr_w_lo(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vinsgr2vr.w: argument out of range
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vinsgr2vr_w_hi(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vinsgr2vr.w: argument out of range
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 4)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32)
+
+define <2 x i64> @lsx_vinsgr2vr_d_lo(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vinsgr2vr.d: argument out of range
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 -1)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vinsgr2vr_d_hi(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vinsgr2vr.d: argument out of range
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 2)
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..2a4c2218de8c9e93c485b355b4f61fcdfdba20a5
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32)
+
+define <16 x i8> @lsx_vinsgr2vr_b(<16 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 %b)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16>, i32, i32)
+
+define <8 x i16> @lsx_vinsgr2vr_h(<8 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 %b)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32>, i32, i32)
+
+define <4 x i32> @lsx_vinsgr2vr_w(<4 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 %b)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32)
+
+define <2 x i64> @lsx_vinsgr2vr_d(<2 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 %b)
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll
new file mode 100644
index 0000000000000000000000000000000000000000..61d2cbd2806646fb765dd685eff2676bc31f7002
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32)
+
+define <16 x i8> @lsx_vinsgr2vr_b(<16 x i8> %va) nounwind {
+; CHECK-LABEL: lsx_vinsgr2vr_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ori $a0, $zero, 1
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 15
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 15)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16>, i32, i32)
+
+define <8 x i16> @lsx_vinsgr2vr_h(<8 x i16> %va) nounwind {
+; CHECK-LABEL: lsx_vinsgr2vr_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ori $a0, $zero, 1
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 7
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 7)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32>, i32, i32)
+
+define <4 x i32> @lsx_vinsgr2vr_w(<4 x i32> %va) nounwind {
+; CHECK-LABEL: lsx_vinsgr2vr_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ori $a0, $zero, 1
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 3)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32)
+
+define <2 x i64> @lsx_vinsgr2vr_d(<2 x i64> %va) nounwind {
+; CHECK-LABEL: lsx_vinsgr2vr_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ori $a0, $zero, 1
+; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 1)
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..3aeb30ce66b4441d452aa875ea97870d00ab6a0d
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll
@@ -0,0 +1,17 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vld(i8*, i32)
+
+define <16 x i8> @lsx_vld_lo(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vld: argument out of range
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 -2049)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vld_hi(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vld: argument out of range
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 2048)
+ ret <16 x i8> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..db6a0318d87aefe1a2955ea91cf83a3742b2547b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll
@@ -0,0 +1,10 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vld(i8*, i32)
+
+define <16 x i8> @lsx_vld(i8* %p, i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 %a)
+ ret <16 x i8> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll
new file mode 100644
index 0000000000000000000000000000000000000000..b9e2ff8088d834d0c9ff270ecbe355f8747106e0
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vld(i8*, i32)
+
+define <16 x i8> @lsx_vld(i8* %p) nounwind {
+; CHECK-LABEL: lsx_vld:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 1)
+ ret <16 x i8> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vldx(i8*, i64)
+
+define <16 x i8> @lsx_vldx(i8* %p, i64 %b) nounwind {
+; CHECK-LABEL: lsx_vldx:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vldx $vr0, $a0, $a1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vldx(i8* %p, i64 %b)
+ ret <16 x i8> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..57f6f8e81d91c57313524356b9e8bd7ce52d0caa
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll
@@ -0,0 +1,81 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <2 x i64> @llvm.loongarch.lsx.vldi(i32)
+
+define <2 x i64> @lsx_vldi_lo() nounwind {
+; CHECK: llvm.loongarch.lsx.vldi: argument out of range
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 -4097)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vldi_hi() nounwind {
+; CHECK: llvm.loongarch.lsx.vldi: argument out of range
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 4096)
+ ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32)
+
+define <16 x i8> @lsx_vrepli_b_lo() nounwind {
+; CHECK: llvm.loongarch.lsx.vrepli.b: argument out of range
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 -513)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vrepli_b_hi() nounwind {
+; CHECK: llvm.loongarch.lsx.vrepli.b: argument out of range
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 512)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32)
+
+define <8 x i16> @lsx_vrepli_h_lo() nounwind {
+; CHECK: llvm.loongarch.lsx.vrepli.h: argument out of range
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 -513)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vrepli_h_hi() nounwind {
+; CHECK: llvm.loongarch.lsx.vrepli.h: argument out of range
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 512)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32)
+
+define <4 x i32> @lsx_vrepli_w_lo() nounwind {
+; CHECK: llvm.loongarch.lsx.vrepli.w: argument out of range
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 -513)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vrepli_w_hi() nounwind {
+; CHECK: llvm.loongarch.lsx.vrepli.w: argument out of range
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 512)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32)
+
+define <2 x i64> @lsx_vrepli_d_lo() nounwind {
+; CHECK: llvm.loongarch.lsx.vrepli.d: argument out of range
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 -513)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vrepli_d_hi() nounwind {
+; CHECK: llvm.loongarch.lsx.vrepli.d: argument out of range
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 512)
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a8f8278f8097a223a257607c8489175226a43ffc
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll
@@ -0,0 +1,46 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <2 x i64> @llvm.loongarch.lsx.vldi(i32)
+
+define <2 x i64> @lsx_vldi(i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 %a)
+ ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32)
+
+define <16 x i8> @lsx_vrepli_b(i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 %a)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32)
+
+define <8 x i16> @lsx_vrepli_h(i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 %a)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32)
+
+define <4 x i32> @lsx_vrepli_w(i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 %a)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32)
+
+define <2 x i64> @lsx_vrepli_d(i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 %a)
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll
new file mode 100644
index 0000000000000000000000000000000000000000..ace910b54d9a6b4c0821430422f08e46b150a47c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <2 x i64> @llvm.loongarch.lsx.vldi(i32)
+
+define <2 x i64> @lsx_vldi() nounwind {
+; CHECK-LABEL: lsx_vldi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vldi $vr0, 4095
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 4095)
+ ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32)
+
+define <16 x i8> @lsx_vrepli_b() nounwind {
+; CHECK-LABEL: lsx_vrepli_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vrepli.b $vr0, 511
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 511)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32)
+
+define <8 x i16> @lsx_vrepli_h() nounwind {
+; CHECK-LABEL: lsx_vrepli_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vrepli.h $vr0, 511
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 511)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32)
+
+define <4 x i32> @lsx_vrepli_w() nounwind {
+; CHECK-LABEL: lsx_vrepli_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vrepli.w $vr0, 511
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 511)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32)
+
+define <2 x i64> @lsx_vrepli_d() nounwind {
+; CHECK-LABEL: lsx_vrepli_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vrepli.d $vr0, 511
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 511)
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..cb640e1245daa722261390f30b89df5cc69012b9
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8*, i32)
+
+define <16 x i8> @lsx_vldrepl_b_lo(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vldrepl.b: argument out of range
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 -2049)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vldrepl_b_hi(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vldrepl.b: argument out of range
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 2048)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8*, i32)
+
+define <8 x i16> @lsx_vldrepl_h_lo(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vldrepl.h: argument out of range or not a multiple of 2.
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 -2050)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vldrepl_h_hi(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vldrepl.h: argument out of range or not a multiple of 2.
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 2048)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8*, i32)
+
+define <4 x i32> @lsx_vldrepl_w_lo(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vldrepl.w: argument out of range or not a multiple of 4.
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 -2052)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vldrepl_w_hi(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vldrepl.w: argument out of range or not a multiple of 4.
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 2048)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8*, i32)
+
+define <2 x i64> @lsx_vldrepl_d_lo(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vldrepl.d: argument out of range or not a multiple of 8.
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 -2056)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vldrepl_d_hi(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vldrepl.d: argument out of range or not a multiple of 8.
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 2048)
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..e60b21913c6995d4d4c07c18d3390933f8366b34
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8*, i32)
+
+define <16 x i8> @lsx_vldrepl_b(i8* %p, i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 %a)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8*, i32)
+
+define <8 x i16> @lsx_vldrepl_h(i8* %p, i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 %a)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8*, i32)
+
+define <4 x i32> @lsx_vldrepl_w(i8* %p, i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 %a)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8*, i32)
+
+define <2 x i64> @lsx_vldrepl_d(i8* %p, i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 %a)
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll
new file mode 100644
index 0000000000000000000000000000000000000000..1a9cf3d3a7665d8f8be9c5d0546bdbe2afc00b52
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8*, i32)
+
+define <16 x i8> @lsx_vldrepl_b(i8* %p, i32 %b) nounwind {
+; CHECK-LABEL: lsx_vldrepl_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vldrepl.b $vr0, $a0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 1)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8*, i32)
+
+define <8 x i16> @lsx_vldrepl_h(i8* %p, i32 %b) nounwind {
+; CHECK-LABEL: lsx_vldrepl_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vldrepl.h $vr0, $a0, 2
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 2)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8*, i32)
+
+define <4 x i32> @lsx_vldrepl_w(i8* %p, i32 %b) nounwind {
+; CHECK-LABEL: lsx_vldrepl_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vldrepl.w $vr0, $a0, 4
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 4)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8*, i32)
+
+define <2 x i64> @lsx_vldrepl_d(i8* %p, i32 %b) nounwind {
+; CHECK-LABEL: lsx_vldrepl_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vldrepl.d $vr0, $a0, 8
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 8)
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll
new file mode 100644
index 0000000000000000000000000000000000000000..89503724fd730ebd078eeeb936ee3cf5e82542bf
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8>, <16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vmadd_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind {
+; CHECK-LABEL: lsx_vmadd_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmadd.b $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16>, <8 x i16>, <8 x i16>)
+
+define <8 x i16> @lsx_vmadd_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind {
+; CHECK-LABEL: lsx_vmadd_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmadd.h $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32>, <4 x i32>, <4 x i32>)
+
+define <4 x i32> @lsx_vmadd_w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind {
+; CHECK-LABEL: lsx_vmadd_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmadd.w $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64>, <2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vmadd_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind {
+; CHECK-LABEL: lsx_vmadd_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmadd.d $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc)
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll
new file mode 100644
index 0000000000000000000000000000000000000000..1e3ab25a5fcf1ac4aa891616ec4615284c544a38
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll
@@ -0,0 +1,290 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16>, <16 x i8>, <16 x i8>)
+
+define <8 x i16> @lsx_vmaddwev_h_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwev_h_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwev.h.b $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32>, <8 x i16>, <8 x i16>)
+
+define <4 x i32> @lsx_vmaddwev_w_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwev_w_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwev.w.h $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64>, <4 x i32>, <4 x i32>)
+
+define <2 x i64> @lsx_vmaddwev_d_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwev_d_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwev.d.w $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64>, <2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vmaddwev_q_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwev_q_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwev.q.d $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc)
+ ret <2 x i64> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16>, <16 x i8>, <16 x i8>)
+
+define <8 x i16> @lsx_vmaddwev_h_bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwev_h_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwev.h.bu $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32>, <8 x i16>, <8 x i16>)
+
+define <4 x i32> @lsx_vmaddwev_w_hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwev_w_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwev.w.hu $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64>, <4 x i32>, <4 x i32>)
+
+define <2 x i64> @lsx_vmaddwev_d_wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwev_d_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwev.d.wu $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64>, <2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vmaddwev_q_du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwev_q_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwev.q.du $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc)
+ ret <2 x i64> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16>, <16 x i8>, <16 x i8>)
+
+define <8 x i16> @lsx_vmaddwev_h_bu_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwev_h_bu_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwev.h.bu.b $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32>, <8 x i16>, <8 x i16>)
+
+define <4 x i32> @lsx_vmaddwev_w_hu_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwev_w_hu_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwev.w.hu.h $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64>, <4 x i32>, <4 x i32>)
+
+define <2 x i64> @lsx_vmaddwev_d_wu_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwev_d_wu_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwev.d.wu.w $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64>, <2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vmaddwev_q_du_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwev_q_du_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwev.q.du.d $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc)
+ ret <2 x i64> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16>, <16 x i8>, <16 x i8>)
+
+define <8 x i16> @lsx_vmaddwod_h_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwod_h_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwod.h.b $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32>, <8 x i16>, <8 x i16>)
+
+define <4 x i32> @lsx_vmaddwod_w_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwod_w_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwod.w.h $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64>, <4 x i32>, <4 x i32>)
+
+define <2 x i64> @lsx_vmaddwod_d_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwod_d_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwod.d.w $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64>, <2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vmaddwod_q_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwod_q_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwod.q.d $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc)
+ ret <2 x i64> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16>, <16 x i8>, <16 x i8>)
+
+define <8 x i16> @lsx_vmaddwod_h_bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwod_h_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwod.h.bu $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32>, <8 x i16>, <8 x i16>)
+
+define <4 x i32> @lsx_vmaddwod_w_hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwod_w_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwod.w.hu $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64>, <4 x i32>, <4 x i32>)
+
+define <2 x i64> @lsx_vmaddwod_d_wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwod_d_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwod.d.wu $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64>, <2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vmaddwod_q_du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwod_q_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwod.q.du $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc)
+ ret <2 x i64> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16>, <16 x i8>, <16 x i8>)
+
+define <8 x i16> @lsx_vmaddwod_h_bu_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwod_h_bu_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwod.h.bu.b $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32>, <8 x i16>, <8 x i16>)
+
+define <4 x i32> @lsx_vmaddwod_w_hu_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwod_w_hu_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwod.w.hu.h $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64>, <4 x i32>, <4 x i32>)
+
+define <2 x i64> @lsx_vmaddwod_d_wu_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwod_d_wu_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwod.d.wu.w $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64>, <2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vmaddwod_q_du_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind {
+; CHECK-LABEL: lsx_vmaddwod_q_du_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmaddwod.q.du.d $vr0, $vr1, $vr2
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc)
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..667ba32723fc4f03bf7401b417b4012351576e1a
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll
@@ -0,0 +1,129 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vmaxi_b_lo(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vmaxi.b: argument out of range
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 -17)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vmaxi_b_hi(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vmaxi.b: argument out of range
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 16)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vmaxi_h_lo(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vmaxi.h: argument out of range
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 -17)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vmaxi_h_hi(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vmaxi.h: argument out of range
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 16)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vmaxi_w_lo(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vmaxi.w: argument out of range
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 -17)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vmaxi_w_hi(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vmaxi.w: argument out of range
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 16)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vmaxi_d_lo(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vmaxi.d: argument out of range
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 -17)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vmaxi_d_hi(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vmaxi.d: argument out of range
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 16)
+ ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vmaxi_bu_lo(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vmaxi.bu: argument out of range
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 -1)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vmaxi_bu_hi(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vmaxi.bu: argument out of range
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 32)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vmaxi_hu_lo(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vmaxi.hu: argument out of range
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vmaxi_hu_hi(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vmaxi.hu: argument out of range
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 32)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vmaxi_wu_lo(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vmaxi.wu: argument out of range
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vmaxi_wu_hi(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vmaxi.wu: argument out of range
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 32)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vmaxi_du_lo(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vmaxi.du: argument out of range
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 -1)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vmaxi_du_hi(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vmaxi.du: argument out of range
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 32)
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..34bbe3495670745b7632b7f5ff41427f38355ab0
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll
@@ -0,0 +1,73 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vmaxi_b(<16 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 %b)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vmaxi_h(<8 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 %b)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vmaxi_w(<4 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 %b)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vmaxi_d(<2 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 %b)
+ ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vmaxi_bu(<16 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 %b)
+ ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vmaxi_hu(<8 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 %b)
+ ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vmaxi_wu(<4 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 %b)
+ ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vmaxi_du(<2 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 %b)
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll
new file mode 100644
index 0000000000000000000000000000000000000000..4dd289cf6ed7202981893352c7d49d4b2cd775d1
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll
@@ -0,0 +1,194 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vmax_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vmax_b:
+; CHECK: #
%bb.0: # %entry +; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmax_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmax_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmax_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmax_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmax_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmax_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmax.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vmaxi_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vmaxi_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxi.b $vr0, $vr0, -16 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 -16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vmaxi_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vmaxi_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxi.h $vr0, $vr0, -16 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 -16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vmaxi_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vmaxi_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxi.w $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 15) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vmaxi_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vmaxi_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxi.d $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 15) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmax_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmax_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmax_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmax_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmax_wu(<4 x i32> %va, <4 x 
i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmax_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmax.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmax_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmax_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmax.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vmaxi_bu(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vmaxi_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxi.bu $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 1) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vmaxi_hu(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vmaxi_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxi.hu $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 1) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vmaxi_wu(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vmaxi_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxi.wu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vmaxi_du(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vmaxi_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxi.du $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 31) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..b73bada4f06fb617d2699b819aaec001d5b76704 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vmini_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 -17) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vmini_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vmini_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 -17) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vmini_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32>, 
i32) + +define <4 x i32> @lsx_vmini_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 -17) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vmini_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 16) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vmini_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 -17) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vmini_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 16) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vmini_bu_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vmini_bu_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 32) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vmini_hu_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.hu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vmini_hu_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.hu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vmini_wu_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.wu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vmini_wu_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.wu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vmini_du_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.du: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vmini_du_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.du: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 32) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..5d9b98cec4d0e9a8ed817aec0c19f8d5cf25daf1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> 
@llvm.loongarch.lsx.vmini.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vmini_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vmini_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vmini_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vmini_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vmini_bu(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vmini_hu(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vmini_wu(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vmini_du(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll new file mode 100644 index 0000000000000000000000000000000000000000..aa12a5ead6a3f7416819a4cb4b688e1b6b1b7c04 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll @@ -0,0 +1,194 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmin_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmin_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmin_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmin_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> 
@llvm.loongarch.lsx.vmin.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmin_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmin_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmin.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmin_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmin_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmin.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vmini_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vmini_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmini.b $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 15) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vmini_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vmini_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmini.h $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vmini_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vmini_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmini.w $vr0, $vr0, -16 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 -16) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vmini_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vmini_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmini.d $vr0, $vr0, -16 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 -16) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmin_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmin_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmin_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmin_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmin_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmin_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmin.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmin_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmin_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmin.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x 
i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vmini_bu(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vmini_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmini.bu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 31) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vmini_hu(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vmini_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmini.hu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 31) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vmini_wu(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vmini_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmini.wu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vmini_du(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vmini_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmini.du $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 31) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll new file mode 100644 index 0000000000000000000000000000000000000000..6b3dc6865584e5964efb3ca63cbcf29f7601e0ee --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmod_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmod_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmod.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmod_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmod_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmod.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmod_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmod_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmod.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmod_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmod_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmod.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmod_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: 
lsx_vmod_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmod.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmod_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmod_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmod.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmod_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmod_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmod.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmod_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmod_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmod.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll new file mode 100644 index 0000000000000000000000000000000000000000..3ecd777aee67858a9e9671f442a64f398e821821 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8>) + +define <16 x i8> @lsx_vmskgez_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vmskgez_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmskgez.b $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> %va) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll new file mode 100644 index 0000000000000000000000000000000000000000..be00c76137c770079f69bb1d9c7695de9fa4c4e2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8>) + +define <16 x i8> @lsx_vmskltz_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vmskltz_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> %va) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16>) + +define <8 x i16> @lsx_vmskltz_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vmskltz_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmskltz.h $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> %va) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32>) + +define <4 x i32> @lsx_vmskltz_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vmskltz_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmskltz.w $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> 
@llvm.loongarch.lsx.vmskltz.w(<4 x i32> %va) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64>) + +define <2 x i64> @lsx_vmskltz_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vmskltz_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmskltz.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> %va) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll new file mode 100644 index 0000000000000000000000000000000000000000..02f1752f7190ddfb04a706f99480eba50e70400b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8>) + +define <16 x i8> @lsx_vmsknz_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vmsknz_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmsknz.b $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> %va) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll new file mode 100644 index 0000000000000000000000000000000000000000..98684e10c78e5b2f854348d2ef0c0347e8ac2c6b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8>, <16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmsub_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { +; CHECK-LABEL: lsx_vmsub_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmsub.b $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16>, <8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmsub_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { +; CHECK-LABEL: lsx_vmsub_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmsub.h $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32>, <4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmsub_w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { +; CHECK-LABEL: lsx_vmsub_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmsub.w $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64>, <2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmsub_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { +; CHECK-LABEL: lsx_vmsub_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmsub.d $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll new file mode 100644 index 0000000000000000000000000000000000000000..a4deb8f8f823e555fe7c1a62d896a68af7d6a5ae --- 
/dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmuh_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmuh_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmuh.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmuh_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmuh_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmuh.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmuh_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmuh_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmuh.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmuh_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmuh_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmuh.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmuh_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmuh_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmuh.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmuh_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmuh_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmuh.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmuh_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmuh_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmuh.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmuh_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmuh_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmuh.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll new file mode 100644 index 0000000000000000000000000000000000000000..aca60d1663b7427de172fe86c3c006aca838b039 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmul_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmul_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmul.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmul_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmul_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmul_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmul_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmul_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmul_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll new file mode 100644 index 0000000000000000000000000000000000000000..eb55c1f809e3aa4fdf9b561fa806ccea21d07402 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll @@ -0,0 +1,290 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vmulwev_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.h.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vmulwev_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.w.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vmulwev_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.d.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmulwev_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.q.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> 
@llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vmulwev_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.h.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vmulwev_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.w.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vmulwev_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.d.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmulwev_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.q.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vmulwev_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.h.bu.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vmulwev_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.w.hu.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vmulwev_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.d.wu.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmulwev_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.q.du.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vmulwod_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
vmulwod.h.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vmulwod_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.w.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vmulwod_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.d.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmulwod_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.q.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vmulwod_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.h.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vmulwod_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.w.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vmulwod_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.d.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmulwod_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.q.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vmulwod_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.h.bu.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vmulwod_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: 
lsx_vmulwod_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.w.hu.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vmulwod_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.d.wu.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmulwod_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.q.du.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll new file mode 100644 index 0000000000000000000000000000000000000000..43c6e9757614903d1400054e8be858445ff924f3 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8>) + +define <16 x i8> @lsx_vneg_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vneg_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vneg.b $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> %va) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16>) + +define <8 x i16> @lsx_vneg_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vneg_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vneg.h $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> %va) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32>) + +define <4 x i32> @lsx_vneg_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vneg_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vneg.w $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> %va) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64>) + +define <2 x i64> @lsx_vneg_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vneg_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vneg.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> %va) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll new file mode 100644 index 0000000000000000000000000000000000000000..16619225f2d178c9716313b91f4ecb78c89c1e27 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vnor_v(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vnor_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> 
@llvm.loongarch.lsx.vnor.v(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..8c59d8fb9fa5e79783d51485dc7ac17755902c20 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vnori_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vnori.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vnori_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vnori.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 256) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..322a39c106a68623667a8f52e5966a4503990843 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vnori_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll new file mode 100644 index 0000000000000000000000000000000000000000..c2388a1e0da377cdf84c73ee4f251a6d32e3de00 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vnori_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vnori_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vnori.b $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 1) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll new file mode 100644 index 0000000000000000000000000000000000000000..ab557003d1504aaf05a745c61b28ba50ed68b366 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vor_v(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vor_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll new file mode 100644 index 
0000000000000000000000000000000000000000..4a7fc7e109d96be6e34be486c8098f689704ec44 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vori_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vori.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vori_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vori.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 256) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..5644b8581dce72227f71f7073812619269214b03 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vori_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll new file mode 100644 index 0000000000000000000000000000000000000000..85c0f432c54a2e9eedcb8d0a1111935bc92a7d49 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vori_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vori_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr0, 3 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 3) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll new file mode 100644 index 0000000000000000000000000000000000000000..4528628e02c3c881ad334031675ec12394a7f7ce --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vorn_v(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vorn_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll new file mode 100644 index 0000000000000000000000000000000000000000..70a3620d1757acd2bc1d6273cddc8e5e2e9cb476 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + 
+declare <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vpackev_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vpackev_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpackev.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vpackev_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vpackev_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpackev.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vpackev_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vpackev_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpackev.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vpackev_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vpackev_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpackev.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vpackod_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vpackod_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpackod.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vpackod_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vpackod_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpackod.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vpackod_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vpackod_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpackod.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vpackod_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vpackod_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpackod.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll new file mode 100644 index 0000000000000000000000000000000000000000..431b270ab0a14f57650ac7b8d496688ee40bfd2e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x 
i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8>) + +define <16 x i8> @lsx_vpcnt_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vpcnt_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpcnt.b $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> %va) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16>) + +define <8 x i16> @lsx_vpcnt_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vpcnt_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpcnt.h $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> %va) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32>) + +define <4 x i32> @lsx_vpcnt_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vpcnt_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpcnt.w $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> %va) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64>) + +define <2 x i64> @lsx_vpcnt_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vpcnt_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpcnt.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> %va) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..e439bbae6130db7ce962ca5475e57e0ede1f8a63 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vpermi_w_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vpermi.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vpermi_w_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vpermi.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 256) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..bdfc08ed680a9d1de8b2ff4573d039bb2f289eb0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vpermi_w(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll new file mode 100644 index 0000000000000000000000000000000000000000..b8367d98caf660c5ede172befaa4353510fcfa06 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x 
i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vpermi_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vpermi_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpermi.w $vr0, $vr1, 255 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 255) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll new file mode 100644 index 0000000000000000000000000000000000000000..4ebf29e1409c08f19c7705c9dc4e1ebe8cef8f43 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vpickev_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vpickev_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vpickev_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vpickev_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickev.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vpickev_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vpickev_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickev.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vpickev_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vpickev_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickev.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vpickod_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vpickod_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickod.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vpickod_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vpickod_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickod.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vpickod_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vpickod_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickod.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> 
@llvm.loongarch.lsx.vpickod.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vpickod_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vpickod_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickod.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..3430c54d21941b7fe89da90b6c3a8ff57d88aa22 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32) + +define i32 @lsx_vpickve2gr_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.b: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 -1) + ret i32 %res +} + +define i32 @lsx_vpickve2gr_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.b: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 16) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16>, i32) + +define i32 @lsx_vpickve2gr_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.h: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 -1) + ret i32 %res +} + +define i32 @lsx_vpickve2gr_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.h: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 8) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32>, i32) + +define i32 @lsx_vpickve2gr_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.w: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 -1) + ret i32 %res +} + +define i32 @lsx_vpickve2gr_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.w: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 4) + ret i32 %res +} + +declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32) + +define i64 @lsx_vpickve2gr_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.d: argument out of range +entry: + %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 -1) + ret i64 %res +} + +define i64 @lsx_vpickve2gr_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.d: argument out of range +entry: + %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 2) + ret i64 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32) + +define i32 @lsx_vpickve2gr_bu_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.bu: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 -1) + ret i32 %res +} + +define i32 @lsx_vpickve2gr_bu_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.bu: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 16) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16>, i32) + +define i32 @lsx_vpickve2gr_hu_lo(<8 x i16> %va) nounwind { +; CHECK: 
llvm.loongarch.lsx.vpickve2gr.hu: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 -1) + ret i32 %res +} + +define i32 @lsx_vpickve2gr_hu_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.hu: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 8) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32>, i32) + +define i32 @lsx_vpickve2gr_wu_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.wu: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 -1) + ret i32 %res +} + +define i32 @lsx_vpickve2gr_wu_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.wu: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 4) + ret i32 %res +} + +declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32) + +define i64 @lsx_vpickve2gr_du_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.du: argument out of range +entry: + %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 -1) + ret i64 %res +} + +define i64 @lsx_vpickve2gr_du_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.du: argument out of range +entry: + %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 2) + ret i64 %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..6dd3c1f27a81efa7933036d8a743c57b44f32e96 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32) + +define i32 @lsx_vpickve2gr_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 %b) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16>, i32) + +define i32 @lsx_vpickve2gr_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 %b) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32>, i32) + +define i32 @lsx_vpickve2gr_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 %b) + ret i32 %res +} + +declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32) + +define i64 @lsx_vpickve2gr_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 %b) + ret i64 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32) + +define i32 @lsx_vpickve2gr_bu(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 %b) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16>, i32) + +define i32 @lsx_vpickve2gr_hu(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 %b) + ret i32 
%res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32>, i32) + +define i32 @lsx_vpickve2gr_wu(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 %b) + ret i32 %res +} + +declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32) + +define i64 @lsx_vpickve2gr_du(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 %b) + ret i64 %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll new file mode 100644 index 0000000000000000000000000000000000000000..ed56d30ce3c46ab9dde842d89b4ae3761f0fa87a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32) + +define i32 @lsx_vpickve2gr_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vpickve2gr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 15) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16>, i32) + +define i32 @lsx_vpickve2gr_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vpickve2gr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 7) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32>, i32) + +define i32 @lsx_vpickve2gr_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vpickve2gr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 3) + ret i32 %res +} + +declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32) + +define i64 @lsx_vpickve2gr_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vpickve2gr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 1) + ret i64 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32) + +define i32 @lsx_vpickve2gr_bu(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vpickve2gr_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickve2gr.bu $a0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 15) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16>, i32) + +define i32 @lsx_vpickve2gr_hu(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vpickve2gr_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 7) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32>, i32) + +define i32 @lsx_vpickve2gr_wu(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vpickve2gr_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickve2gr.wu $a0, $vr0, 3 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 3) + ret i32 %res +} + +declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32) + +define i64 
@lsx_vpickve2gr_du(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vpickve2gr_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickve2gr.du $a0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 1) + ret i64 %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll new file mode 100644 index 0000000000000000000000000000000000000000..091f1c98c2289a1f2492f70c629ecc124feb98f9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32) + +define <16 x i8> @lsx_vreplgr2vr_b(i32 %a) nounwind { +; CHECK-LABEL: lsx_vreplgr2vr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplgr2vr.b $vr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 %a) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32) + +define <8 x i16> @lsx_vreplgr2vr_h(i32 %a) nounwind { +; CHECK-LABEL: lsx_vreplgr2vr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplgr2vr.h $vr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 %a) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32) + +define <4 x i32> @lsx_vreplgr2vr_w(i32 %a) nounwind { +; CHECK-LABEL: lsx_vreplgr2vr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplgr2vr.w $vr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 %a) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64) + +define <2 x i64> @lsx_vreplgr2vr_d(i64 %a) nounwind { +; CHECK-LABEL: lsx_vreplgr2vr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplgr2vr.d $vr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 %a) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll new file mode 100644 index 0000000000000000000000000000000000000000..3ba184dad052b925a6becbe215c2b02fa37103c7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vreplve_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK-LABEL: lsx_vreplve_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplve.b $vr0, $vr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vreplve_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK-LABEL: lsx_vreplve_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplve.h $vr0, $vr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vreplve_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK-LABEL: lsx_vreplve_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplve.w $vr0, $vr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> 
@llvm.loongarch.lsx.vreplve.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vreplve_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK-LABEL: lsx_vreplve_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplve.d $vr0, $vr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..d625441122a6679487d26bbd77ea45fe8c532cdd --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vreplvei_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vreplvei.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vreplvei_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vreplvei.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vreplvei_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vreplvei.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vreplvei_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vreplvei.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 8) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vreplvei_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vreplvei.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vreplvei_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vreplvei.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 4) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vreplvei_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vreplvei.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vreplvei_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vreplvei.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 2) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..3d271bb2b3073d8cff7467131f6a655b4362860c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) + +define <16 x 
i8> @lsx_vreplvei_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vreplvei_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vreplvei_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vreplvei_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll new file mode 100644 index 0000000000000000000000000000000000000000..9b8af1878cb83d52308bc77865bcea4fcc4c1167 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vreplvei_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vreplvei_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplvei.b $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 15) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vreplvei_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vreplvei_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplvei.h $vr0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 7) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vreplvei_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vreplvei_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 3) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vreplvei_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vreplvei_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 1) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..3c53b36672ad32e36ba0b47e3d9edf91c8283c65 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vrotri_b_lo(<16 x 
i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vrotri.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vrotri_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vrotri.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vrotri_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vrotri.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vrotri_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vrotri.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vrotri_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vrotri.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vrotri_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vrotri.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vrotri_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vrotri.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vrotri_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vrotri.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..fd8ba3a1c633f5b0b2cdb4cc07145d1fb74b5f42 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vrotri_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vrotri_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vrotri_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vrotri_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> 
@llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll new file mode 100644 index 0000000000000000000000000000000000000000..df8650677147b6aac7095bcd7cacdea0abe0ad77 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vrotr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vrotr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrotr.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vrotr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vrotr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrotr.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vrotr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vrotr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrotr.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vrotr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vrotr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrotr.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vrotri_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vrotri_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrotri.b $vr0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 7) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vrotri_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vrotri_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrotri.h $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vrotri_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vrotri_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrotri.w $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vrotri_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vrotri_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrotri.d $vr0, $vr0, 63 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 63) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll new 
file mode 100644 index 0000000000000000000000000000000000000000..a54f955766dfe6441b46b86f24bc4bbfcb69301a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vsadd_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsadd_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsadd.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vsadd_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsadd_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsadd.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vsadd_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsadd_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsadd.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsadd_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsadd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vsadd_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsadd_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsadd.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vsadd_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsadd_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsadd.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vsadd_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsadd_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsadd.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsadd_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsadd_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsadd.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll new file mode 100644 index 
0000000000000000000000000000000000000000..45fa4e43be198d38c6a5f471bb99df14061498f1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsat_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsat_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsat_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsat_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsat_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsat_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsat_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsat_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vsat_bu_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsat_bu_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vsat_hu_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.hu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsat_hu_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.hu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vsat_wu_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.wu: argument out of range 
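+; vsat.wu takes an unsigned 5-bit immediate (valid range [0, 31]), so the
+; -1 below and the 32 in the _hi case must both be diagnosed.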
+entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsat_wu_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.wu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vsat_du_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.du: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsat_du_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.du: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..afdbe0c1ce0b991df290a8f59d445c627dfd1f63 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsat_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsat_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsat_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsat_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vsat_bu(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vsat_hu(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vsat_wu(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vsat_du(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> 
%va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll new file mode 100644 index 0000000000000000000000000000000000000000..4286842a63b98e2a9cb879fe07a434c28b67140d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsat_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vsat_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsat.b $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 1) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsat_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vsat_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsat.h $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 1) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsat_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vsat_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsat.w $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 1) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsat_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vsat_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsat.d $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 1) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vsat_bu(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vsat_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsat.bu $vr0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 7) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vsat_hu(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vsat_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsat.hu $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vsat_wu(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vsat_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsat.wu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vsat_du(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vsat_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsat.du $vr0, $vr0, 63 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 63) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..220398ff28cda67db836c34ed350270a68ccd27c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll @@ 
-0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vseqi_b_lo(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vseqi.b: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 -17)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vseqi_b_hi(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vseqi.b: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 16)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vseqi_h_lo(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vseqi.h: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 -17)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vseqi_h_hi(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vseqi.h: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 16)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vseqi_w_lo(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vseqi.w: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 -17)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vseqi_w_hi(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vseqi.w: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 16)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vseqi_d_lo(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vseqi.d: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 -17)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vseqi_d_hi(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vseqi.d: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 16)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..5fa1dd30475ce3679646025f74c86b00f64765e3
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vseqi_b(<16 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 %b)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vseqi_h(<8 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 %b)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vseqi_w(<4 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 %b)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vseqi_d(<2 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 %b)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll
new file mode 100644
index 0000000000000000000000000000000000000000..3cb4acd824393be9e7e8c919ade83e47e043c1d1
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vseq_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vseq_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vseq.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> %va, <16 x i8> %vb)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @lsx_vseq_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vseq_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vseq.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> %va, <8 x i16> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @lsx_vseq_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vseq_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vseq.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> %va, <4 x i32> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vseq_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vseq_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vseq.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> %va, <2 x i64> %vb)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vseqi_b(<16 x i8> %va) nounwind {
+; CHECK-LABEL: lsx_vseqi_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vseqi.b $vr0, $vr0, 15
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 15)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vseqi_h(<8 x i16> %va) nounwind {
+; CHECK-LABEL: lsx_vseqi_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vseqi.h $vr0, $vr0, 15
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 15)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vseqi_w(<4 x i32> %va) nounwind {
+; CHECK-LABEL: lsx_vseqi_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vseqi.w $vr0, $vr0, -16
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 -16)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vseqi_d(<2 x i64> %va) nounwind {
+; CHECK-LABEL: lsx_vseqi_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vseqi.d $vr0, $vr0, -16
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 -16)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll
new file mode 100644
index 0000000000000000000000000000000000000000..3188fb4e2c2ef7e3522ffcc1ac6b6a9ec0b539bc
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare i32 @llvm.loongarch.lsx.bz.v(<16 x i8>)
+
+define i32 @lsx_bz_v(<16 x i8> %va) nounwind {
+; CHECK-LABEL: lsx_bz_v:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vseteqz.v $fcc0, $vr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> %va)
+  ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lsx.bnz.v(<16 x i8>)
+
+define i32 @lsx_bnz_v(<16 x i8> %va) nounwind {
+; CHECK-LABEL: lsx_bnz_v:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetnez.v $fcc0, $vr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB1_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB1_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> %va)
+  ret i32 %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll
new file mode 100644
index 0000000000000000000000000000000000000000..22e01922e87bb3b6b22af6a7601719e14e1d7bd2
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare i32 @llvm.loongarch.lsx.bnz.b(<16 x i8>)
+
+define i32 @lsx_bnz_b(<16 x i8> %va) nounwind {
+; CHECK-LABEL: lsx_bnz_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetallnez.b $fcc0, $vr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> %va)
+  ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lsx.bnz.h(<8 x i16>)
+
+define i32 @lsx_bnz_h(<8 x i16> %va) nounwind {
+; CHECK-LABEL: lsx_bnz_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetallnez.h $fcc0, $vr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB1_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB1_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> %va)
+  ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lsx.bnz.w(<4 x i32>)
+
+define i32 @lsx_bnz_w(<4 x i32> %va) nounwind {
+; CHECK-LABEL: lsx_bnz_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetallnez.w $fcc0, $vr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB2_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB2_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> %va)
+  ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lsx.bnz.d(<2 x i64>)
+
+define i32 @lsx_bnz_d(<2 x i64> %va) nounwind {
+; CHECK-LABEL: lsx_bnz_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetallnez.d $fcc0, $vr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB3_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB3_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> %va)
+  ret i32 %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll
new file mode 100644
index 0000000000000000000000000000000000000000..96c79c10e46889af9b13c36e01529c63b4966e77
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare i32 @llvm.loongarch.lsx.bz.b(<16 x i8>)
+
+define i32 @lsx_bz_b(<16 x i8> %va) nounwind {
+; CHECK-LABEL: lsx_bz_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetanyeqz.b $fcc0, $vr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> %va)
+  ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lsx.bz.h(<8 x i16>)
+
+define i32 @lsx_bz_h(<8 x i16> %va) nounwind {
+; CHECK-LABEL: lsx_bz_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetanyeqz.h $fcc0, $vr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB1_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB1_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> %va)
+  ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lsx.bz.w(<4 x i32>)
+
+define i32 @lsx_bz_w(<4 x i32> %va) nounwind {
+; CHECK-LABEL: lsx_bz_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetanyeqz.w $fcc0, $vr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB2_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB2_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> %va)
+  ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lsx.bz.d(<2 x i64>)
+
+define i32 @lsx_bz_d(<2 x i64> %va) nounwind {
+; CHECK-LABEL: lsx_bz_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetanyeqz.d $fcc0, $vr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB3_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB3_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> %va)
  ret i32 %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll
new file mode 100644
index 0000000000000000000000000000000000000000..f5d516521e45f106d90900cbbedfd53823d9c2f7
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8>, <16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vshuf_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind {
+; CHECK-LABEL: lsx_vshuf_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vshuf.b $vr0, $vr0, $vr1, $vr2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16>, <8 x i16>, <8 x i16>)
+
+define <8 x i16> @lsx_vshuf_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind {
+; CHECK-LABEL: lsx_vshuf_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vshuf.h $vr0, $vr1, $vr2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32>, <4 x i32>, <4 x i32>)
+
+define <4 x i32> @lsx_vshuf_w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind {
+; CHECK-LABEL: lsx_vshuf_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vshuf.w $vr0, $vr1, $vr2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64>, <2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vshuf_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind {
+; CHECK-LABEL: lsx_vshuf_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vshuf.d $vr0, $vr1, $vr2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..4d6fadf08c26b17b1e9f6a0c69b4d0530accfccc
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vshuf4i_b_lo(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vshuf4i.b: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 -1)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vshuf4i_b_hi(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vshuf4i.b: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 256)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vshuf4i_h_lo(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vshuf4i.h: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vshuf4i_h_hi(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vshuf4i.h: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 256)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vshuf4i_w_lo(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vshuf4i.w: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vshuf4i_w_hi(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vshuf4i.w: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 256)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vshuf4i_d_lo(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vshuf4i.d: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 -1)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vshuf4i_d_hi(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vshuf4i.d: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 256)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a7d138bcc00bbfeb9a3bb320f4733ed5341ce618
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vshuf4i_b(<16 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 %b)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vshuf4i_h(<8 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 %b)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vshuf4i_w(<4 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 %b)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vshuf4i_d(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 %c)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll
new file mode 100644
index 0000000000000000000000000000000000000000..1ad5f2af5591e5a050d28a420542f81e69d9190d
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vshuf4i_b(<16 x i8> %va) nounwind {
+; CHECK-LABEL: lsx_vshuf4i_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vshuf4i.b $vr0, $vr0, 255
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 255)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vshuf4i_h(<8 x i16> %va) nounwind {
+; CHECK-LABEL: lsx_vshuf4i_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vshuf4i.h $vr0, $vr0, 255
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 255)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vshuf4i_w(<4 x i32> %va) nounwind {
+; CHECK-LABEL: lsx_vshuf4i_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vshuf4i.w $vr0, $vr0, 255
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 255)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vshuf4i_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vshuf4i_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr1, 255
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 255)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll
new file mode 100644
index 0000000000000000000000000000000000000000..3997b0cc995c50cae2a0c248235a01b3e54dbb56
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vsigncov_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vsigncov_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsigncov.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> %va, <16 x i8> %vb)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @lsx_vsigncov_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vsigncov_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsigncov.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> %va, <8 x i16> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @lsx_vsigncov_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vsigncov_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsigncov.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> %va, <4 x i32> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vsigncov_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vsigncov_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsigncov.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> %va, <2 x i64> %vb)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..4c945e296711f7e6f46da803794a9452f6f343a1
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll
@@ -0,0 +1,129 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vslei_b_lo(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslei.b: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 -17)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vslei_b_hi(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslei.b: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 16)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vslei_h_lo(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslei.h: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 -17)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vslei_h_hi(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslei.h: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 16)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vslei_w_lo(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslei.w: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 -17)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vslei_w_hi(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslei.w: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 16)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vslei_d_lo(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslei.d: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 -17)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vslei_d_hi(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslei.d: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 16)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vslei_bu_lo(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslei.bu: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 -1)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vslei_bu_hi(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslei.bu: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 32)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vslei_hu_lo(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslei.hu: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vslei_hu_hi(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslei.hu: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 32)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vslei_wu_lo(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslei.wu: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vslei_wu_hi(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslei.wu: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 32)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vslei_du_lo(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslei.du: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 -1)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vslei_du_hi(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslei.du: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 32)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..0fc137bf05498421a4df7fa30001a25c3159a91f
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll
@@ -0,0 +1,73 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vslei_b(<16 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 %b)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vslei_h(<8 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 %b)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vslei_w(<4 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 %b)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vslei_d(<2 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 %b)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vslei_bu(<16 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 %b)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vslei_hu(<8 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 %b)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vslei_wu(<4 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 %b)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vslei_du(<2 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 %b)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll
new file mode 100644
index 0000000000000000000000000000000000000000..5a9d5f06e63f89d5710e14509531bcafa857c1db
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll
@@ -0,0 +1,194 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vsle_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vsle_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsle.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> %va, <16 x i8> %vb)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @lsx_vsle_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vsle_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsle.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> %va, <8 x i16> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @lsx_vsle_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vsle_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsle.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> %va, <4 x i32> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vsle_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vsle_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsle.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> %va, <2 x i64> %vb)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vslei_b(<16 x i8> %va) nounwind {
+; CHECK-LABEL: lsx_vslei_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslei.b $vr0, $vr0, 15
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 15)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vslei_h(<8 x i16> %va) nounwind {
+; CHECK-LABEL: lsx_vslei_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslei.h $vr0, $vr0, 15
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 15)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vslei_w(<4 x i32> %va) nounwind {
+; CHECK-LABEL: lsx_vslei_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslei.w $vr0, $vr0, -16
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 -16)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vslei_d(<2 x i64> %va) nounwind {
+; CHECK-LABEL: lsx_vslei_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslei.d $vr0, $vr0, -16
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 -16)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vsle_bu(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vsle_bu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsle.bu $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> %va, <16 x i8> %vb)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @lsx_vsle_hu(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vsle_hu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsle.hu $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> %va, <8 x i16> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @lsx_vsle_wu(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vsle_wu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsle.wu $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> %va, <4 x i32> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vsle_du(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vsle_du:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsle.du $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> %va, <2 x i64> %vb)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vslei_bu(<16 x i8> %va) nounwind {
+; CHECK-LABEL: lsx_vslei_bu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslei.bu $vr0, $vr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 1)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vslei_hu(<8 x i16> %va) nounwind {
+; CHECK-LABEL: lsx_vslei_hu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslei.hu $vr0, $vr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 1)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vslei_wu(<4 x i32> %va) nounwind {
+; CHECK-LABEL: lsx_vslei_wu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslei.wu $vr0, $vr0, 31
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 31)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vslei_du(<2 x i64> %va) nounwind {
+; CHECK-LABEL: lsx_vslei_du:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslei.du $vr0, $vr0, 31
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 31)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..75406f94887ca9db353dce9acd37616d058d0612
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vslli_b_lo(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslli.b: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 -1)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vslli_b_hi(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslli.b: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 8)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vslli_h_lo(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslli.h: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vslli_h_hi(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslli.h: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 16)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vslli_w_lo(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslli.w: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vslli_w_hi(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslli.w: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 32)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vslli_d_lo(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslli.d: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 -1)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vslli_d_hi(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslli.d: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 64)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..7474b5e2973498ed36e591549fff3e783f718c49
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vslli_b(<16 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 %b)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vslli_h(<8 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 %b)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vslli_w(<4 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 %b)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vslli_d(<2 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 %b)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll
new file mode 100644
index 0000000000000000000000000000000000000000..7bc20af41f17a865962af4cec51e08235afca161
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vsll_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vsll_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsll.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> %va, <16 x i8> %vb)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @lsx_vsll_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vsll_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsll.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> %va, <8 x i16> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @lsx_vsll_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vsll_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsll.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> %va, <4 x i32> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vsll_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vsll_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsll.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> %va, <2 x i64> %vb)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vslli_b(<16 x i8> %va) nounwind {
+; CHECK-LABEL: lsx_vslli_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslli.b $vr0, $vr0, 7
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 7)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vslli_h(<8 x i16> %va) nounwind {
+; CHECK-LABEL: lsx_vslli_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslli.h $vr0, $vr0, 15
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 15)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vslli_w(<4 x i32> %va) nounwind {
+; CHECK-LABEL: lsx_vslli_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslli.w $vr0, $vr0, 31
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 31)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vslli_d(<2 x i64> %va) nounwind {
+; CHECK-LABEL: lsx_vslli_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslli.d $vr0, $vr0, 63
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 63)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..bda3523a0b5c045ffc93c316465a7b1c94ea9868
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll
@@ -0,0 +1,97 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32)
+
+define <8 x i16> @lsx_vsllwil_h_b_lo(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsllwil.h.b: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vsllwil_h_b_hi(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsllwil.h.b: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 8)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16>, i32)
+
+define <4 x i32> @lsx_vsllwil_w_h_lo(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsllwil.w.h: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vsllwil_w_h_hi(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsllwil.w.h: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 16)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32>, i32)
+
+define <2 x i64> @lsx_vsllwil_d_w_lo(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsllwil.d.w: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 -1)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vsllwil_d_w_hi(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsllwil.d.w: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 32)
+  ret <2 x i64> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8>, i32)
+
+define <8 x i16> @lsx_vsllwil_hu_bu_lo(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsllwil.hu.bu: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vsllwil_hu_bu_hi(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsllwil.hu.bu: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 8)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16>, i32)
+
+define <4 x i32> @lsx_vsllwil_wu_hu_lo(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsllwil.wu.hu: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vsllwil_wu_hu_hi(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsllwil.wu.hu: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 16)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32>, i32)
+
+define <2 x i64> @lsx_vsllwil_du_wu_lo(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsllwil.du.wu: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 -1)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vsllwil_du_wu_hi(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsllwil.du.wu: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 32)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a03656d5ca07a03f2188578953c65067781f3540
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll
@@ -0,0 +1,55 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32)
+
+define <8 x i16> @lsx_vsllwil_h_b(<16 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 %b)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16>, i32)
+
+define <4 x i32> @lsx_vsllwil_w_h(<8 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 %b)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32>, i32)
+
+define <2 x i64> @lsx_vsllwil_d_w(<4 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 %b)
+  ret <2 x i64> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8>, i32)
+
+define <8 x i16> @lsx_vsllwil_hu_bu(<16 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 %b)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16>, i32)
+
+define <4 x i32> @lsx_vsllwil_wu_hu(<8 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 %b)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32>, i32)
+
+define <2 x i64> @lsx_vsllwil_du_wu(<4 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 %b)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll
new file mode 100644
index 0000000000000000000000000000000000000000..29ab70da1ceda32e8d45e0bc529062708cdc943f
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32)
+
+define <8 x i16> @lsx_vsllwil_h_b(<16 x i8> %va) nounwind {
+; CHECK-LABEL: lsx_vsllwil_h_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsllwil.h.b $vr0, $vr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 1)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16>, i32)
+
+define <4 x i32> @lsx_vsllwil_w_h(<8 x i16> %va) nounwind {
+; CHECK-LABEL: lsx_vsllwil_w_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsllwil.w.h $vr0, $vr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 1)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32>, i32)
+
+define <2 x i64> @lsx_vsllwil_d_w(<4 x i32> %va) nounwind {
+; CHECK-LABEL: lsx_vsllwil_d_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsllwil.d.w $vr0, $vr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 1)
+  ret <2 x i64> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8>, i32)
+
+define <8 x i16> @lsx_vsllwil_hu_bu(<16 x i8> %va) nounwind {
+; CHECK-LABEL: lsx_vsllwil_hu_bu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsllwil.hu.bu $vr0, $vr0, 7
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 7)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16>, i32)
+
+define <4 x i32> @lsx_vsllwil_wu_hu(<8 x i16> %va) nounwind {
+; CHECK-LABEL: lsx_vsllwil_wu_hu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsllwil.wu.hu $vr0, $vr0, 15
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 15)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32>, i32)
+
+define <2 x i64> @lsx_vsllwil_du_wu(<4 x i32> %va) nounwind {
+; CHECK-LABEL: lsx_vsllwil_du_wu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsllwil.du.wu $vr0, $vr0, 31
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 31)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..f6d014b19d6c7d7315e393c53efeb8b06db88288
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll
@@ -0,0 +1,129 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vslti_b_lo(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslti.b: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 -17)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vslti_b_hi(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslti.b: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 16)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vslti_h_lo(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslti.h: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 -17)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vslti_h_hi(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslti.h: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 16)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vslti_w_lo(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslti.w: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 -17)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vslti_w_hi(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslti.w: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 16)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vslti_d_lo(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslti.d: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 -17)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vslti_d_hi(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslti.d: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 16)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vslti_bu_lo(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslti.bu: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 -1)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vslti_bu_hi(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslti.bu: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 32)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vslti_hu_lo(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslti.hu: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vslti_hu_hi(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslti.hu: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 32)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vslti_wu_lo(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslti.wu: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vslti_wu_hi(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslti.wu: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 32)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vslti_du_lo(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslti.du: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 -1)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vslti_du_hi(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vslti.du: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 32)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..9a8b757dab4e4a36baeefb08af9ccb6b5e3b39e8
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll
@@ -0,0 +1,73 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vslti_b(<16 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 %b)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vslti_h(<8 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 %b)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vslti_w(<4 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 %b)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vslti_d(<2 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 %b)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vslti_bu(<16 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 %b)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vslti_hu(<8 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 %b)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vslti_wu(<4 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 %b)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vslti_du(<2 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 %b)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll
new file mode 100644
index 0000000000000000000000000000000000000000..18683e9dc46f6354166d82a05219be43e867f7ad
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll
@@ -0,0 +1,194 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vslt_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vslt_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslt.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> %va, <16 x i8> %vb)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @lsx_vslt_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vslt_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslt.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> %va, <8 x i16> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @lsx_vslt_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vslt_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslt.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> %va, <4 x i32> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vslt_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vslt_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslt.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> %va, <2 x i64> %vb)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vslti_b(<16 x i8> %va) nounwind {
+; CHECK-LABEL: lsx_vslti_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslti.b $vr0, $vr0, 15
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 15)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vslti_h(<8 x i16> %va) nounwind {
+; CHECK-LABEL: lsx_vslti_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslti.h $vr0, $vr0, 15
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 15)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vslti_w(<4 x i32> %va) nounwind {
+; CHECK-LABEL: lsx_vslti_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslti.w $vr0, $vr0, -16
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 -16)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vslti_d(<2 x i64> %va) nounwind {
+; CHECK-LABEL: lsx_vslti_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslti.d $vr0, $vr0, -16
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 -16)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vslt_bu(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vslt_bu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslt.bu $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> %va, <16 x i8> %vb)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @lsx_vslt_hu(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vslt_hu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslt.hu $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> %va, <8 x i16> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @lsx_vslt_wu(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vslt_wu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslt.wu $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> %va, <4 x i32> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vslt_du(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vslt_du:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslt.du $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> %va, <2 x i64> %vb)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vslti_bu(<16 x i8> %va) nounwind {
+; CHECK-LABEL: lsx_vslti_bu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslti.bu $vr0, $vr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 1)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vslti_hu(<8 x i16> %va) nounwind {
+; CHECK-LABEL: lsx_vslti_hu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslti.hu $vr0, $vr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 1)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vslti_wu(<4 x i32> %va) nounwind {
+; CHECK-LABEL: lsx_vslti_wu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslti.wu $vr0, $vr0, 31
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 31)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vslti_du(<2 x i64> %va) nounwind {
+; CHECK-LABEL: lsx_vslti_du:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vslti.du $vr0, $vr0, 31
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 31)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..2a033a21b5651eaeb7bb7c05e67316d3ece7f5e1
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vsrai_b_lo(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsrai.b: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 -1)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vsrai_b_hi(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsrai.b: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 8)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vsrai_h_lo(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsrai.h: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vsrai_h_hi(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsrai.h: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 16)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vsrai_w_lo(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsrai.w: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vsrai_w_hi(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsrai.w: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 32)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vsrai_d_lo(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsrai.d: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 -1)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vsrai_d_hi(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsrai.d: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 64)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..c3b328145864fbfc82d1796e86c8411cd820a870
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vsrai_b(<16 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 %b)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vsrai_h(<8 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 %b)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vsrai_w(<4 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 %b)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vsrai_d(<2 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 %b)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll
new file mode 100644
index 0000000000000000000000000000000000000000..e85c8464c18e17237635cbc38cc8babd521a5183
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vsra_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vsra_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsra.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> %va, <16 x i8> %vb)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @lsx_vsra_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vsra_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsra.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> %va, <8 x i16> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @lsx_vsra_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vsra_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsra.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> %va, <4 x i32> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vsra_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vsra_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsra.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> %va, <2 x i64> %vb)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vsrai_b(<16 x i8> %va) nounwind {
+; CHECK-LABEL: lsx_vsrai_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsrai.b $vr0, $vr0, 7
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 7)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vsrai_h(<8 x i16> %va) nounwind {
+; CHECK-LABEL: lsx_vsrai_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsrai.h $vr0, $vr0, 15
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 15)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vsrai_w(<4 x i32> %va) nounwind {
+; CHECK-LABEL: lsx_vsrai_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsrai.w $vr0, $vr0, 31
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 31)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vsrai_d(<2 x i64> %va) nounwind {
+; CHECK-LABEL: lsx_vsrai_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsrai.d $vr0, $vr0, 63
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, 
i32 63) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll new file mode 100644 index 0000000000000000000000000000000000000000..4ffe5a704c2c882082fecbbf5d2f2edf82403ef2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vsran_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsran_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsran.b.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vsran_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsran_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsran.h.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vsran_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsran_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsran.w.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..d68064e9b9024c08a9366652f8ac34e5e2ca16a2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrani_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrani.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsrani_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrani.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrani_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrani.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsrani_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrani.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrani_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrani.w.d: argument out of range +entry: + %res = call <4 x 
i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsrani_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrani.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrani_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrani.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsrani_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrani.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..38cfde214dc1c2e21286c215288812a2fc69961f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrani_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrani_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrani_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrani_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll new file mode 100644 index 0000000000000000000000000000000000000000..717c641616c8d5235c6f57a664db9ef2fbfac77c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrani_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsrani_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrani.b.h $vr0, $vr1, 15 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> 
@llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 15) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrani_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrani_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrani.h.w $vr0, $vr1, 31 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 31) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrani_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrani_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrani.w.d $vr0, $vr1, 63 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 63) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrani_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrani_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrani.d.q $vr0, $vr1, 127 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 127) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..b6c2d70cebbc02caced08a7d6b53e975fe1f8e60 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrari_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrari.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsrari_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrari.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrari_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrari.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsrari_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrari.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrari_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrari.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsrari_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrari.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrari_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrari.d: argument out of range +entry: + %res = call <2 x i64> 
@llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsrari_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrari.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..2ad8adcd823b6fb03a77a84982f6b1b44d3b6afd --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrari_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrari_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrari_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrari_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll new file mode 100644 index 0000000000000000000000000000000000000000..8b52b7ac9631f4196e65b450c1cf0418cc6b9ab8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vsrar_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsrar_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrar.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vsrar_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrar_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrar.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vsrar_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrar_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrar.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64>, <2 x i64>) 
+ +define <2 x i64> @lsx_vsrar_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrar_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrar.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrari_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vsrari_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrari.b $vr0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 7) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrari_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vsrari_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrari.h $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrari_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vsrari_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrari.w $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrari_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vsrari_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrari.d $vr0, $vr0, 63 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 63) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll new file mode 100644 index 0000000000000000000000000000000000000000..d4cdfb5359eaaee9b534de56e5b6e6cb59f3001a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vsrarn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrarn_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrarn.b.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vsrarn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrarn_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrarn.h.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vsrarn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrarn_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrarn.w.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll new file mode 100644 index 
0000000000000000000000000000000000000000..d24cf92a039287c78534aad168f2038b78b0baa0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrarni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrarni.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsrarni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrarni.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrarni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrarni.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsrarni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrarni.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrarni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrarni.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsrarni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrarni.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrarni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrarni.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsrarni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrarni.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..19de7445cba1cb66ab4c944e7ef4752cab34ee5d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrarni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> 
@llvm.loongarch.lsx.vsrarni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrarni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrarni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrarni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll new file mode 100644 index 0000000000000000000000000000000000000000..2253e88372fcbdd38c3a59b520ad8500fff08e73 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrarni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsrarni_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrarni.b.h $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrarni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrarni_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrarni.h.w $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrarni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrarni_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrarni.w.d $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrarni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrarni_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrarni.d.q $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..3beff790afab6c15cf29318e553578527202fec1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> 
@llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrli_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrli.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsrli_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrli.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrli_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrli.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsrli_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrli.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrli_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrli.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsrli_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrli.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrli_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrli.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsrli_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrli.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..98652aca0d628013eba83149a992fdcf8cf566c7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrli_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrli_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrli_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrli_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res 
= call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll new file mode 100644 index 0000000000000000000000000000000000000000..1cddd9622233a116a952cd63861035df37f35585 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vsrl_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsrl_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrl.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vsrl_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrl_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrl.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vsrl_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrl_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrl.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsrl_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrl_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrl.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrli_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vsrli_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrli.b $vr0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 7) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrli_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vsrli_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrli.h $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrli_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vsrli_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrli.w $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrli_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vsrli_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrli.d $vr0, $vr0, 63 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 63) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll new file mode 100644 index 
0000000000000000000000000000000000000000..1c9b23243ffbdba0e762967669d2e39d74c76f12 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vsrln_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrln_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrln.b.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vsrln_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrln_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrln.h.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vsrln_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrln_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrln.w.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..054c4f393548f9fb4a4a646acef6d3f6f53f674d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrlni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlni.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsrlni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlni.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrlni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlni.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsrlni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlni.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrlni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlni.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsrlni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { 
+; CHECK: llvm.loongarch.lsx.vsrlni.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrlni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlni.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsrlni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlni.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..76341df197fdf57ade83515b39d02032b6291885 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrlni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrlni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrlni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrlni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll new file mode 100644 index 0000000000000000000000000000000000000000..6e523efa182405c4980e1ac7effe6c37e18551c0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrlni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlni_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlni.b.h $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> 
@lsx_vsrlni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlni_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlni.h.w $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrlni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlni_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlni.w.d $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrlni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlni_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlni.d.q $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..bcbd38e26e5f550de3c68db7fee327f9003f9945 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrlri_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlri.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsrlri_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlri.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrlri_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlri.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsrlri_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlri.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrlri_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlri.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsrlri_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlri.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrlri_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlri.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsrlri_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlri.d: argument out of range +entry: + %res = call 
<2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..4862b1546ccf516f22c7203d5e6d2c1e5f31929e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrlri_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrlri_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrlri_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrlri_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll new file mode 100644 index 0000000000000000000000000000000000000000..51638fa1a47f41928cdcfcd682e803ccf8065413 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vsrlr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlr.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vsrlr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlr.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vsrlr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlr.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsrlr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlr.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> 
@llvm.loongarch.lsx.vsrlr.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrlri_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vsrlri_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlri.b $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 1) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrlri_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vsrlri_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlri.h $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 1) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrlri_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vsrlri_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlri.w $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 1) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrlri_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vsrlri_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlri.d $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 1) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll new file mode 100644 index 0000000000000000000000000000000000000000..893e5139624112217ca69ea4f13be220b5f80b98 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vsrlrn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlrn_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlrn.b.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vsrlrn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlrn_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlrn.h.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vsrlrn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlrn_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlrn.w.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..8988ae88f9ebfafedf42e24d0a09f8592d82526e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x 
i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrlrni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlrni.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsrlrni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlrni.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrlrni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlrni.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsrlrni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlrni.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrlrni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlrni.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsrlrni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlrni.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrlrni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlrni.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsrlrni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlrni.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..e5530db56fed9f036567f73d7a1057f21942a1f1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrlrni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrlrni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 
%c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrlrni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrlrni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll new file mode 100644 index 0000000000000000000000000000000000000000..d1ea450d2237dec2fcd5afc0afb2320b2d4b4ae6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrlrni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlrni_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlrni.b.h $vr0, $vr1, 15 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 15) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrlrni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlrni_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlrni.h.w $vr0, $vr1, 31 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 31) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrlrni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlrni_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlrni.w.d $vr0, $vr1, 63 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 63) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrlrni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlrni_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlrni.d.q $vr0, $vr1, 127 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 127) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll new file mode 100644 index 0000000000000000000000000000000000000000..cecccbb730c950d35b2d769fe194e926bb7a3501 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vssran_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssran_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssran.b.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + 
%res = call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vssran_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssran_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssran.h.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vssran_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssran_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssran.w.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vssran_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssran_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssran.bu.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vssran_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssran_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssran.hu.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vssran_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssran_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssran.wu.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..f7817921ebebcbe513299b0b3989bcc184c72a24 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrani_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vssrani_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrani_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vssrani_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind 
{ +; CHECK: llvm.loongarch.lsx.vssrani.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrani_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vssrani_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrani_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vssrani_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrani_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.bu.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vssrani_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.bu.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrani_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.hu.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vssrani_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.hu.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrani_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.wu.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vssrani_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.wu.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrani_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.du.q: argument out of range 
+entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vssrani_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.du.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..a80ede9c5243eebee161593644bf4e2bc12bd106 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrani_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrani_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrani_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrani_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrani_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrani_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrani_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrani_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> 
%vb, i32 %c) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll new file mode 100644 index 0000000000000000000000000000000000000000..57b8eb16986660f68e4017420b7af3a09b21e141 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrani_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vssrani_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrani.b.h $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrani_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssrani_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrani.h.w $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrani_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssrani_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrani.w.d $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrani_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssrani_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrani.d.q $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrani_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vssrani_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrani.bu.h $vr0, $vr1, 15 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrani_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssrani_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrani.hu.w $vr0, $vr1, 31 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrani_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssrani_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrani.wu.d $vr0, $vr1, 63 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrani_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssrani_du_q: +; CHECK: 
# %bb.0: # %entry +; CHECK-NEXT: vssrani.du.q $vr0, $vr1, 127 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll new file mode 100644 index 0000000000000000000000000000000000000000..c6b7d9ec8e1d60134ca91bfaf58133540a9be477 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vssrarn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarn_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarn.b.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vssrarn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarn_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarn.h.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vssrarn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarn_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarn.w.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vssrarn_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarn_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarn.bu.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vssrarn_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarn_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarn.hu.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vssrarn_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarn_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarn.wu.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..4edda8c0a24adb561907c1c4ce4303b3535daf64 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x 
i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrarni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrarni.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vssrarni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrarni.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrarni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrarni.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vssrarni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrarni.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrarni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrarni.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vssrarni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrarni.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrarni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrarni.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vssrarni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrarni.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrarni_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrarni.bu.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vssrarni_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrarni.bu.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrarni_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrarni.hu.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vssrarni_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: 
llvm.loongarch.lsx.vssrarni.hu.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrarni_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrarni.wu.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vssrarni_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrarni.wu.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrarni_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrarni.du.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vssrarni_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrarni.du.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..a77e6e764c9d4d3a92c12f3c40eb74b28bb78cf4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrarni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrarni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrarni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrarni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrarni_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> 
@llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrarni_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrarni_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrarni_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll new file mode 100644 index 0000000000000000000000000000000000000000..1a2e91962ac3b6ec9541dd3c1bf856cf6f36f342 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrarni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarni_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarni.b.h $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrarni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarni_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarni.h.w $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrarni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarni_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarni.w.d $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrarni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarni_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarni.d.q $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrarni_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarni_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarni.bu.h $vr0, $vr1, 15 +; CHECK-NEXT: ret +entry: + %res 
= call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrarni_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarni_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarni.hu.w $vr0, $vr1, 31 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrarni_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarni_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarni.wu.d $vr0, $vr1, 63 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrarni_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarni_du_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarni.du.q $vr0, $vr1, 127 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll new file mode 100644 index 0000000000000000000000000000000000000000..697ccc3962a81b13aab4300baa411b316da9f7d1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vssrln_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssrln_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrln.b.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vssrln_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssrln_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrln.h.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vssrln_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssrln_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrln.w.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vssrln_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssrln_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrln.bu.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vssrln_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; 
CHECK-LABEL: lsx_vssrln_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrln.hu.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vssrln_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssrln_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrln.wu.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..6218af1fa773f3a63f54e4a39530411c7da3a9aa --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrlni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlni.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vssrlni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlni.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrlni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlni.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vssrlni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlni.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrlni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlni.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vssrlni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlni.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrlni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlni.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vssrlni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlni.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) + ret <2 x i64> %res +} 
+ +declare <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrlni_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlni.bu.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vssrlni_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlni.bu.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrlni_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlni.hu.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vssrlni_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlni.hu.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrlni_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlni.wu.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vssrlni_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlni.wu.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrlni_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlni.du.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vssrlni_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlni.du.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..688be826f467f4c4422f9e74d3cc577c290b0315 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrlni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrlni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> 
@llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrlni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrlni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrlni_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrlni_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrlni_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrlni_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll new file mode 100644 index 0000000000000000000000000000000000000000..8dd41e7abe87391fdd08dadd8b26deb4fb6c4b0b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrlni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlni_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlni.b.h $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrlni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlni_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlni.h.w $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> 
@lsx_vssrlni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlni_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlni.w.d $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrlni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlni_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlni.d.q $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrlni_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlni_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlni.bu.h $vr0, $vr1, 15 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrlni_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlni_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlni.hu.w $vr0, $vr1, 31 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrlni_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlni_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlni.wu.d $vr0, $vr1, 63 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrlni_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlni_du_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlni.du.q $vr0, $vr1, 127 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll new file mode 100644 index 0000000000000000000000000000000000000000..a8e76cbaa7fd12bdfa8cf1d12ba1749606a9f9b6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vssrlrn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlrn_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlrn.b.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vssrlrn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlrn_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlrn.h.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> 
@llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vssrlrn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlrn_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlrn.w.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vssrlrn_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlrn_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlrn.bu.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vssrlrn_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlrn_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlrn.hu.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vssrlrn_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlrn_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlrn.wu.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..98a0c5b3cd28a57e1fb52a7ed54ce1fbccd87186 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrlrni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlrni.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vssrlrni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlrni.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrlrni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlrni.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vssrlrni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlrni.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrlrni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: 
llvm.loongarch.lsx.vssrlrni.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vssrlrni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlrni.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrlrni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlrni.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vssrlrni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlrni.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrlrni_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlrni.bu.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vssrlrni_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlrni.bu.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrlrni_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlrni.hu.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vssrlrni_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlrni.hu.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrlrni_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlrni.wu.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vssrlrni_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlrni.wu.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrlrni_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlrni.du.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vssrlrni_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrlrni.du.q: argument out of range +entry: + %res = call <2 x i64> 
@llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..c389b4fd6023b2cdd459bfc1e7b717a83c917e5d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrlrni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrlrni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrlrni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrlrni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrlrni_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrlrni_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrlrni_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrlrni_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll new file mode 100644 index 0000000000000000000000000000000000000000..869e81b2b09d65550f741776bd29c6f71eef3547 --- 
/dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrlrni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlrni_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlrni.b.h $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrlrni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlrni_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlrni.h.w $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrlrni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlrni_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlrni.w.d $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrlrni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlrni_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlrni.d.q $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrlrni_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlrni_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlrni.bu.h $vr0, $vr1, 15 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrlrni_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlrni_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlrni.hu.w $vr0, $vr1, 31 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrlrni_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlrni_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlrni.wu.d $vr0, $vr1, 63 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrlrni_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssrlrni_du_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrlrni.du.q $vr0, $vr1, 127 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) + ret <2 x i64> %res +} diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll new file mode 100644 index 0000000000000000000000000000000000000000..c594b426d65031c62e990924df66ac86bb165dd4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vssub_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vssub_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssub.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vssub_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssub_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssub.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vssub_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssub_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssub.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vssub_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssub_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssub.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vssub_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vssub_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssub.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vssub_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssub_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssub.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vssub_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssub_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssub.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vssub_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssub_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssub.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..64518380964b4f062c0355a9037c62074086328b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare void @llvm.loongarch.lsx.vst(<16 x i8>, i8*, i32) + +define void @lsx_vst_lo(<16 x i8> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vst: argument out of range +entry: + call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 -2049) + ret void +} + +define void @lsx_vst_hi(<16 x i8> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vst: argument out of range +entry: + call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 2048) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..119ed9b786586d1700ceecf562f0e3f567f9f0f2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare void @llvm.loongarch.lsx.vst(<16 x i8>, i8*, i32) + +define void @lsx_vst(<16 x i8> %va, i8* %p, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 %b) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll new file mode 100644 index 0000000000000000000000000000000000000000..798f509f2318e9135f027918f3a25ae87bc196c6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare void @llvm.loongarch.lsx.vst(<16 x i8>, i8*, i32) + +define void @lsx_vst(<16 x i8> %va, i8* %p) nounwind { +; CHECK-LABEL: lsx_vst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vst $vr0, $a0, -2048 +; CHECK-NEXT: ret +entry: + call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 -2048) + ret void +} + +declare void @llvm.loongarch.lsx.vstx(<16 x i8>, i8*, i64) + +define void @lsx_vstx(<16 x i8> %va, i8* %p, i64 %c) nounwind { +; CHECK-LABEL: lsx_vstx: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vstx $vr0, $a0, $a1 +; CHECK-NEXT: ret +entry: + call void @llvm.loongarch.lsx.vstx(<16 x i8> %va, i8* %p, i64 %c) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..277abcbd34ccbe2cfc0d8f90a5da24997059079a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll @@ -0,0 +1,121 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, i8*, i32, i32) + +define void @lsx_vstelm_b_lo(<16 x i8> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range +entry: + call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 -129, i32 15) + ret void +} + +define void @lsx_vstelm_b_hi(<16 x i8> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range +entry: + call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 128, i32 15) + ret void 
+} + +define void @lsx_vstelm_b_idx_lo(<16 x i8> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range +entry: + call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 -1) + ret void +} + +define void @lsx_vstelm_b_idx_hi(<16 x i8> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range +entry: + call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 16) + ret void +} + +declare void @llvm.loongarch.lsx.vstelm.h(<8 x i16>, i8*, i32, i32) + +define void @lsx_vstelm_h_lo(<8 x i16> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. +entry: + call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 -258, i32 7) + ret void +} + +define void @lsx_vstelm_h_hi(<8 x i16> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. +entry: + call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 256, i32 7) + ret void +} + +define void @lsx_vstelm_h_idx_lo(<8 x i16> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. +entry: + call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 -1) + ret void +} + +define void @lsx_vstelm_h_idx_hi(<8 x i16> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2. +entry: + call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 8) + ret void +} + +declare void @llvm.loongarch.lsx.vstelm.w(<4 x i32>, i8*, i32, i32) + +define void @lsx_vstelm_w_lo(<4 x i32> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. +entry: + call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 -516, i32 3) + ret void +} + +define void @lsx_vstelm_w_hi(<4 x i32> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. +entry: + call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 512, i32 3) + ret void +} + +define void @lsx_vstelm_w_idx_lo(<4 x i32> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. +entry: + call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 -1) + ret void +} + +define void @lsx_vstelm_w_idx_hi(<4 x i32> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4. +entry: + call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 4) + ret void +} + +declare void @llvm.loongarch.lsx.vstelm.d(<2 x i64>, i8*, i32, i32) + +define void @lsx_vstelm_d_lo(<2 x i64> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. +entry: + call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 -1032, i32 1) + ret void +} + +define void @lsx_vstelm_d_hi(<2 x i64> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. +entry: + call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 1024, i32 1) + ret void +} + +define void @lsx_vstelm_d_idx_lo(<2 x i64> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. 
+entry: + call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 -1) + ret void +} + +define void @lsx_vstelm_d_idx_hi(<2 x i64> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8. +entry: + call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 2) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..f53932f7903551d4c5e322ba44498278fff902f7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, i8*, i32, i32) + +define void @lsx_vstelm_b(<16 x i8> %va, i8* %p, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 %b, i32 1) + ret void +} + +define void @lsx_vstelm_b_idx(<16 x i8> %va, i8* %p, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 %b) + ret void +} + +declare void @llvm.loongarch.lsx.vstelm.h(<8 x i16>, i8*, i32, i32) + +define void @lsx_vstelm_h(<8 x i16> %va, i8* %p, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 %b, i32 1) + ret void +} + +define void @lsx_vstelm_h_idx(<8 x i16> %va, i8* %p, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 %b) + ret void +} + +declare void @llvm.loongarch.lsx.vstelm.w(<4 x i32>, i8*, i32, i32) + +define void @lsx_vstelm_w(<4 x i32> %va, i8* %p, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 %b, i32 1) + ret void +} + +define void @lsx_vstelm_w_idx(<4 x i32> %va, i8* %p, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 %b) + ret void +} + +declare void @llvm.loongarch.lsx.vstelm.d(<2 x i64>, i8*, i32, i32) + +define void @lsx_vstelm_d(<2 x i64> %va, i8* %p, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 %b, i32 1) + ret void +} + +define void @lsx_vstelm_d_idx(<2 x i64> %va, i8* %p, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 %b) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll new file mode 100644 index 0000000000000000000000000000000000000000..6b9e7a9d7462e2ba85cd20748602a1fe2d98586e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, i8*, i32, i32) + +define void @lsx_vstelm_b(<16 x i8> %va, i8* %p) nounwind { +; CHECK-LABEL: lsx_vstelm_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vstelm.b $vr0, 
$a0, 1, 15 +; CHECK-NEXT: ret +entry: + call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 15) + ret void +} + +declare void @llvm.loongarch.lsx.vstelm.h(<8 x i16>, i8*, i32, i32) + +define void @lsx_vstelm_h(<8 x i16> %va, i8* %p) nounwind { +; CHECK-LABEL: lsx_vstelm_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vstelm.h $vr0, $a0, 2, 7 +; CHECK-NEXT: ret +entry: + call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 7) + ret void +} + +declare void @llvm.loongarch.lsx.vstelm.w(<4 x i32>, i8*, i32, i32) + +define void @lsx_vstelm_w(<4 x i32> %va, i8* %p) nounwind { +; CHECK-LABEL: lsx_vstelm_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vstelm.w $vr0, $a0, 4, 3 +; CHECK-NEXT: ret +entry: + call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 3) + ret void +} + +declare void @llvm.loongarch.lsx.vstelm.d(<2 x i64>, i8*, i32, i32) + +define void @lsx_vstelm_d(<2 x i64> %va, i8* %p) nounwind { +; CHECK-LABEL: lsx_vstelm_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vstelm.d $vr0, $a0, 8, 1 +; CHECK-NEXT: ret +entry: + call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 1) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll new file mode 100644 index 0000000000000000000000000000000000000000..5c04a3d8de0df6071cc5c277794d2fc5d895546b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vsub_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsub_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsub.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vsub_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsub_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsub.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vsub_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsub_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsub.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsub_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsub_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsub.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsub_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsub_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsub.q $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..96cc1241fbf3f42edd804976acda04cb86f94ca6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vsubi_bu_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsubi.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsubi_bu_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsubi.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 32) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vsubi_hu_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsubi.hu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsubi_hu_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsubi.hu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vsubi_wu_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsubi.wu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsubi_wu_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsubi.wu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vsubi_du_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsubi.du: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsubi_du_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsubi.du: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 32) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..162f9ad131c75cee7bb8b44ac68e67cc566227e8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vsubi_bu(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vsubi_hu(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x 
i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vsubi_wu(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vsubi_du(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll new file mode 100644 index 0000000000000000000000000000000000000000..304a4e4a78cc7c3529906a96cd838ec4a7536500 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vsubi_bu(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vsubi_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubi.bu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 31) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vsubi_hu(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vsubi_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubi.hu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 31) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vsubi_wu(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vsubi_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubi.wu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vsubi_du(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vsubi_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubi.du $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 31) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll new file mode 100644 index 0000000000000000000000000000000000000000..48100db743344ef9a02644f22aef4a547cb49272 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll @@ -0,0 +1,194 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vsubwev_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwev_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwev.h.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vsubwev_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwev_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwev.w.h $vr0, $vr0, $vr1 +; 
CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vsubwev_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwev_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwev.d.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsubwev_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwev_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwev.q.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vsubwev_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwev_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwev.h.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vsubwev_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwev_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwev.w.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vsubwev_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwev_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwev.d.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsubwev_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwev_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwev.q.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vsubwod_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwod_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwod.h.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vsubwod_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwod_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwod.w.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vsubwod_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwod_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwod.d.w 
$vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsubwod_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwod_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwod.q.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vsubwod_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwod_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwod.h.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vsubwod_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwod_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwod.w.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vsubwod_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwod_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwod.d.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsubwod_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwod_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwod.q.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll new file mode 100644 index 0000000000000000000000000000000000000000..72a1fe93c2c0135f1e128d7fd5c649353c6a6055 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vxor_v(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vxor_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..5f5613189ac81389c27496f879bf9100471e885d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vxori_b_lo(<16 x i8> %va) nounwind { +; CHECK: 
llvm.loongarch.lsx.vxori.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vxori_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vxori.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 256) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..4238d89120f1a6650a456d982771e7e002c5177e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vxori_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll new file mode 100644 index 0000000000000000000000000000000000000000..09669cd5ac14c733fbc887d01537881b068fa1a4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vxori_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vxori_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vxori.b $vr0, $vr0, 3 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 3) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll new file mode 100644 index 0000000000000000000000000000000000000000..2a7c37c2ae346efa58e08f2a2473559d7e2133fd --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @add_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: add_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vadd.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %v2 = add <16 x i8> %v0, %v1 + store <16 x i8> %v2, ptr %res + ret void +} + +define void @add_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: add_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vadd.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %v2 = add <8 x i16> %v0, %v1 + store <8 x i16> %v2, ptr %res + ret void +} + +define void @add_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: add_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vadd.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = 
load <4 x i32>, ptr %a1 + %v2 = add <4 x i32> %v0, %v1 + store <4 x i32> %v2, ptr %res + ret void +} + +define void @add_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: add_v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vadd.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %v2 = add <2 x i64> %v0, %v1 + store <2 x i64> %v2, ptr %res + ret void +} + +define void @add_v16i8_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: add_v16i8_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vaddi.bu $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = add <16 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31> + store <16 x i8> %v1, ptr %res + ret void +} + +define void @add_v8i16_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: add_v8i16_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vaddi.hu $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = add <8 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31> + store <8 x i16> %v1, ptr %res + ret void +} + +define void @add_v4i32_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: add_v4i32_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vaddi.wu $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = add <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31> + store <4 x i32> %v1, ptr %res + ret void +} + +define void @add_v2i64_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: add_v2i64_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vaddi.du $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = add <2 x i64> %v0, <i64 31, i64 31> + store <2 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll new file mode 100644 index 0000000000000000000000000000000000000000..523255159a811525440b470a2b04120b96a8e3f2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @and_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: and_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %v2 = and <16 x i8> %v0, %v1 + store <16 x i8> %v2, ptr %res + ret void +} + +define void @and_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: and_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %v2 = and <8 x i16> %v0, %v1 + store <8 x i16> %v2, ptr %res + ret void +} + +define void @and_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: and_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + 
%v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %v2 = and <4 x i32> %v0, %v1 + store <4 x i32> %v2, ptr %res + ret void +} + +define void @and_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: and_v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %v2 = and <2 x i64> %v0, %v1 + store <2 x i64> %v2, ptr %res + ret void +} + +define void @and_u_v16i8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_u_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vandi.b $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = and <16 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31> + store <16 x i8> %v1, ptr %res + ret void +} + +define void @and_u_v8i16(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_u_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrepli.h $vr1, 31 +; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = and <8 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31> + store <8 x i16> %v1, ptr %res + ret void +} + +define void @and_u_v4i32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_u_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrepli.w $vr1, 31 +; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = and <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31> + store <4 x i32> %v1, ptr %res + ret void +} + +define void @and_u_v2i64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_u_v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrepli.d $vr1, 31 +; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = and <2 x i64> %v0, <i64 31, i64 31> + store <2 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll new file mode 100644 index 0000000000000000000000000000000000000000..fbc570d77ba8038a8962a52f6319c58c945611fc --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll @@ -0,0 +1,178 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @ashr_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: ashr_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsra.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %v2 = ashr <16 x i8> %v0, %v1 + store <16 x i8> %v2, ptr %res + ret void +} + +define void @ashr_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: ashr_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsra.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %v2 = ashr <8 x i16> %v0, %v1 + store <8 x i16> %v2, ptr %res + ret void +} + +define void @ashr_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: ashr_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, 
$a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsra.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %v2 = ashr <4 x i32> %v0, %v1 + store <4 x i32> %v2, ptr %res + ret void +} + +define void @ashr_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: ashr_v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsra.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %v2 = ashr <2 x i64> %v0, %v1 + store <2 x i64> %v2, ptr %res + ret void +} + +define void @ashr_v16i8_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v16i8_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrai.b $vr0, $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = ashr <16 x i8> %v0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + store <16 x i8> %v1, ptr %res + ret void +} + +define void @ashr_v16i8_7(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v16i8_7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrai.b $vr0, $vr0, 7 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = ashr <16 x i8> %v0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + store <16 x i8> %v1, ptr %res + ret void +} + +define void @ashr_v8i16_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v8i16_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrai.h $vr0, $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = ashr <8 x i16> %v0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + store <8 x i16> %v1, ptr %res + ret void +} + +define void @ashr_v8i16_15(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v8i16_15: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrai.h $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = ashr <8 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> + store <8 x i16> %v1, ptr %res + ret void +} + +define void @ashr_v4i32_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v4i32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrai.w $vr0, $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = ashr <4 x i32> %v0, <i32 1, i32 1, i32 1, i32 1> + store <4 x i32> %v1, ptr %res + ret void +} + +define void @ashr_v4i32_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v4i32_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrai.w $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = ashr <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31> + store <4 x i32> %v1, ptr %res + ret void +} + +define void @ashr_v2i64_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v2i64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrai.d $vr0, $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = ashr <2 x i64> %v0, <i64 1, i64 1> + store <2 x i64> %v1, ptr %res + ret void +} + +define void @ashr_v2i64_63(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v2i64_63: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrai.d $vr0, $vr0, 63 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = ashr <2 x i64> %v0, <i64 63, i64 63> + store <2 x i64> %v1, ptr %res + ret void +} diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll new file mode 100644 index 0000000000000000000000000000000000000000..b8798c97861e2c90027f5f56f6c0a7b3aa07a835 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll @@ -0,0 +1,170 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @extract_16xi8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_16xi8: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 +; CHECK-NEXT: st.b $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <16 x i8>, ptr %src + %e = extractelement <16 x i8> %v, i32 1 + store i8 %e, ptr %dst + ret void +} + +define void @extract_8xi16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_8xi16: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 +; CHECK-NEXT: st.h $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <8 x i16>, ptr %src + %e = extractelement <8 x i16> %v, i32 1 + store i16 %e, ptr %dst + ret void +} + +define void @extract_4xi32(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_4xi32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 +; CHECK-NEXT: st.w $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <4 x i32>, ptr %src + %e = extractelement <4 x i32> %v, i32 1 + store i32 %e, ptr %dst + ret void +} + +define void @extract_2xi64(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_2xi64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 +; CHECK-NEXT: st.d $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <2 x i64>, ptr %src + %e = extractelement <2 x i64> %v, i32 1 + store i64 %e, ptr %dst + ret void +} + +define void @extract_4xfloat(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_4xfloat: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1 +; CHECK-NEXT: fst.s $fa0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <4 x float>, ptr %src + %e = extractelement <4 x float> %v, i32 1 + store float %e, ptr %dst + ret void +} + +define void @extract_2xdouble(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_2xdouble: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 +; CHECK-NEXT: fst.d $fa0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <2 x double>, ptr %src + %e = extractelement <2 x double> %v, i32 1 + store double %e, ptr %dst + ret void +} + +define void @extract_16xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_16xi8_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vreplve.b $vr0, $vr0, $a2 +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: srai.w $a0, $a0, 24 +; CHECK-NEXT: st.b $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <16 x i8>, ptr %src + %e = extractelement <16 x i8> %v, i32 %idx + store i8 %e, ptr %dst + ret void +} + +define void @extract_8xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_8xi16_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vreplve.h $vr0, $vr0, $a2 +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: srai.w $a0, $a0, 16 +; CHECK-NEXT: st.h $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load 
volatile <8 x i16>, ptr %src + %e = extractelement <8 x i16> %v, i32 %idx + store i16 %e, ptr %dst + ret void +} + +define void @extract_4xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_4xi32_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vreplve.w $vr0, $vr0, $a2 +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: st.w $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <4 x i32>, ptr %src + %e = extractelement <4 x i32> %v, i32 %idx + store i32 %e, ptr %dst + ret void +} + +define void @extract_2xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_2xi64_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vreplve.d $vr0, $vr0, $a2 +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: st.d $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <2 x i64>, ptr %src + %e = extractelement <2 x i64> %v, i32 %idx + store i64 %e, ptr %dst + ret void +} + +define void @extract_4xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_4xfloat_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vreplve.w $vr0, $vr0, $a2 +; CHECK-NEXT: fst.s $fa0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <4 x float>, ptr %src + %e = extractelement <4 x float> %v, i32 %idx + store float %e, ptr %dst + ret void +} + +define void @extract_2xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_2xdouble_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vreplve.d $vr0, $vr0, $a2 +; CHECK-NEXT: fst.d $fa0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <2 x double>, ptr %src + %e = extractelement <2 x double> %v, i32 %idx + store double %e, ptr %dst + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll new file mode 100644 index 0000000000000000000000000000000000000000..1fa1f611c4a36c7dc881b445a93c1f0de6e79767 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @fadd_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fadd_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfadd.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = fadd <4 x float> %v0, %v1 + store <4 x float> %v2, ptr %res + ret void +} + +define void @fadd_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fadd_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfadd.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = fadd <2 x double> %v0, %v1 + store <2 x double> %v2, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll new file mode 100644 index 0000000000000000000000000000000000000000..53fbf0b2f86fe43c363e29b321c5956782d3f536 --- /dev/null +++ 
b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll @@ -0,0 +1,692 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +;; TRUE +define void @v4f32_fcmp_true(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_true: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepli.b $vr0, -1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp true <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +;; FALSE +define void @v2f64_fcmp_false(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_false: +; CHECK: # %bb.0: +; CHECK-NEXT: vrepli.b $vr0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp false <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETOEQ +define void @v4f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_oeq: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.ceq.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp oeq <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_oeq: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.ceq.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp oeq <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETUEQ +define void @v4f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_ueq: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cueq.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp ueq <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_ueq: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cueq.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp ueq <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETEQ +define void @v4f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_eq: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.ceq.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp fast oeq <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: 
v2f64_fcmp_eq: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.ceq.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp fast ueq <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETOLE +define void @v4f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_ole: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp ole <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_ole: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp ole <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETULE +define void @v4f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_ule: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cule.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp ule <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_ule: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cule.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp ule <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETLE +define void @v4f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_le: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp fast ole <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_le: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp fast ule <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETOLT +define void @v4f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_olt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst 
$vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp olt <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_olt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp olt <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETULT +define void @v4f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cult.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp ult <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cult.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp ult <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETLT +define void @v4f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_lt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp fast olt <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_lt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp fast ult <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETONE +define void @v4f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_one: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cne.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp one <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_one: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cne.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp one <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x 
i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETUNE +define void @v4f32_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_une: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cune.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp une <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_une: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cune.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp une <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETNE +define void @v4f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cne.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp fast one <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cne.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp fast une <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETO +define void @v4f32_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_ord: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cor.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp ord <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_ord: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cor.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp ord <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETUO +define void @v4f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_uno: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cun.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp uno <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_uno: +; CHECK: # %bb.0: 
+; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cun.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp uno <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETOGT +define void @v4f32_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_ogt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp ogt <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_ogt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp ogt <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETUGT +define void @v4f32_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.cult.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp ugt <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.cult.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp ugt <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETGT +define void @v4f32_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_gt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp fast ogt <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_gt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp fast ugt <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETOGE +define void @v4f32_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_oge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 
0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp oge <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_oge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp oge <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETUGE +define void @v4f32_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.cule.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp uge <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.cule.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp uge <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETGE +define void @v4f32_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_ge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp fast oge <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_ge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp fast uge <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll new file mode 100644 index 0000000000000000000000000000000000000000..5f1ee9e4d212ebc5e68702d6928ef7d28f53064e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @fdiv_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fdiv_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfdiv.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = fdiv <4 x float> %v0, %v1 + store <4 x float> %v2, ptr 
%res + ret void +} + +define void @fdiv_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fdiv_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfdiv.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = fdiv <2 x double> %v0, %v1 + store <2 x double> %v2, ptr %res + ret void +} + +;; 1.0 / vec +define void @one_fdiv_v4f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: one_fdiv_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vfrecip.s $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %div = fdiv <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %v0 + store <4 x float> %div, ptr %res + ret void +} + +define void @one_fdiv_v2f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: one_fdiv_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vfrecip.d $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %div = fdiv <2 x double> <double 1.0, double 1.0>, %v0 + store <2 x double> %div, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll new file mode 100644 index 0000000000000000000000000000000000000000..e7fb527f7805e81b981bc8ec9d0e6405a1e77f5c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @fmul_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fmul_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = fmul <4 x float> %v0, %v1 + store <4 x float> %v2, ptr %res + ret void +} + +define void @fmul_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fmul_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = fmul <2 x double> %v0, %v1 + store <2 x double> %v2, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll new file mode 100644 index 0000000000000000000000000000000000000000..795c1ac8b368451c72dbe8f0bc8657d1335b64c0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @fneg_v4f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: fneg_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vbitrevi.w $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = fneg <4 x float> %v0 + store <4 x float> %v1, ptr %res + ret void +} +define void @fneg_v2f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: fneg_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; 
CHECK-NEXT: vbitrevi.d $vr0, $vr0, 63 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = fneg <2 x double> %v0 + store <2 x double> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll new file mode 100644 index 0000000000000000000000000000000000000000..c3008fe96e47dacde5cbfab667e7f2387c23e0db --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @fptosi_v4f32_v4i32(ptr %res, ptr %in){ +; CHECK-LABEL: fptosi_v4f32_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vftintrz.w.s $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %in + %v1 = fptosi <4 x float> %v0 to <4 x i32> + store <4 x i32> %v1, ptr %res + ret void +} + +define void @fptosi_v2f64_v2i64(ptr %res, ptr %in){ +; CHECK-LABEL: fptosi_v2f64_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vftintrz.l.d $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %in + %v1 = fptosi <2 x double> %v0 to <2 x i64> + store <2 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll new file mode 100644 index 0000000000000000000000000000000000000000..f0aeb0bd14e75ee9558b99c3ac91836fab8c69cc --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @fptoui_v4f32_v4i32(ptr %res, ptr %in){ +; CHECK-LABEL: fptoui_v4f32_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vftintrz.wu.s $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %in + %v1 = fptoui <4 x float> %v0 to <4 x i32> + store <4 x i32> %v1, ptr %res + ret void +} + +define void @fptoui_v2f64_v2i64(ptr %res, ptr %in){ +; CHECK-LABEL: fptoui_v2f64_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vftintrz.lu.d $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %in + %v1 = fptoui <2 x double> %v0 to <2 x i64> + store <2 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll new file mode 100644 index 0000000000000000000000000000000000000000..df98182321dab9b5700e8310adcdcc4632cf47d1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @fsub_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fsub_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfsub.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = fsub <4 x float> %v0, %v1 + store <4 x float> %v2, ptr 
%res + ret void +} + +define void @fsub_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fsub_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfsub.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = fsub <2 x double> %v0, %v1 + store <2 x double> %v2, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll new file mode 100644 index 0000000000000000000000000000000000000000..448f3fa6c6e0e494b2b72cffee64fa5b2e787d16 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll @@ -0,0 +1,939 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +;; SETEQ +define void @v16i8_icmp_eq_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v16i8_icmp_eq_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vseqi.b $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %cmp = icmp eq <16 x i8> %v0, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15> + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v16i8_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i8_icmp_eq: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vseq.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %cmp = icmp eq <16 x i8> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_eq_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v8i16_icmp_eq_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vseqi.h $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %cmp = icmp eq <8 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i16_icmp_eq: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vseq.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %cmp = icmp eq <8 x i16> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_eq_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v4i32_icmp_eq_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vseqi.w $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %cmp = icmp eq <4 x i32> %v0, <i32 15, i32 15, i32 15, i32 15> + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i32_icmp_eq: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vseq.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %cmp = icmp eq <4 x i32> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_eq_imm(ptr %res, ptr %a0) 
nounwind { +; CHECK-LABEL: v2i64_icmp_eq_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vseqi.d $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %cmp = icmp eq <2 x i64> %v0, <i64 15, i64 15> + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2i64_icmp_eq: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vseq.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp eq <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETLE +define void @v16i8_icmp_sle_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v16i8_icmp_sle_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslei.b $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %cmp = icmp sle <16 x i8> %v0, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15> + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v16i8_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i8_icmp_sle: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsle.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %cmp = icmp sle <16 x i8> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_sle_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v8i16_icmp_sle_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslei.h $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %cmp = icmp sle <8 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i16_icmp_sle: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsle.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %cmp = icmp sle <8 x i16> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_sle_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v4i32_icmp_sle_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslei.w $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %cmp = icmp sle <4 x i32> %v0, <i32 15, i32 15, i32 15, i32 15> + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i32_icmp_sle: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsle.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %cmp = icmp sle <4 x i32> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_sle_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v2i64_icmp_sle_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: 
vslei.d $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %cmp = icmp sle <2 x i64> %v0, <i64 15, i64 15> + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2i64_icmp_sle: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsle.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp sle <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETULE +define void @v16i8_icmp_ule_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v16i8_icmp_ule_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslei.bu $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %cmp = icmp ule <16 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31> + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v16i8_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i8_icmp_ule: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsle.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %cmp = icmp ule <16 x i8> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_ule_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v8i16_icmp_ule_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslei.hu $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %cmp = icmp ule <8 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31> + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i16_icmp_ule: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsle.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %cmp = icmp ule <8 x i16> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_ule_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v4i32_icmp_ule_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslei.wu $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %cmp = icmp ule <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31> + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i32_icmp_ule: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsle.wu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %cmp = icmp ule <4 x i32> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_ule_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v2i64_icmp_ule_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslei.du $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr 
%a0 + %cmp = icmp ule <2 x i64> %v0, <i64 31, i64 31> + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2i64_icmp_ule: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsle.du $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp ule <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETLT +define void @v16i8_icmp_slt_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v16i8_icmp_slt_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslti.b $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %cmp = icmp slt <16 x i8> %v0, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15> + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v16i8_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i8_icmp_slt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %cmp = icmp slt <16 x i8> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_slt_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v8i16_icmp_slt_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslti.h $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %cmp = icmp slt <8 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i16_icmp_slt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %cmp = icmp slt <8 x i16> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_slt_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v4i32_icmp_slt_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslti.w $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %cmp = icmp slt <4 x i32> %v0, <i32 15, i32 15, i32 15, i32 15> + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i32_icmp_slt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vslt.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %cmp = icmp slt <4 x i32> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_slt_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v2i64_icmp_slt_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslti.d $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %cmp = icmp slt <2 x i64> %v0, <i64 15, i64 15> + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr 
%res + ret void +} + +define void @v2i64_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2i64_icmp_slt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vslt.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp slt <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETULT +define void @v16i8_icmp_ult_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v16i8_icmp_ult_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslti.bu $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %cmp = icmp ult <16 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31> + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v16i8_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i8_icmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vslt.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %cmp = icmp ult <16 x i8> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_ult_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v8i16_icmp_ult_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslti.hu $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %cmp = icmp ult <8 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31> + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i16_icmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vslt.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %cmp = icmp ult <8 x i16> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_ult_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v4i32_icmp_ult_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslti.wu $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %cmp = icmp ult <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31> + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i32_icmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vslt.wu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %cmp = icmp ult <4 x i32> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_ult_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v2i64_icmp_ult_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslti.du $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %cmp = icmp ult <2 x i64> %v0, <i64 31, i64 31> + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { +; 
CHECK-LABEL: v2i64_icmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vslt.du $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp ult <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETNE +define void @v16i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i8_icmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vseq.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vxori.b $vr0, $vr0, 255 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %cmp = icmp ne <16 x i8> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i16_icmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vseq.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vrepli.b $vr1, -1 +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %cmp = icmp ne <8 x i16> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i32_icmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vseq.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vrepli.b $vr1, -1 +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %cmp = icmp ne <4 x i32> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2i64_icmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vseq.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vrepli.b $vr1, -1 +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp ne <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETGE +define void @v16i8_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i8_icmp_sge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vsle.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %cmp = icmp sge <16 x i8> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i16_icmp_sge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vsle.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %cmp = icmp sge <8 x i16> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: 
v4i32_icmp_sge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vsle.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %cmp = icmp sge <4 x i32> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2i64_icmp_sge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vsle.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp sge <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETUGE +define void @v16i8_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i8_icmp_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vsle.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %cmp = icmp uge <16 x i8> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i16_icmp_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vsle.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %cmp = icmp uge <8 x i16> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i32_icmp_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vsle.wu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %cmp = icmp uge <4 x i32> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2i64_icmp_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vsle.du $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp uge <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETGT +define void @v16i8_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i8_icmp_sgt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %cmp = icmp sgt <16 x i8> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i16_icmp_sgt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %cmp = 
icmp sgt <8 x i16> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i32_icmp_sgt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vslt.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %cmp = icmp sgt <4 x i32> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2i64_icmp_sgt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vslt.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp sgt <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETUGT +define void @v16i8_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i8_icmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vslt.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %cmp = icmp ugt <16 x i8> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i16_icmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vslt.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %cmp = icmp ugt <8 x i16> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i32_icmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vslt.wu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %cmp = icmp ugt <4 x i32> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2i64_icmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vslt.du $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp ugt <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll new file mode 100644 index 0000000000000000000000000000000000000000..a9834591aa0e8544c3269f8552df9367eb749767 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll @@ -0,0 +1,196 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @insert_16xi8(ptr %src, ptr %dst, i8 %ins) nounwind { +; CHECK-LABEL: insert_16xi8: +; CHECK: 
# %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <16 x i8>, ptr %src + %v_new = insertelement <16 x i8> %v, i8 %ins, i32 1 + store <16 x i8> %v_new, ptr %dst + ret void +} + +define void @insert_8xi16(ptr %src, ptr %dst, i16 %ins) nounwind { +; CHECK-LABEL: insert_8xi16: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <8 x i16>, ptr %src + %v_new = insertelement <8 x i16> %v, i16 %ins, i32 1 + store <8 x i16> %v_new, ptr %dst + ret void +} + +define void @insert_4xi32(ptr %src, ptr %dst, i32 %ins) nounwind { +; CHECK-LABEL: insert_4xi32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <4 x i32>, ptr %src + %v_new = insertelement <4 x i32> %v, i32 %ins, i32 1 + store <4 x i32> %v_new, ptr %dst + ret void +} + +define void @insert_2xi64(ptr %src, ptr %dst, i64 %ins) nounwind { +; CHECK-LABEL: insert_2xi64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <2 x i64>, ptr %src + %v_new = insertelement <2 x i64> %v, i64 %ins, i32 1 + store <2 x i64> %v_new, ptr %dst + ret void +} + +define void @insert_4xfloat(ptr %src, ptr %dst, float %ins) nounwind { +; CHECK-LABEL: insert_4xfloat: +; CHECK: # %bb.0: +; CHECK-NEXT: movfr2gr.s $a2, $fa0 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <4 x float>, ptr %src + %v_new = insertelement <4 x float> %v, float %ins, i32 1 + store <4 x float> %v_new, ptr %dst + ret void +} + +define void @insert_2xdouble(ptr %src, ptr %dst, double %ins) nounwind { +; CHECK-LABEL: insert_2xdouble: +; CHECK: # %bb.0: +; CHECK-NEXT: movfr2gr.d $a2, $fa0 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <2 x double>, ptr %src + %v_new = insertelement <2 x double> %v, double %ins, i32 1 + store <2 x double> %v_new, ptr %dst + ret void +} + +define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind { +; CHECK-LABEL: insert_16xi8_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vst $vr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 3, 0 +; CHECK-NEXT: st.b $a2, $a0, 0 +; CHECK-NEXT: vld $vr0, $sp, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %v = load volatile <16 x i8>, ptr %src + %v_new = insertelement <16 x i8> %v, i8 %ins, i32 %idx + store <16 x i8> %v_new, ptr %dst + ret void +} + +define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind { +; CHECK-LABEL: insert_8xi16_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vst $vr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 3, 1 +; CHECK-NEXT: st.h $a2, $a0, 0 +; CHECK-NEXT: vld $vr0, $sp, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %v = load volatile <8 x i16>, ptr %src + %v_new = insertelement <8 x i16> %v, i16 %ins, i32 %idx + store <8 x i16> %v_new, ptr %dst + ret void +} + +define void 
@insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind { +; CHECK-LABEL: insert_4xi32_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vst $vr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 3, 2 +; CHECK-NEXT: st.w $a2, $a0, 0 +; CHECK-NEXT: vld $vr0, $sp, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %v = load volatile <4 x i32>, ptr %src + %v_new = insertelement <4 x i32> %v, i32 %ins, i32 %idx + store <4 x i32> %v_new, ptr %dst + ret void +} + +define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind { +; CHECK-LABEL: insert_2xi64_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vst $vr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 3, 3 +; CHECK-NEXT: st.d $a2, $a0, 0 +; CHECK-NEXT: vld $vr0, $sp, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %v = load volatile <2 x i64>, ptr %src + %v_new = insertelement <2 x i64> %v, i64 %ins, i32 %idx + store <2 x i64> %v_new, ptr %dst + ret void +} + +define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwind { +; CHECK-LABEL: insert_4xfloat_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: vld $vr1, $a0, 0 +; CHECK-NEXT: vst $vr1, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 3, 2 +; CHECK-NEXT: fst.s $fa0, $a0, 0 +; CHECK-NEXT: vld $vr0, $sp, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %v = load volatile <4 x float>, ptr %src + %v_new = insertelement <4 x float> %v, float %ins, i32 %idx + store <4 x float> %v_new, ptr %dst + ret void +} + +define void @insert_2xdouble_idx(ptr %src, ptr %dst, double %ins, i32 %idx) nounwind { +; CHECK-LABEL: insert_2xdouble_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: vld $vr1, $a0, 0 +; CHECK-NEXT: vst $vr1, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 3, 3 +; CHECK-NEXT: fst.d $fa0, $a0, 0 +; CHECK-NEXT: vld $vr0, $sp, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %v = load volatile <2 x double>, ptr %src + %v_new = insertelement <2 x double> %v, double %ins, i32 %idx + store <2 x double> %v_new, ptr %dst + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll new file mode 100644 index 0000000000000000000000000000000000000000..dada52f93060e10d78ea9d1f524e2e548de55252 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll @@ -0,0 +1,178 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @lshr_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: lshr_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsrl.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %v2 = lshr <16 x i8> %v0, %v1 + store <16 x i8> %v2, ptr %res + ret void +} + +define void @lshr_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: lshr_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld 
+; CHECK-NEXT: vsrl.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %v2 = lshr <8 x i16> %v0, %v1
+ store <8 x i16> %v2, ptr %res
+ ret void
+}
+
+define void @lshr_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: lshr_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vsrl.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %v2 = lshr <4 x i32> %v0, %v1
+ store <4 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @lshr_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: lshr_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vsrl.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %v2 = lshr <2 x i64> %v0, %v1
+ store <2 x i64> %v2, ptr %res
+ ret void
+}
+
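+; The _1/_7/_15/_31/_63 cases below shift every lane by the same constant.
+; Judging from the generated checks, a splat shift amount folds into the
+; immediate form vsrli.{b/h/w/d}, while the variable-amount cases above
+; keep the register form vsrl.{b/h/w/d}. A minimal sketch of the folded
+; shape (lane type and amount are illustrative):
+;   %r = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>  ; => vsrli.w $vr0, $vr0, 1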
+define void @lshr_v16i8_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v16i8_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.b $vr0, $vr0, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = lshr <16 x i8> %v0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ store <16 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @lshr_v16i8_7(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v16i8_7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.b $vr0, $vr0, 7
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = lshr <16 x i8> %v0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ store <16 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @lshr_v8i16_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v8i16_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.h $vr0, $vr0, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = lshr <8 x i16> %v0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ store <8 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @lshr_v8i16_15(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v8i16_15:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.h $vr0, $vr0, 15
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = lshr <8 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ store <8 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @lshr_v4i32_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v4i32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.w $vr0, $vr0, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = lshr <4 x i32> %v0, <i32 1, i32 1, i32 1, i32 1>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @lshr_v4i32_31(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v4i32_31:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.w $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = lshr <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @lshr_v2i64_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v2i64_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = lshr <2 x i64> %v0, <i64 1, i64 1>
+ store <2 x i64> %v1, ptr %res
+ ret void
+}
+
+define void @lshr_v2i64_63(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v2i64_63:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 63
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = lshr <2 x i64> %v0, <i64 63, i64 63>
+ store <2 x i64> %v1, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll
new file mode 100644
index 0000000000000000000000000000000000000000..d0be9cb7e3c8bdcc07626d86897161d98e5eabaa
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll
@@ -0,0 +1,238 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @mul_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mul_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmul.b $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %v2 = mul <16 x i8> %v0, %v1
+ store <16 x i8> %v2, ptr %res
+ ret void
+}
+
+define void @mul_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mul_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmul.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %v2 = mul <8 x i16> %v0, %v1
+ store <8 x i16> %v2, ptr %res
+ ret void
+}
+
+define void @mul_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mul_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmul.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %v2 = mul <4 x i32> %v0, %v1
+ store <4 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @mul_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mul_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmul.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %v2 = mul <2 x i64> %v0, %v1
+ store <2 x i64> %v2, ptr %res
+ ret void
+}
+
+define void @mul_square_v16i8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_square_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vmul.b $vr0, $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = mul <16 x i8> %v0, %v0
+ store <16 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @mul_square_v8i16(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_square_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vmul.h $vr0, $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = mul <8 x i16> %v0, %v0
+ store <8 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @mul_square_v4i32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_square_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vmul.w $vr0, $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = mul <4 x i32> %v0, %v0
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @mul_square_v2i64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_square_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vmul.d $vr0, $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = mul <2 x i64> %v0, %v0
+ store <2 x i64> %v1, ptr %res
+ ret void
+}
+
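+; The remaining cases multiply by constant splats. From the checks, a
+; power-of-two splat such as 8 is strength-reduced to a shift (vslli with
+; log2(8) = 3), while a non-power-of-two splat such as 17 materializes the
+; constant with vrepli.{b/h/w/d} and keeps vmul. Sketch (illustrative):
+;   %r = mul <4 x i32> %x, <i32 8, i32 8, i32 8, i32 8>  ; => vslli.w $vr0, $vr0, 3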
+define void @mul_v16i8_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v16i8_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vslli.b $vr0, $vr0, 3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = mul <16 x i8> %v0, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+ store <16 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @mul_v8i16_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v8i16_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vslli.h $vr0, $vr0, 3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = mul <8 x i16> %v0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ store <8 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @mul_v4i32_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v4i32_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vslli.w $vr0, $vr0, 3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = mul <4 x i32> %v0, <i32 8, i32 8, i32 8, i32 8>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @mul_v2i64_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v2i64_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vslli.d $vr0, $vr0, 3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = mul <2 x i64> %v0, <i64 8, i64 8>
+ store <2 x i64> %v1, ptr %res
+ ret void
+}
+
+define void @mul_v16i8_17(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v16i8_17:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.b $vr1, 17
+; CHECK-NEXT: vmul.b $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = mul <16 x i8> %v0, <i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17>
+ store <16 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @mul_v8i16_17(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v8i16_17:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.h $vr1, 17
+; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = mul <8 x i16> %v0, <i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17>
+ store <8 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @mul_v4i32_17(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v4i32_17:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.w $vr1, 17
+; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = mul <4 x i32> %v0, <i32 17, i32 17, i32 17, i32 17>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @mul_v2i64_17(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v2i64_17:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.d $vr1, 17
+; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = mul <2 x i64> %v0, <i64 17, i64 17>
+ store <2 x i64> %v1, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll
new file mode 100644
index 0000000000000000000000000000000000000000..f124512acce73deac3fee2138b51b3db3b2bb822
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @or_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: or_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %v2 = or <16 x i8> %v0, %v1
+ store <16 x i8> %v2, ptr %res
+ ret void
+}
+
+define void @or_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: or_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %v2 = or <8 x i16> %v0, %v1
+ store <8 x i16> %v2, ptr %res
+ ret void
+}
+
+define void @or_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: or_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %v2 = or <4 x i32> %v0, %v1
+ store <4 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @or_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: or_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %v2 = or <2 x i64> %v0, %v1
+ store <2 x i64> %v2, ptr %res
+ ret void
+}
+
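+; The or-with-splat cases below depend on the element width: only the byte
+; variant has an immediate form (vori.b), so per the checks the v16i8 case
+; folds to vori.b while the wider splats of 31 are first materialized with
+; vrepli.{h/w/d} and then combined with the register-form vor.v. Sketch:
+;   %r = or <16 x i8> %x, <i8 31, i8 31, ...>  ; => vori.b $vr0, $vr0, 31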
+define void @or_u_v16i8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: or_u_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vori.b $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = or <16 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31>
+ store <16 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @or_u_v8i16(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: or_u_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.h $vr1, 31
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = or <8 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
+ store <8 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @or_u_v4i32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: or_u_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.w $vr1, 31
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = or <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @or_u_v2i64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: or_u_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.d $vr1, 31
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = or <2 x i64> %v0, <i64 31, i64 31>
+ store <2 x i64> %v1, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll
new file mode 100644
index 0000000000000000000000000000000000000000..b68f73a749135d6a46f73e49ac9cb5e3d5ea1971
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @sdiv_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: sdiv_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vdiv.b $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %v2 = sdiv <16 x i8> %v0, %v1
+ store <16 x i8> %v2, ptr %res
+ ret void
+}
+
+define void @sdiv_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: sdiv_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vdiv.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %v2 = sdiv <8 x i16> %v0, %v1
+ store <8 x i16> %v2, ptr %res
+ ret void
+}
+
+define void @sdiv_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: sdiv_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vdiv.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %v2 = sdiv <4 x i32> %v0, %v1
+ store <4 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @sdiv_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: sdiv_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vdiv.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %v2 = sdiv <2 x i64> %v0, %v1
+ store <2 x i64> %v2, ptr %res
+ ret void
+}
+
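+; The sdiv-by-8 cases below cannot use a plain arithmetic shift because
+; sdiv rounds toward zero. Per the checks, a bias is added first: the sign
+; is smeared with vsrai (by width-1), shifted down with vsrli (by width-3)
+; to produce 7 for negative lanes, added in, and a final vsrai by 3 divides,
+; i.e. per lane: x / 8 == (x + ((x >> (w-1)) >>u (w-3))) >> 3, with w the
+; lane width in bits.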
+define void @sdiv_v16i8_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sdiv_v16i8_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrai.b $vr1, $vr0, 7
+; CHECK-NEXT: vsrli.b $vr1, $vr1, 5
+; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.b $vr0, $vr0, 3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = sdiv <16 x i8> %v0, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+ store <16 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @sdiv_v8i16_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sdiv_v8i16_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrai.h $vr1, $vr0, 15
+; CHECK-NEXT: vsrli.h $vr1, $vr1, 13
+; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.h $vr0, $vr0, 3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = sdiv <8 x i16> %v0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ store <8 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @sdiv_v4i32_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sdiv_v4i32_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrai.w $vr1, $vr0, 31
+; CHECK-NEXT: vsrli.w $vr1, $vr1, 29
+; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.w $vr0, $vr0, 3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = sdiv <4 x i32> %v0, <i32 8, i32 8, i32 8, i32 8>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @sdiv_v2i64_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sdiv_v2i64_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrai.d $vr1, $vr0, 63
+; CHECK-NEXT: vsrli.d $vr1, $vr1, 61
+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vsrai.d $vr0, $vr0, 3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = sdiv <2 x i64> %v0, <i64 8, i64 8>
+ store <2 x i64> %v1, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll
new file mode 100644
index 0000000000000000000000000000000000000000..fa0aebaf28b3c5eef7c2cff40ae0e824ae66180b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll
@@ -0,0 +1,178 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @shl_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: shl_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vsll.b $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %v2 = shl <16 x i8> %v0, %v1
+ store <16 x i8> %v2, ptr %res
+ ret void
+}
+
+define void @shl_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: shl_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vsll.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %v2 = shl <8 x i16> %v0, %v1
+ store <8 x i16> %v2, ptr %res
+ ret void
+}
+
+define void @shl_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: shl_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vsll.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %v2 = shl <4 x i32> %v0, %v1
+ store <4 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @shl_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: shl_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vsll.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %v2 = shl <2 x i64> %v0, %v1
+ store <2 x i64> %v2, ptr %res
+ ret void
+}
+
+define void @shl_v16i8_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: shl_v16i8_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vslli.b $vr0, $vr0, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = shl <16 x i8> %v0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ store <16 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @shl_v16i8_7(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: shl_v16i8_7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vslli.b $vr0, $vr0, 7
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = shl <16 x i8> %v0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ store <16 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @shl_v8i16_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: shl_v8i16_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vslli.h $vr0, $vr0, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = shl <8 x i16> %v0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ store <8 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @shl_v8i16_15(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: shl_v8i16_15:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vslli.h $vr0, $vr0, 15
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = shl <8 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ store <8 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @shl_v4i32_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: shl_v4i32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vslli.w $vr0, $vr0, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = shl <4 x i32> %v0, <i32 1, i32 1, i32 1, i32 1>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @shl_v4i32_31(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: shl_v4i32_31:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vslli.w $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = shl <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @shl_v2i64_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: shl_v2i64_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vslli.d $vr0, $vr0, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = shl <2 x i64> %v0, <i64 1, i64 1>
+ store <2 x i64> %v1, ptr %res
+ ret void
+}
+
+define void @shl_v2i64_63(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: shl_v2i64_63:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vslli.d $vr0, $vr0, 63
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = shl <2 x i64> %v0, <i64 63, i64 63>
+ store <2 x i64> %v1, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll
new file mode 100644
index 0000000000000000000000000000000000000000..1e820a37a2409eae7187ddd97e4b5b021ac51365
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @sitofp_v4i32_v4f32(ptr %res, ptr %in){
+; CHECK-LABEL: sitofp_v4i32_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vffint.s.w $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i32>, ptr %in
+ %v1 = sitofp <4 x i32> %v0 to <4 x float>
+ store <4 x float> %v1, ptr %res
+ ret void
+}
+
+define void @sitofp_v2i64_v2f64(ptr %res, ptr %in){
+; CHECK-LABEL: sitofp_v2i64_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vffint.d.l $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <2 x i64>, ptr %in
+ %v1 = sitofp <2 x i64> %v0 to <2 x double>
+ store <2 x double> %v1, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll
new file mode 100644
index 0000000000000000000000000000000000000000..25b4623a47d1fc15b1361a4ba843fc2c45349de3
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @sub_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: sub_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vsub.b $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %v2 = sub <16 x i8> %v0, %v1
+ store <16 x i8> %v2, ptr %res
+ ret void
+}
+
+define void @sub_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: sub_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vsub.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %v2 = sub <8 x i16> %v0, %v1
+ store <8 x i16> %v2, ptr %res
+ ret void
+}
+
+define void @sub_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: sub_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vsub.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %v2 = sub <4 x i32> %v0, %v1
+ store <4 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @sub_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: sub_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vsub.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %v2 = sub <2 x i64> %v0, %v1
+ store <2 x i64> %v2, ptr %res
+ ret void
+}
+
+define void @sub_v16i8_31(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sub_v16i8_31:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsubi.bu $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = sub <16 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31>
+ store <16 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @sub_v8i16_31(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sub_v8i16_31:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsubi.hu $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = sub <8 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
+ store <8 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @sub_v4i32_31(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sub_v4i32_31:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsubi.wu $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = sub <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @sub_v2i64_31(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sub_v2i64_31:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsubi.du $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = sub <2 x i64> %v0, <i64 31, i64 31>
+ store <2 x i64> %v1, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll
new file mode 100644
index 0000000000000000000000000000000000000000..abb60b91dd488f548b8b23e46063b5e6cad4931e
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @udiv_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: udiv_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vdiv.bu $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %v2 = udiv <16 x i8> %v0, %v1
+ store <16 x i8> %v2, ptr %res
+ ret void
+}
+
+define void @udiv_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: udiv_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vdiv.hu $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %v2 = udiv <8 x i16> %v0, %v1
+ store <8 x i16> %v2, ptr %res
+ ret void
+}
+
+define void @udiv_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: udiv_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vdiv.wu $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %v2 = udiv <4 x i32> %v0, %v1
+ store <4 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @udiv_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: udiv_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vdiv.du $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %v2 = udiv <2 x i64> %v0, %v1
+ store <2 x i64> %v2, ptr %res
+ ret void
+}
+
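+; Unlike the sdiv cases, the unsigned udiv-by-8 cases below need no
+; rounding bias: per the checks each one folds directly to a logical
+; shift, i.e. per lane x /u 8 == x >>u 3 (vsrli.{b/h/w/d} ..., 3).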
+define void @udiv_v16i8_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: udiv_v16i8_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.b $vr0, $vr0, 3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = udiv <16 x i8> %v0, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+ store <16 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @udiv_v8i16_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: udiv_v8i16_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.h $vr0, $vr0, 3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = udiv <8 x i16> %v0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ store <8 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @udiv_v4i32_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: udiv_v4i32_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.w $vr0, $vr0, 3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = udiv <4 x i32> %v0, <i32 8, i32 8, i32 8, i32 8>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @udiv_v2i64_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: udiv_v2i64_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = udiv <2 x i64> %v0, <i64 8, i64 8>
+ store <2 x i64> %v1, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll
new file mode 100644
index 0000000000000000000000000000000000000000..3d4913f12e57e1400ba3a2700ab7a1a6f7ef7037
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @uitofp_v4i32_v4f32(ptr %res, ptr %in){
+; CHECK-LABEL: uitofp_v4i32_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vffint.s.wu $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i32>, ptr %in
+ %v1 = uitofp <4 x i32> %v0 to <4 x float>
+ store <4 x float> %v1, ptr %res
+ ret void
+}
+
+define void @uitofp_v2i64_v2f64(ptr %res, ptr %in){
+; CHECK-LABEL: uitofp_v2i64_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vffint.d.lu $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <2 x i64>, ptr %in
+ %v1 = uitofp <2 x i64> %v0 to <2 x double>
+ store <2 x double> %v1, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll
new file mode 100644
index 0000000000000000000000000000000000000000..ce3e49c990ffb014baac4bfbe2607b261c231dba
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @xor_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: xor_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %v2 = xor <16 x i8> %v0, %v1
+ store <16 x i8> %v2, ptr %res
+ ret void
+}
+
+define void @xor_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: xor_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %v2 = xor <8 x i16> %v0, %v1
+ store <8 x i16> %v2, ptr %res
+ ret void
+}
+
+define void @xor_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: xor_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %v2 = xor <4 x i32> %v0, %v1
+ store <4 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @xor_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: xor_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %v2 = xor <2 x i64> %v0, %v1
+ store <2 x i64> %v2, ptr %res
+ ret void
+}
+
+define void @xor_u_v16i8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: xor_u_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vxori.b $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = xor <16 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31>
+ store <16 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @xor_u_v8i16(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: xor_u_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.h $vr1, 31
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = xor <8 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
+ store <8 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @xor_u_v4i32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: xor_u_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.w $vr1, 31
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = xor <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @xor_u_v2i64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: xor_u_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.d $vr1, 31
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = xor <2 x i64> %v0, <i64 31, i64 31>
+ store <2 x i64> %v1, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/mulh.ll b/llvm/test/CodeGen/LoongArch/lsx/mulh.ll
new file mode 100644
index 0000000000000000000000000000000000000000..e1388f00e355fb81d68f7c6da9b7bdaa6827b545
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/mulh.ll
@@ -0,0 +1,162 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
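+; Each test below spells out the target-independent high-half multiply
+; pattern: sign- or zero-extend both operands to double width, multiply,
+; shift right by the source lane width, and truncate. Judging from the
+; checks, the whole pattern matches a single vmuh.{b/h/w/d}[u]. Sketch for
+; one signed i16 lane (illustrative):
+;   mulhs(a, b) == trunc i32 (((sext a) * (sext b)) >> 16) to i16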
+
+define void @mulhs_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mulhs_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmuh.b $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %v0s = sext <16 x i8> %v0 to <16 x i16>
+ %v1s = sext <16 x i8> %v1 to <16 x i16>
+ %m = mul <16 x i16> %v0s, %v1s
+ %s = ashr <16 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %v2 = trunc <16 x i16> %s to <16 x i8>
+ store <16 x i8> %v2, ptr %res
+ ret void
+}
+
+define void @mulhu_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mulhu_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmuh.bu $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %v0z = zext <16 x i8> %v0 to <16 x i16>
+ %v1z = zext <16 x i8> %v1 to <16 x i16>
+ %m = mul <16 x i16> %v0z, %v1z
+ %s = lshr <16 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %v2 = trunc <16 x i16> %s to <16 x i8>
+ store <16 x i8> %v2, ptr %res
+ ret void
+}
+
+define void @mulhs_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mulhs_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmuh.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %v0s = sext <8 x i16> %v0 to <8 x i32>
+ %v1s = sext <8 x i16> %v1 to <8 x i32>
+ %m = mul <8 x i32> %v0s, %v1s
+ %s = ashr <8 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ %v2 = trunc <8 x i32> %s to <8 x i16>
+ store <8 x i16> %v2, ptr %res
+ ret void
+}
+
+define void @mulhu_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mulhu_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmuh.hu $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %v0z = zext <8 x i16> %v0 to <8 x i32>
+ %v1z = zext <8 x i16> %v1 to <8 x i32>
+ %m = mul <8 x i32> %v0z, %v1z
+ %s = lshr <8 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ %v2 = trunc <8 x i32> %s to <8 x i16>
+ store <8 x i16> %v2, ptr %res
+ ret void
+}
+
+define void @mulhs_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mulhs_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmuh.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %v0s = sext <4 x i32> %v0 to <4 x i64>
+ %v1s = sext <4 x i32> %v1 to <4 x i64>
+ %m = mul <4 x i64> %v0s, %v1s
+ %s = ashr <4 x i64> %m, <i64 32, i64 32, i64 32, i64 32>
+ %v2 = trunc <4 x i64> %s to <4 x i32>
+ store <4 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @mulhu_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mulhu_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmuh.wu $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %v0z = zext <4 x i32> %v0 to <4 x i64>
+ %v1z = zext <4 x i32> %v1 to <4 x i64>
+ %m = mul <4 x i64> %v0z, %v1z
+ %s = lshr <4 x i64> %m, <i64 32, i64 32, i64 32, i64 32>
+ %v2 = trunc <4 x i64> %s to <4 x i32>
+ store <4 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @mulhs_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mulhs_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmuh.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %v0s = sext <2 x i64> %v0 to <2 x i128>
+ %v1s = sext <2 x i64> %v1 to <2 x i128>
+ %m = mul <2 x i128> %v0s, %v1s
+ %s = ashr <2 x i128> %m, <i128 64, i128 64>
+ %v2 = trunc <2 x i128> %s to <2 x i64>
+ store <2 x i64> %v2, ptr %res
+ ret void
+}
+
+define void @mulhu_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mulhu_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmuh.du $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %v0z = zext <2 x i64> %v0 to <2 x i128>
+ %v1z = zext <2 x i64> %v1 to <2 x i128>
+ %m = mul <2 x i128> %v0z, %v1z
+ %s = lshr <2 x i128> %m, <i128 64, i128 64>
+ %v2 = trunc <2 x i128> %s to <2 x i64>
+ store <2 x i64> %v2, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
new file mode 100644
index 0000000000000000000000000000000000000000..746152f0f0264190d5b3f3725dc477132512db88
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
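+; The select masks below alternate false/true between adjacent lanes, so
+; the mask constant is a repeating 0x00/0xFF byte pattern. Judging from the
+; checks, it is materialized as a halfword splat (vrepli.h -256, i.e.
+; 0xff00) or via vreplgr2vr/a constant pool for the wider cases, and the
+; select itself becomes vbitsel.v; when one operand is all-ones, the
+; vbitseli.b immediate form folds it.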
+
+define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: select_v16i8_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.h $vr1, -256
+; CHECK-NEXT: vbitseli.b $vr1, $vr0, 255
+; CHECK-NEXT: vst $vr1, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %a0
+ %sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %v0
+ store <16 x i8> %sel, ptr %res
+ ret void
+}
+
+define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vrepli.h $vr2, -256
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> %v0, <16 x i8> %v1
+ store <16 x i8> %sel, ptr %res
+ ret void
+}
+
+define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: lu12i.w $a1, -16
+; CHECK-NEXT: lu32i.d $a1, 0
+; CHECK-NEXT: vreplgr2vr.w $vr2, $a1
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %sel = select <8 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <8 x i16> %v0, <8 x i16> %v1
+ store <8 x i16> %sel, ptr %res
+ ret void
+}
+
+define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: ori $a1, $zero, 0
+; CHECK-NEXT: lu32i.d $a1, -1
+; CHECK-NEXT: vreplgr2vr.d $vr2, $a1
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %sel = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> %v0, <4 x i32> %v1
+ store <4 x i32> %sel, ptr %res
+ ret void
+}
+
+define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0)
+; CHECK-NEXT: addi.d $a3, $a3, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT: vld $vr0, $a3, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vld $vr2, $a2, 0
+; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %sel = select <2 x i1> <i1 false, i1 true>, <2 x i64> %v0, <2 x i64> %v1
+ store <2 x i64> %sel, ptr %res
+ ret void
+}
diff --git a/llvm/test/MC/LoongArch/lasx/invalid-imm.s b/llvm/test/MC/LoongArch/lasx/invalid-imm.s
index 5c61a7a4200913fc42b0dae770611c6af7ae9908..6f64a6f87802be967a1444c41fade9db1dec5484 100644
--- a/llvm/test/MC/LoongArch/lasx/invalid-imm.s
+++ b/llvm/test/MC/LoongArch/lasx/invalid-imm.s
@@ -3,53 +3,1190 @@
 # RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s
 
 ## uimm1
+xvrepl128vei.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 1]
+
 xvrepl128vei.d $xr0, $xr1, 2
 # CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 1]
 
+## uimm2
+xvpickve.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3]
+
+xvpickve.d $xr0, $xr1, 4
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3]
+
+xvinsve0.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3]
+
+xvinsve0.d $xr0, $xr1, 4
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3]
+
+xvinsgr2vr.d $xr0, $a0, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3]
+
+xvinsgr2vr.d $xr0, $a0, 4
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3]
+
+xvpickve2gr.d $a0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3]
+
+xvpickve2gr.d $a0, $xr1, 4
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3]
+
+xvpickve2gr.du $a0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 3]
+
+xvpickve2gr.du $a0, $xr1, 4
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 3]
+
+xvstelm.d $xr0, $a0, 8, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3]
+
+xvstelm.d $xr0, $a0, 8, 4
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3]
+
+xvrepl128vei.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 3]
+
+xvrepl128vei.w $xr0, $xr1, 4
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 3]
+
+## uimm3
+xvpickve.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+xvpickve.w $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+xvinsve0.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+xvinsve0.w $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+xvinsgr2vr.w $xr0, $a0, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvinsgr2vr.w $xr0, $a0, 8
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvpickve2gr.wu $a0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7]
+
+xvpickve2gr.wu $a0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7]
+
+xvpickve2gr.w $a0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7]
+
+xvpickve2gr.w $a0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7]
+
+xvstelm.w $xr0, $a0, 4, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvstelm.w $xr0, $a0, 4, 8
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvrepl128vei.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7]
+
+xvrepl128vei.h $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7]
+
+xvbitrevi.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvbitrevi.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvbitseti.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvbitseti.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvbitclri.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvbitclri.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvsrari.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7]
+
+xvsrari.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7]
+
+xvsrlri.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7]
+
+xvsrlri.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7]
+
+xvsllwil.hu.bu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7]
+
+xvsllwil.hu.bu $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7]
+
+xvsllwil.h.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7]
+
+xvsllwil.h.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7]
+
+xvrotri.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7]
+
+xvrotri.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7]
+
+xvsrai.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+xvsrai.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+xvsrli.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+xvsrli.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+xvslli.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+xvslli.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+xvsat.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7]
+
+xvsat.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7]
+
+xvsat.bu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+xvsat.bu $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
 ## uimm4
+xvstelm.h $xr0, $a0, 2, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvstelm.h $xr0, $a0, 2, 16
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvrepl128vei.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15]
+
+xvrepl128vei.b $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15]
+
+xvbitrevi.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvbitrevi.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvbitseti.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvbitseti.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvbitclri.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvbitclri.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvssrarni.bu.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15]
+
+xvssrarni.bu.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15]
+
+xvssrlrni.bu.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15]
+
+xvssrlrni.bu.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15]
+
+xvssrarni.b.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+xvssrarni.b.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+xvssrlrni.b.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+xvssrlrni.b.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+xvssrani.bu.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+xvssrani.bu.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+xvssrlni.bu.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+xvssrlni.bu.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+xvssrani.b.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvssrani.b.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvssrlni.b.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvssrlni.b.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvsrarni.b.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvsrarni.b.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvsrlrni.b.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvsrlrni.b.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvsrani.b.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvsrani.b.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvsrlni.b.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvsrlni.b.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvsrari.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15]
+
+xvsrari.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15]
+
+xvsrlri.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15]
+
+xvsrlri.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15]
+
+xvsllwil.wu.hu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15]
+
+xvsllwil.wu.hu $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15]
+
+xvsllwil.w.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvsllwil.w.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvrotri.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15]
+
+xvrotri.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15]
+
+xvsrai.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+xvsrai.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+xvsrli.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+xvsrli.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+xvslli.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+xvslli.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+xvsat.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15]
+
 xvsat.h $xr0, $xr1, 16
 # CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15]
 
+xvsat.hu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+xvsat.hu $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+## uimm5
+xvstelm.b $xr0, $a0, 1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvstelm.b $xr0, $a0, 1, 32
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvbsrl.v $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvbsrl.v $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvbsll.v $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvbsll.v $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvslti.du $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslti.du $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslti.wu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslti.wu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslti.hu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslti.hu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslti.bu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslti.bu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslei.du $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslei.du $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslei.wu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslei.wu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslei.hu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslei.hu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslei.bu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslei.bu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvfrstpi.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31]
+
+xvfrstpi.h $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31]
+
+xvfrstpi.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31]
+
+xvfrstpi.b $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31]
+
+xvbitrevi.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvbitrevi.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvbitseti.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvbitseti.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvbitclri.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvbitclri.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvssrarni.hu.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31]
+
+xvssrarni.hu.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31]
+
+xvssrlrni.hu.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31]
+
+xvssrlrni.hu.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31]
+
+xvssrarni.h.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+xvssrarni.h.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+xvssrlrni.h.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+xvssrlrni.h.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+xvssrani.hu.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+xvssrani.hu.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+xvssrlni.hu.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+xvssrlni.hu.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+xvssrani.h.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvssrani.h.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvssrlni.h.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvssrlni.h.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvsrarni.h.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvsrarni.h.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvsrlrni.h.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvsrlrni.h.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvsrani.h.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvsrani.h.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvsrlni.h.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvsrlni.h.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvsrari.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsrari.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsrlri.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsrlri.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsllwil.du.wu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31]
+
+xvsllwil.du.wu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31]
+
+xvsllwil.d.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvsllwil.d.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvrotri.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvrotri.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsrai.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvsrai.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvsrli.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvsrli.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvslli.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvslli.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvaddi.bu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvaddi.bu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvaddi.hu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvaddi.hu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvaddi.wu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvaddi.wu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvaddi.du $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvaddi.du $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsubi.bu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsubi.bu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsubi.hu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsubi.hu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsubi.wu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsubi.wu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsubi.du $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsubi.du $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmaxi.bu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmaxi.bu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmaxi.hu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmaxi.hu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmaxi.wu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmaxi.wu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmaxi.du $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmaxi.du $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmini.bu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmini.bu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmini.hu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmini.hu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmini.wu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmini.wu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmini.du $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmini.du $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsat.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31]
+
+xvsat.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31]
+
+xvsat.wu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvsat.wu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
 ## simm5
+xvslti.d $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslti.d $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslti.w $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslti.w $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslti.h $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslti.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslti.b $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslti.b $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslei.d $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslei.d $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslei.w $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslei.w $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslei.h $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslei.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslei.b $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslei.b $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvseqi.d $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvseqi.d $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvseqi.w $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvseqi.w $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvseqi.h $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvseqi.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvseqi.b $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
 xvseqi.b $xr0, $xr1, 16
 # CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+xvmaxi.b $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmaxi.b $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmaxi.h $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmaxi.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmaxi.w $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmaxi.w $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmaxi.d $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmaxi.d $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmini.b $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmini.b $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmini.h $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmini.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmini.w $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmini.w $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmini.d $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmini.d $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+## uimm6
+xvbitrevi.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvbitrevi.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvbitseti.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvbitseti.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvbitclri.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvbitclri.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvssrarni.wu.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63]
+
+xvssrarni.wu.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63]
+
+xvssrlrni.wu.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63]
+
+xvssrlrni.wu.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63]
+
+xvssrarni.w.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+xvssrarni.w.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+xvssrlrni.w.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+xvssrlrni.w.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+xvssrani.wu.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+xvssrani.wu.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+xvssrlni.wu.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+xvssrlni.wu.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+xvssrani.w.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+xvssrani.w.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+xvssrlni.w.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+xvssrlni.w.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+xvsrarni.w.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+xvsrarni.w.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+xvsrlrni.w.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+xvsrlrni.w.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+xvsrani.w.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvsrani.w.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvsrlni.w.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvsrlni.w.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvsrari.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63]
+
+xvsrari.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63]
+
+xvsrlri.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63]
+
+xvsrlri.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63]
+
+xvrotri.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63]
+
+xvrotri.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63]
+
+xvsrai.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+xvsrai.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+xvsrli.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+xvsrli.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+xvslli.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+xvslli.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+xvsat.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63]
+
+xvsat.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63]
+
+xvsat.du $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+xvsat.du $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
 ## uimm7
+xvssrarni.du.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127]
+
+xvssrarni.du.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127]
+
+xvssrlrni.du.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127]
+
+xvssrlrni.du.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127]
+
+xvssrarni.d.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+xvssrarni.d.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+xvssrlrni.d.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+xvssrlrni.d.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+xvssrani.du.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+xvssrani.du.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+xvssrlni.du.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+xvssrlni.du.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+xvssrani.d.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+xvssrani.d.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+xvssrlni.d.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+xvssrlni.d.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+xvsrarni.d.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+xvsrarni.d.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+xvsrlrni.d.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+xvsrlrni.d.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+xvsrani.d.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127]
+
+xvsrani.d.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127]
+
+xvsrlni.d.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127]
+
 xvsrlni.d.q $xr0, $xr1, 128
 # CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127]
-## simm8
+## uimm8
+xvextrins.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvextrins.d $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvextrins.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvextrins.w $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvextrins.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvextrins.h $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvextrins.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvextrins.b $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvpermi.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+
+xvpermi.q $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+
+xvpermi.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+
+xvpermi.d $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+
+xvpermi.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+
 xvpermi.w $xr0, $xr1, 256
 # CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+xvshuf4i.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+xvshuf4i.d $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+xvshuf4i.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+xvshuf4i.w $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+xvshuf4i.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+xvshuf4i.h $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+xvshuf4i.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+xvshuf4i.b $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+xvbitseli.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvbitseli.b $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvandi.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255]
+
+xvandi.b $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255]
+
+xvori.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255]
+
+xvori.b $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255]
+
+xvxori.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255]
+
+xvxori.b $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255]
+
+xvnori.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255]
+
+xvnori.b $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255]
+
+## simm8
+xvstelm.b $xr0, $a0, -129, 1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-128, 127]
+
+xvstelm.b $xr0, $a0, 128, 1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-128, 127]
+
 ## simm8_lsl1
-xvstelm.h $xr0, $a0, 255, 1
+xvstelm.h $xr0, $a0, -258, 1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-256, 254]
+
+xvstelm.h $xr0, $a0, 256, 1
 # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-256, 254]
 ## simm8_lsl2
-xvstelm.w $xr0, $a0, 512, 1
+xvstelm.w $xr0, $a0, -516, 1
 # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-512, 508]
-## simm10
-xvrepli.b $xr0, 512
-# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511]
+xvstelm.w $xr0, $a0, 512, 1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-512, 508]
 ## simm8_lsl3
+xvstelm.d $xr0, $a0, -1032, 1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-1024, 1016]
+
 xvstelm.d $xr0, $a0, 1024, 1
 # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-1024, 1016]
 ## simm9_lsl3
+xvldrepl.d $xr0, $a0, -2056
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 8 in the range [-2048, 2040]
+
 xvldrepl.d $xr0, $a0, 2048
 # CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 8 in the range [-2048, 2040]
 ## simm10_lsl2
+xvldrepl.w $xr0, $a0, -2052
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 4 in the range [-2048, 2044]
+
 xvldrepl.w $xr0, $a0, 2048
 # CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 4 in the range [-2048, 2044]
+## simm10
+xvrepli.b $xr0, -513
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511]
+
+xvrepli.b $xr0, 512
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511]
+
+xvrepli.h $xr0, -513
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511]
+
+xvrepli.h $xr0, 512
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511]
+
+xvrepli.w $xr0, -513
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511]
+
+xvrepli.w $xr0, 512
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511]
+
+xvrepli.d $xr0, -513
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511]
+
+xvrepli.d $xr0, 512
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511]
+
 ## simm11_lsl1
+xvldrepl.h $xr0, $a0, -2050
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 2 in the range [-2048, 2046]
+
 xvldrepl.h $xr0, $a0, 2048
 # CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 2 in the range [-2048, 2046]
+## simm12
+xvldrepl.b $xr0, $a0, -2049
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [-2048, 2047]
+
+xvldrepl.b $xr0, $a0, 2048
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [-2048, 2047]
+
+xvst $xr0, $a0, -2049
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047]
+
+xvst $xr0, $a0, 2048
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047]
+
+xvld $xr0, $a0, -2049
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047]
+
+xvld $xr0, $a0, 2048
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047]
+
 ## simm13
+xvldi $xr0, -4097
+# CHECK: :[[#@LINE-1]]:13: error: immediate must be an integer in the range [-4096, 4095]
+
 xvldi $xr0, 4096
 # CHECK: :[[#@LINE-1]]:13: error: immediate must be an integer in the range [-4096, 4095]
diff --git a/llvm/test/MC/LoongArch/lsx/invalid-imm.s b/llvm/test/MC/LoongArch/lsx/invalid-imm.s
index fb7e24c8348817d75e500e0d382273191a0ca316..c3f9aaa082818ee7b5a5a1a70fba325474ed82ca 100644
--- a/llvm/test/MC/LoongArch/lsx/invalid-imm.s
+++ b/llvm/test/MC/LoongArch/lsx/invalid-imm.s
@@ -3,53 +3,1190 @@
 # RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s
 
 ## uimm1
+vstelm.d $vr0, $a0, 8, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1]
+
+vstelm.d $vr0, $a0, 8, 2
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1]
+
+vreplvei.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1]
+
 vreplvei.d $vr0, $vr1, 2
 # CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1]
+vpickve2gr.du $a0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 1]
+
+vpickve2gr.du $a0, $vr1, 2
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 1]
+
+vpickve2gr.d $a0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 1]
+
+vpickve2gr.d $a0, $vr1, 2
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 1]
+
+vinsgr2vr.d $vr0, $a0, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1]
+
+vinsgr2vr.d $vr0, $a0, 2
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1]
+
+## uimm2
+vstelm.w $vr0, $a0, 4, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3]
+
+vstelm.w $vr0, $a0, 4, 4
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3]
+
+vreplvei.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3]
+
+vreplvei.w $vr0, $vr1, 4
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3]
+
+vpickve2gr.wu $a0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3]
+
+vpickve2gr.wu $a0, $vr1, 4
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3]
+
+vpickve2gr.w $a0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3]
+
+vpickve2gr.w $a0, $vr1, 4
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3]
+
+vinsgr2vr.w $vr0, $a0, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3]
+
+vinsgr2vr.w $vr0, $a0, 4
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3]
+
+## uimm3
+vstelm.h $vr0, $a0, 2, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+vstelm.h $vr0, $a0, 2, 8
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+vreplvei.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+vreplvei.h $vr0, $vr1, 8
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+vpickve2gr.hu $a0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7]
+
+vpickve2gr.hu $a0, $vr1, 8
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7]
+
+vpickve2gr.h $a0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+vpickve2gr.h $a0, $vr1, 8
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+vinsgr2vr.h $vr0, $a0, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+vinsgr2vr.h $vr0, $a0, 8
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+vbitrevi.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+vbitrevi.b $vr0, $vr1, 8
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+vbitseti.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+vbitseti.b $vr0, $vr1, 8
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+vbitclri.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+vbitclri.b $vr0, $vr1, 8
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+vsrari.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+vsrari.b $vr0, $vr1, 8
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+vsrlri.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+vsrlri.b $vr0, $vr1, 8
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+vsllwil.hu.bu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7]
+
+vsllwil.hu.bu $vr0, $vr1, 8
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7]
+
+vsllwil.h.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+vsllwil.h.b $vr0, $vr1, 8
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+vrotri.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+vrotri.b $vr0, $vr1, 8
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+vsrai.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7]
+
+vsrai.b $vr0, $vr1, 8
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7]
+
+vsrli.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7]
+
+vsrli.b $vr0, $vr1, 8
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7]
+
+vslli.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7]
+
+vslli.b $vr0, $vr1, 8
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7]
+
+vsat.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 7]
+
+vsat.b $vr0, $vr1, 8
+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 7]
+
+vsat.bu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7]
+
+vsat.bu $vr0, $vr1, 8
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7]
+
 ## uimm4
+vstelm.b $vr0, $a0, 1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15]
+
+vstelm.b $vr0, $a0, 1, 16
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15]
+
+vreplvei.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15]
+
+vreplvei.b $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15]
+
+vpickve2gr.bu $a0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+vpickve2gr.bu $a0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+vpickve2gr.b $a0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+vpickve2gr.b $a0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+vinsgr2vr.b $vr0, $a0, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15]
+
+vinsgr2vr.b $vr0, $a0, 16
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15]
+
+vbitrevi.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15]
+
+vbitrevi.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15]
+
+vbitseti.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15]
+
+vbitseti.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15]
+
+vbitclri.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15]
+
+vbitclri.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15]
+
+vssrarni.bu.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+vssrarni.bu.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+vssrlrni.bu.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+vssrlrni.bu.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+vssrarni.b.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+vssrarni.b.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+vssrlrni.b.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+vssrlrni.b.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+vssrani.bu.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+vssrani.bu.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+vssrlni.bu.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+vssrlni.bu.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+vssrani.b.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+vssrani.b.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+vssrlni.b.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+vssrlni.b.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+vsrarni.b.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+vsrarni.b.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+vsrlrni.b.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+vsrlrni.b.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+vsrani.b.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15]
+
+vsrani.b.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15]
+
+vsrlni.b.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15]
+
+vsrlni.b.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15]
+
+vsrari.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+vsrari.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+vsrlri.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+vsrlri.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+vsllwil.wu.hu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+vsllwil.wu.hu $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+vsllwil.w.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+vsllwil.w.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+vrotri.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+vrotri.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+vsrai.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15]
+
+vsrai.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15]
+
+vsrli.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15]
+
+vsrli.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15]
+
+vslli.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15]
+
+vslli.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15]
+
+vsat.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 15]
+
 vsat.h $vr0, $vr1, 16
 # CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 15]
+vsat.hu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15]
+
+vsat.hu $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15]
+
+## uimm5
+vbsrl.v $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31]
+
+vbsrl.v $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31]
+
+vbsll.v $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31]
+
+vbsll.v $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31]
+
+vslti.du $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vslti.du $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vslti.wu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vslti.wu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vslti.hu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vslti.hu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vslti.bu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vslti.bu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vslei.du $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vslei.du $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vslei.wu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vslei.wu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vslei.hu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vslei.hu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vslei.bu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vslei.bu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vfrstpi.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+vfrstpi.h $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+vfrstpi.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+vfrstpi.b $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+vbitrevi.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31]
+
+vbitrevi.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31]
+
+vbitseti.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31]
+
+vbitseti.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31]
+
+vbitclri.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31]
+
+vbitclri.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31]
+
+vssrarni.hu.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+vssrarni.hu.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+vssrlrni.hu.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+vssrlrni.hu.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+vssrarni.h.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+vssrarni.h.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+vssrlrni.h.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+vssrlrni.h.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+vssrani.hu.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+vssrani.hu.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+vssrlni.hu.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+vssrlni.hu.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+vssrani.h.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+vssrani.h.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+vssrlni.h.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+vssrlni.h.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+vsrarni.h.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+vsrarni.h.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+vsrlrni.h.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+vsrlrni.h.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+vsrani.h.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31]
+
+vsrani.h.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31]
+
+vsrlni.h.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31]
+
+vsrlni.h.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31]
+
+vsrari.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vsrari.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vsrlri.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vsrlri.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vsllwil.du.wu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+vsllwil.du.wu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+vsllwil.d.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+vsllwil.d.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+vrotri.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vrotri.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vsrai.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31]
+
+vsrai.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31]
+
+vsrli.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31]
+
+vsrli.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31]
+
+vslli.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31]
+
+vslli.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31]
+
+vaddi.bu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vaddi.bu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vaddi.hu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vaddi.hu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vaddi.wu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vaddi.wu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vaddi.du $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vaddi.du $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vsubi.bu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vsubi.bu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vsubi.hu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vsubi.hu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vsubi.wu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vsubi.wu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vsubi.du $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vsubi.du $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vmaxi.bu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vmaxi.bu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vmaxi.hu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vmaxi.hu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vmaxi.wu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vmaxi.wu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vmaxi.du $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vmaxi.du $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vmini.bu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vmini.bu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vmini.hu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vmini.hu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vmini.wu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vmini.wu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vmini.du $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vmini.du $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+vsat.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 31]
+
+vsat.w $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 31]
+
+vsat.wu $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31]
+
+vsat.wu $vr0, $vr1, 32
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31]
+
 ## simm5
+vslti.d $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vslti.d $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vslti.w $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vslti.w $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vslti.h $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vslti.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vslti.b $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vslti.b $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vslei.d $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vslei.d $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vslei.w $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vslei.w $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vslei.h $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vslei.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vslei.b $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vslei.b $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vseqi.d $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vseqi.d $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vseqi.w $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vseqi.w $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vseqi.h $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vseqi.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vseqi.b $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
 vseqi.b $vr0, $vr1, 16
 # CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+vmaxi.b $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vmaxi.b $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vmaxi.h $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vmaxi.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vmaxi.w $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vmaxi.w $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vmaxi.d $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vmaxi.d $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vmini.b $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vmini.b $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vmini.h $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vmini.h $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vmini.w $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vmini.w $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vmini.d $vr0, $vr1, -17
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+vmini.d $vr0, $vr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15]
+
+## uimm6
+vbitrevi.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63]
+
+vbitrevi.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63]
+
+vbitseti.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63]
+
+vbitseti.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63]
+
+vbitclri.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63]
+
+vbitclri.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63]
+
+vssrarni.wu.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+vssrarni.wu.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+vssrlrni.wu.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+vssrlrni.wu.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+vssrarni.w.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+vssrarni.w.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+vssrlrni.w.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+vssrlrni.w.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+vssrani.wu.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+vssrani.wu.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+vssrlni.wu.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+vssrlni.wu.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+vssrani.w.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+vssrani.w.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+vssrlni.w.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+vssrlni.w.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+vsrarni.w.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+vsrarni.w.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+vsrlrni.w.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+vsrlrni.w.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+vsrani.w.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63]
+
+vsrani.w.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63]
+
+vsrlni.w.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63]
+
+vsrlni.w.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63]
+
+vsrari.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+vsrari.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+vsrlri.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+vsrlri.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+vrotri.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+vrotri.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+vsrai.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63]
+
+vsrai.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63]
+
+vsrli.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63]
+
+vsrli.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63]
+
+vslli.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63]
+
+vslli.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63]
+
+vsat.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 63]
+
+vsat.d $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 63]
+
+vsat.du $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63]
+
+vsat.du $vr0, $vr1, 64
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63]
+
 ## uimm7
+vssrarni.du.q $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+vssrarni.du.q $vr0, $vr1, 128
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+vssrlrni.du.q $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+vssrlrni.du.q $vr0, $vr1, 128
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+vssrarni.d.q $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+vssrarni.d.q $vr0, $vr1, 128
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+vssrlrni.d.q $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+vssrlrni.d.q $vr0, $vr1, 128
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+vssrani.du.q $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+vssrani.du.q $vr0, $vr1, 128
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+vssrlni.du.q $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+vssrlni.du.q $vr0, $vr1, 128
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+vssrani.d.q $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127]
+
+vssrani.d.q $vr0, $vr1, 128
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127]
+
+vssrlni.d.q $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127]
+
+vssrlni.d.q $vr0, $vr1, 128
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127]
+
+vsrarni.d.q $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127]
+
+vsrarni.d.q $vr0, $vr1, 128
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127]
+
+vsrlrni.d.q $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127]
+
+vsrlrni.d.q $vr0, $vr1, 128
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127]
+
+vsrani.d.q $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127]
+
+vsrani.d.q $vr0, $vr1, 128
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127]
+
+vsrlni.d.q $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127]
+
 vsrlni.d.q $vr0, $vr1, 128
 # CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127]
 
-## simm8
+## uimm8
+vextrins.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+vextrins.d $vr0, $vr1, 256
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+vextrins.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+vextrins.w $vr0, $vr1, 256
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+vextrins.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+vextrins.h $vr0, $vr1, 256
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+vextrins.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+vextrins.b $vr0, $vr1, 256
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+vpermi.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255]
+
 vpermi.w $vr0, $vr1, 256
 # CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255]
 
+vshuf4i.d $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+
+vshuf4i.d $vr0, $vr1, 256
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+
+vshuf4i.w $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+
+vshuf4i.w $vr0, $vr1, 256
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+
+vshuf4i.h $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+
+vshuf4i.h $vr0, $vr1, 256
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+
+vshuf4i.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+
+vshuf4i.b $vr0, $vr1, 256
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+
+vbitseli.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+vbitseli.b $vr0, $vr1, 256
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+vandi.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255]
+
+vandi.b $vr0, $vr1, 256
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255]
+
+vori.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 255]
+
+vori.b $vr0, $vr1, 256
+# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 255]
+
+vxori.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255]
+
+vxori.b $vr0, $vr1, 256
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255]
+
+vnori.b $vr0, $vr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255]
+
+vnori.b $vr0, $vr1, 256
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255]
+
+## simm8
+vstelm.b $vr0, $a0, -129, 1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-128, 127]
+
+vstelm.b $vr0, $a0, 128, 1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-128, 127]
+
 ## simm8_lsl1
-vstelm.h $vr0, $a0, 255, 1
+vstelm.h $vr0, $a0, -258, 1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 2 in the range [-256, 254]
+
+vstelm.h $vr0, $a0, 256, 1
 # CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 2 in the range [-256, 254]
 
 ## simm8_lsl2
-vstelm.w $vr0, $a0, 512, 1
+vstelm.w $vr0, $a0, -516, 1
 # CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 4 in the range [-512, 508]
 
-## simm10
-vrepli.b $vr0, 512
-# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511]
+vstelm.w $vr0, $a0, 512, 1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 4 in the range [-512, 508]
 
 ## simm8_lsl3
+vstelm.d $vr0, $a0, -1032, 1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 8 in the range [-1024, 1016]
+
 vstelm.d $vr0, $a0, 1024, 1
 # CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 8 in the range [-1024, 1016]
 
 ## simm9_lsl3
+vldrepl.d $vr0, $a0, -2056
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-2048, 2040]
+
 vldrepl.d $vr0, $a0, 2048
 # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-2048, 2040]
 
 ## simm10_lsl2
+vldrepl.w $vr0, $a0, -2052
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-2048, 2044]
+
 vldrepl.w $vr0, $a0, 2048
 # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-2048, 2044]
 
+## simm10
+vrepli.b $vr0, -513
+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511]
+
+vrepli.b $vr0, 512
+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511]
+
+vrepli.h $vr0, -513
+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511]
+
+vrepli.h $vr0, 512
+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511]
+
+vrepli.w $vr0, -513
+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511]
+
+vrepli.w $vr0, 512
+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511]
+
+vrepli.d $vr0, -513
+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511]
+
+vrepli.d $vr0, 512
+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511]
+
 ## simm11_lsl1
+vldrepl.h $vr0, $a0, -2050
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-2048, 2046]
+
 vldrepl.h $vr0, $a0, 2048
 # CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-2048, 2046]
 
+## simm12
+vldrepl.b $vr0, $a0, -2049
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-2048, 2047]
+
+vldrepl.b $vr0, $a0, 2048
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-2048, 2047]
+
+vst $vr0, $a0, -2049
+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047]
+
+vst $vr0, $a0, 2048
+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047]
+
+vld $vr0, $a0, -2049
+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047]
+
+vld $vr0, $a0, 2048
+# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047]
+
 ## simm13
+vldi $vr0, -4097
+# CHECK: :[[#@LINE-1]]:12: error: immediate must be an integer in the range [-4096, 4095]
+
 vldi $vr0, 4096
 # CHECK: :[[#@LINE-1]]:12: error: immediate must be an integer in the range [-4096, 4095]
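The shifted-immediate diagnostics above all instantiate one rule: an simm<N>_lsl<M> operand must be a multiple of 2^M lying in the N-bit signed range scaled by 2^M. A minimal C++ sketch of that predicate (illustrative only; the assembler expresses the same check via LLVM's isShiftedInt<N, S> from llvm/Support/MathExtras.h):

#include <cstdint>

// simm<N>_lsl<M>: a multiple of 2^M in [-2^(N-1+M), (2^(N-1)-1) * 2^M].
// E.g. simm8_lsl2 accepts multiples of 4 in [-512, 508], and simm9_lsl3
// accepts multiples of 8 in [-2048, 2040], matching the errors above.
template <unsigned N, unsigned M>
bool isValidShiftedSImm(int64_t Val) {
  const int64_t Lo = -(int64_t(1) << (N - 1 + M));        // simm8_lsl2: -512
  const int64_t Hi = ((int64_t(1) << (N - 1)) - 1) << M;  // simm8_lsl2:  508
  return Val % (int64_t(1) << M) == 0 && Val >= Lo && Val <= Hi;
}

For instance, isValidShiftedSImm<8, 2>(-516) and isValidShiftedSImm<8, 2>(512) are both false, which is why the vstelm.w cases above are rejected at either end of the range.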
diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a8ac2411dd82166c805a79f3f03dd318f999777a
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=loop-vectorize -mtriple loongarch64-linux-gnu -mattr=+lasx,+auto-vec -S | FileCheck %s
+
+;; This is a collection of tests whose only purpose is to show changes in the
+;; default configuration. Please keep these tests minimal - if you're testing
+;; functionality of some specific configuration, please place that in a
+;; separate test file with a hard-coded configuration (even if that
+;; configuration is the current default).
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
+target triple = "loongarch64"
+
+define void @vector_add(ptr noalias nocapture %a, i64 %v) {
+; CHECK-LABEL: define void @vector_add
+; CHECK-SAME: (ptr noalias nocapture [[A:%.*]], i64 [[V:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP2]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
+; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
+  %elem = load i64, ptr %arrayidx
+  %add = add i64 %elem, %v
+  store i64 %add, ptr %arrayidx
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 1024
+  br i1 %exitcond.not, label %for.end, label %for.body

+for.end:
+  ret void
+}
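For reference, @vector_add is the IR for the loop below; with the +lasx (256-bit vectors) and +auto-vec attributes from the RUN line, the loop vectorizer picks four i64 lanes, which is what the <4 x i64> load/add/store sequence and the step of 4 in the CHECK lines encode. This C++ source form is an illustration, not part of the patch:

// Illustrative C++ equivalent of the @vector_add IR above.
void vector_add(long long *__restrict a, long long v) {
  for (long long i = 0; i < 1024; ++i)
    a[i] += v; // vectorized as: load <4 x i64>; add; store; index += 4
}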
diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg b/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..9570af17fe5f1fc84b1485630bbd7548e0b65d8e
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg
@@ -0,0 +1,4 @@
+config.suffixes = [".ll"]
+
+if "LoongArch" not in config.root.targets:
+    config.unsupported = True