diff --git a/config.guess b/config.guess index 97ad0733304d51c825cb2abbc5db47d31d32c0ef..927726fdd531776d29522b257edcd2087842f269 100644 --- a/config.guess +++ b/config.guess @@ -980,6 +980,9 @@ EOF k1om:Linux:*:*) echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; + loongarch*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; m32r*:Linux:*:*) echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; diff --git a/config.sub b/config.sub index a318a46868500fbeea993e693e32701041ffad1b..8a590458e13ebcfc4f04e26be084e30f355af2e8 100644 --- a/config.sub +++ b/config.sub @@ -1183,6 +1183,7 @@ case $cpu-$vendor in | k1om \ | le32 | le64 \ | lm32 \ + | loongarch32 | loongarchx32 | loongarch64 \ | m32c | m32r | m32rle \ | m5200 | m68000 | m680[012346]0 | m68360 | m683?2 | m68k \ | m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x \ diff --git a/configure b/configure index 97d5ca4fc0498f8e6074908a2a43cdd19e2e797d..65d92742cad07c6dd01f83eba059292f21444f69 100755 --- a/configure +++ b/configure @@ -3029,7 +3029,7 @@ case "${ENABLE_GOLD}" in # Check for target supported by gold. case "${target}" in i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \ - | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-*) + | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-* | loongarch*-*-*) configdirs="$configdirs gold" if test x${ENABLE_GOLD} = xdefault; then default_ld=gold @@ -3641,6 +3641,9 @@ case "${target}" in i[3456789]86-*-*) libgloss_dir=i386 ;; + loongarch*-*-*) + libgloss_dir=loongarch + ;; m68hc11-*-*|m6811-*-*|m68hc12-*-*|m6812-*-*) libgloss_dir=m68hc11 ;; @@ -4025,6 +4028,11 @@ case "${target}" in wasm32-*-*) noconfigdirs="$noconfigdirs ld" ;; + loongarch*-*-linux*) + ;; + loongarch*-*-*) + noconfigdirs="$noconfigdirs gprof" + ;; esac # If we aren't building newlib, then don't build libgloss, since libgloss diff --git a/configure.ac b/configure.ac index 90ccd5ef8a85e8f36658cd5c81924ca346a01eab..bd41bb1025eaa6a342d3884d9132193f2ee248c7 100644 --- a/configure.ac +++ b/configure.ac @@ -345,7 +345,7 @@ case "${ENABLE_GOLD}" in # Check for target supported by gold. 
case "${target}" in i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \ - | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-*) + | aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-* | loongarch*-*-*) configdirs="$configdirs gold" if test x${ENABLE_GOLD} = xdefault; then default_ld=gold @@ -914,6 +914,9 @@ case "${target}" in i[[3456789]]86-*-*) libgloss_dir=i386 ;; + loongarch*-*-*) + libgloss_dir=loongarch + ;; m68hc11-*-*|m6811-*-*|m68hc12-*-*|m6812-*-*) libgloss_dir=m68hc11 ;; @@ -1298,6 +1301,11 @@ case "${target}" in wasm32-*-*) noconfigdirs="$noconfigdirs ld" ;; + loongarch*-*-linux*) + ;; + loongarch*-*-*) + noconfigdirs="$noconfigdirs gprof" + ;; esac # If we aren't building newlib, then don't build libgloss, since libgloss @@ -2639,6 +2647,9 @@ case "${target}" in mips*-*-*linux* | mips*-*-gnu*) target_makefile_frag="config/mt-mips-gnu" ;; + loongarch*-*-*linux* | loongarch*-*-gnu*) + target_makefile_frag="config/mt-loongarch-gnu" + ;; nios2-*-elf*) target_makefile_frag="config/mt-nios2-elf" ;; diff --git a/contrib/config-list.mk b/contrib/config-list.mk index d154286a497cb0c8492892b8ee52cd489efac3e8..c06e2b9248c71fc17f845ecbfe40c18a6f6758e2 100644 --- a/contrib/config-list.mk +++ b/contrib/config-list.mk @@ -57,7 +57,10 @@ LIST = aarch64-elf aarch64-linux-gnu aarch64-rtems \ i686-wrs-vxworksae \ i686-cygwinOPT-enable-threads=yes i686-mingw32crt ia64-elf \ ia64-freebsd6 ia64-linux ia64-hpux ia64-hp-vms iq2000-elf lm32-elf \ - lm32-rtems lm32-uclinux m32c-rtems m32c-elf m32r-elf m32rle-elf \ + lm32-rtems lm32-uclinux \ + loongarch64-linux-gnu loongarch64-linux-gnuf64 \ + loongarch64-linux-gnuf32 loongarch64-linux-gnusf \ + m32c-rtems m32c-elf m32r-elf m32rle-elf \ m32r-linux m32rle-linux m68k-elf m68k-netbsdelf \ m68k-openbsd m68k-uclinux m68k-linux m68k-rtems \ mcore-elf microblaze-linux microblaze-elf \ diff --git a/gcc/cfg.h b/gcc/cfg.h index 1eb7866bac9c30dfd3468bf3cd7edd7ff67e2401..4c2bf1b2691662307523dcc51e7b7832de057895 100644 --- a/gcc/cfg.h +++ b/gcc/cfg.h @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. If not see #define GCC_CFG_H #include "dominance.h" +#include "function.h" /* What sort of profiling information we have. */ enum profile_status_d diff --git a/gcc/common/config/loongarch/loongarch-common.c b/gcc/common/config/loongarch/loongarch-common.c new file mode 100644 index 0000000000000000000000000000000000000000..3f440aef1e76269d1d8b39adc9f40a18c7618758 --- /dev/null +++ b/gcc/common/config/loongarch/loongarch-common.c @@ -0,0 +1,63 @@ +/* Common hooks for LoongArch. + Copyright (C) 2021 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "common/common-target.h" +#include "common/common-target-def.h" +#include "opts.h" +#include "flags.h" +#include "diagnostic-core.h" + +/* Implement TARGET_HANDLE_OPTION. 
*/ + +static bool +loongarch_handle_option (struct gcc_options *opts, + struct gcc_options *opts_set ATTRIBUTE_UNUSED, + const struct cl_decoded_option *decoded, + location_t loc ATTRIBUTE_UNUSED) +{ + size_t code = decoded->opt_index; + int value = decoded->value; + + switch (code) + { + case OPT_mmemcpy: + if (value) + { + if (opts->x_optimize_size) + opts->x_target_flags |= MASK_MEMCPY; + } + else + opts->x_target_flags &= ~MASK_MEMCPY; + return true; + + default: + return true; + } +} + +#undef TARGET_DEFAULT_TARGET_FLAGS +#define TARGET_DEFAULT_TARGET_FLAGS MASK_CHECK_ZERO_DIV +#undef TARGET_HANDLE_OPTION +#define TARGET_HANDLE_OPTION loongarch_handle_option + +struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER; diff --git a/gcc/config.gcc b/gcc/config.gcc index 6fcdd771d4c32604685ebc5da3e20260fe6da2ad..4fb84f3be7b99fc4c1f43f54fe2f88864b9df2ee 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -476,6 +476,14 @@ mips*-*-*) extra_headers="loongson.h loongson-mmiintrin.h msa.h" extra_objs="frame-header-opt.o" extra_options="${extra_options} g.opt fused-madd.opt mips/mips-tables.opt" + ;; +loongarch*-*-*) + cpu_type=loongarch + d_target_objs="loongarch-d.o" + extra_headers="lvzintrin.h larchintrin.h" + extra_objs="frame-header-opt.o loongarch-c.o loongarch-builtins.o" + extra_options="${extra_options} g.opt fused-madd.opt loongarch/loongarch-tables.opt" + ;; nds32*) cpu_type=nds32 @@ -2575,6 +2583,55 @@ mips*-*-linux*) # Linux MIPS, either endian. tmake_file="${tmake_file} mips/t-linux64" fi ;; +loongarch*-*-linux*) + case ${with_abi} in + "") + echo "not specify ABI, default is lp64 for loongarch64" + with_abi=lp64 # for default + ;; + lpx32) + ;; + lp32) + ;; + lp64) + ;; + *) + echo "Unknown ABI used in --with-abi=$with_abi" + exit 1 + esac + + enable_multilib="yes" + loongarch_multilibs="${with_multilib_list}" + if test "$loongarch_multilibs" = "default"; then + loongarch_multilibs="${with_abi}" + fi + loongarch_multilibs=`echo $loongarch_multilibs | sed -e 's/,/ /g'` + for loongarch_multilib in ${loongarch_multilibs}; do + case ${loongarch_multilib} in + lp64 | lpx32 | lp32 ) + TM_MULTILIB_CONFIG="${TM_MULTILIB_CONFIG},${loongarch_multilib}" + ;; + *) + echo "--with-multilib-list=${loongarch_multilib} not supported." + exit 1 + esac + done + TM_MULTILIB_CONFIG=`echo $TM_MULTILIB_CONFIG | sed 's/^,//'` + + if test `for one_abi in ${loongarch_multilibs}; do if [ x\$one_abi = x$with_abi ]; then echo 1; exit 0; fi; done; echo 0;` = "0"; then + echo "--with-abi=${with_abi} must be one of --with-multilib-list=${with_multilib_list}" + exit 1 + fi + + tm_file="dbxelf.h elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file} loongarch/gnu-user.h loongarch/linux.h loongarch/linux-common.h" + extra_options="${extra_options} linux-android.opt" + tmake_file="${tmake_file} loongarch/t-linux" + gnu_ld=yes + gas=yes + # Force .init_array support. The configure script cannot always + # automatically detect that GAS supports it, yet we require it. + gcc_cv_initfini_array=yes + ;; mips*-mti-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h mips/n32-elf.h mips/sde.h mips/mti-elf.h" tmake_file="mips/t-mti-elf" @@ -2629,6 +2686,31 @@ mips*-sde-elf*) ;; esac ;; +loongarch*-sde-elf*) + tm_file="elfos.h newlib-stdint.h ${tm_file} loongarch/elf.h loongarch/sde.h" +# tmake_file="loongarch/t-sde" + extra_options="${extra_options} loongarch/sde.opt" + case "${with_newlib}" in + yes) + # newlib / libgloss. + ;; + *) + # MIPS toolkit libraries. 
+ tm_file="$tm_file loongarch/sdemtk.h" + tmake_file="$tmake_file loongarch/t-sdemtk" + case ${enable_threads} in + "" | yes | loongarchsde) + thread_file='loongarchsde' + ;; + esac + ;; + esac + case ${target} in + loongarch*) + tm_defines="LARCH_ISA_DEFAULT=65 LARCH_ABI_DEFAULT=ABILP64" + ;; + esac + ;; mipsisa32-*-elf* | mipsisa32el-*-elf* | \ mipsisa32r2-*-elf* | mipsisa32r2el-*-elf* | \ mipsisa32r6-*-elf* | mipsisa32r6el-*-elf* | \ @@ -4092,6 +4174,9 @@ if test x$with_llsc = x; then # The kernel emulates LL and SC where necessary. with_llsc=yes ;; + loongarch*-*-linux*) + with_fix-loongson3-llsc=yes + ;; mips64r5900-*-* | mips64r5900el-*-* | mipsr5900-*-* | mipsr5900el-*-*) # The R5900 doesn't support LL(D) and SC(D). with_llsc=no @@ -4854,6 +4939,55 @@ case "${target}" in esac ;; + loongarch*-*-*) + supported_defaults="abi arch arch_32 arch_64 float fpu fp_32 tune tune_32 tune_64 fix-loongson3-llsc" + + case ${with_float} in + "" | soft | hard) + # OK + ;; + *) + echo "Unknown floating point type used in --with-float=$with_float" 1>&2 + exit 1 + ;; + esac + + case ${with_fpu} in + "" | single | double) + # OK + ;; + *) + echo "Unknown fpu type used in --with-fpu=$with_fpu" 1>&2 + exit 1 + ;; + esac + + case ${with_fp_32} in + "" | 32 | xx | 64) + # OK + ;; + *) + echo "Unknown FP mode used in --with-fp-32=$with_fp_32" 1>&2 + exit 1 + ;; + esac + + case ${with_fix_loongson3_llsc} in + yes) + with_fix_loongson3_llsc=fix-loongson3-llsc + ;; + no) + with_fix_loongson3_llsc=no-fix-loongson3-llsc + ;; + "") + ;; + *) + echo "Unknown fix-loongson3-llsc type used in --with-fix-loongson3-llsc" 1>&2 + exit 1 + ;; + esac + ;; + nds32*-*-*) supported_defaults="arch cpu nds32_lib float fpu_config" @@ -5301,6 +5435,29 @@ case ${target} in tmake_file="mips/t-mips $tmake_file" ;; + loongarch*-*-*) + case ${target} in + loongarch*-*-*) + tm_defines="TARGET_ENDIAN_DEFAULT=0 $tm_defines" + ;; + esac + if test x$with_arch != x; then + default_loongarch_arch=$with_arch + fi + if test x$with_abi != x; then + default_loongarch_abi=$with_abi + fi + case ${default_loongarch_arch} in + loongarch | loongarch64) tm_defines="$tm_defines LARCH_ISA_DEFAULT=65" ;; + esac + case ${default_loongarch_abi} in + lp64) tm_defines="$tm_defines LARCH_ABI_DEFAULT=ABILP64" ;; + lp32) tm_defines="$tm_defines LARCH_ABI_DEFAULT=ABILP32" ;; + lpx32) tm_defines="$tm_defines LARCH_ABI_DEFAULT=ABILPX32" ;; + esac + tmake_file="loongarch/t-loongarch $tmake_file" + ;; + powerpc*-*-* | rs6000-*-*) # FIXME: The PowerPC port uses the value set at compile time, # although it's only cosmetic. 
diff --git a/gcc/config.host b/gcc/config.host index 230ab61ac05b93b1890fce376024f9184a7b4ecf..5415e19a13e591fd9273e73674bb26daac817623 100644 --- a/gcc/config.host +++ b/gcc/config.host @@ -146,6 +146,14 @@ case ${host} in ;; esac ;; + loongarch*-*-linux*) + case ${target} in + loongarch*-*-linux*) + host_extra_gcc_objs="driver-native.o" + host_xmake_file="${host_xmake_file} loongarch/x-native" + ;; + esac + ;; rs6000-*-* \ | powerpc*-*-* ) case ${target} in diff --git a/gcc/config/host-linux.c b/gcc/config/host-linux.c index 26872544130dddaf335068b1c3ae6fac3dc2e90a..38f9d4ce7a1ebe7e1d4aec49c4537f390e669326 100644 --- a/gcc/config/host-linux.c +++ b/gcc/config/host-linux.c @@ -98,6 +98,8 @@ # define TRY_EMPTY_VM_SPACE 0x60000000 #elif defined(__riscv) && defined (__LP64__) # define TRY_EMPTY_VM_SPACE 0x1000000000 +#elif defined(__loongarch__) && defined(__LP64__) +# define TRY_EMPTY_VM_SPACE 0x8000000000 #else # define TRY_EMPTY_VM_SPACE 0 #endif diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md new file mode 100644 index 0000000000000000000000000000000000000000..e33cc23a629c066c3e5349d936a9d1479f7cb08a --- /dev/null +++ b/gcc/config/loongarch/constraints.md @@ -0,0 +1,287 @@ +;; Constraint definitions for LARCH. +;; Copyright (C) 2006-2018 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Register constraints + +;; "a" A constant call global and noplt address. +;; "b" ALL_REGS +;; "c" A constant call local address. +;; "d" GR_REGS +;; "e" JALR_REGS +;; "f" FP_REGS +;; "g" * +;; "h" A constant call plt address. +;; "i" "Matches a general integer constant." +;; "j" SIBCALL_REGS +;; "k" - +;; "l" "A signed 16-bit constant ." +;; "m" "A memory operand whose address is formed by a base register and offset +;; that is suitable for use in instructions with the same addressing mode +;; as @code{st.w} and @code{ld.w}." +;; "n" "Matches a non-symbolic integer constant." +;; "o" "Matches an offsettable memory reference." +;; "p" "Matches a general address." +;; "q" LVZ_REGS +;; "r" GENERAL_REGS +;; "s" "Matches a symbolic integer constant." +;; "t" A constant call weak address +;; "u" - +;; "v" - +;; "w" "Matches any valid memory." +;; "x" - +;; "y" GR_REGS +;; "z" ST_REGS +;; "A" - +;; "B" - +;; "C" - +;; "D" - +;; "E" "Matches a floating-point constant." +;; "F" "Matches a floating-point constant." +;; "G" "Floating-point zero." +;; "H" - +;; "I" "A signed 12-bit constant (for arithmetic instructions)." +;; "J" "Integer zero." +;; "K" "An unsigned 12-bit constant (for logic instructions)." +;; "L" "A signed 32-bit constant in which the lower 12 bits are zero. +;; "M" "A constant that cannot be loaded using @code{lui}, @code{addiu} or @code{ori}." +;; "N" "A constant in the range -65535 to -1 (inclusive)." +;; "O" "A signed 15-bit constant." +;; "P" "A constant in the range 1 to 65535 (inclusive)." 
+;; "Q" "A signed 12-bit constant" +;; "R" "An address that can be used in a non-macro load or store." +;; "S" "A constant call address." +;; "T" - +;; "U" - +;; "V" "Matches a non-offsettable memory reference." +;; "W" "A memory address based on a member of @code{BASE_REG_CLASS}. This is +;; true for all references (although it can sometimes be implicit +;; if @samp{!TARGET_EXPLICIT_RELOCS})." +;; "X" "Matches anything." +;; "Y" - +;; "Yb" +;; "Yd" +;; "A constant @code{move_operand} that can be safely loaded into @code{$25} +;; using @code{la}." +;; "Yh" +;; "Yw" +;; "Yx" +;; "Z" - +;; "ZC" +;; "A memory operand whose address is formed by a base register and offset +;; that is suitable for use in instructions with the same addressing mode +;; as @code{ll.w} and @code{sc.w}." +;; "ZD" +;; "An address suitable for a @code{prefetch} instruction, or for any other +;; instruction with the same addressing mode as @code{prefetch}." +;; "ZB" +;; "An address that is held in a general-purpose register. +;; The offset is zero" + + +(define_constraint "c" + "@internal + A constant call local address." + (match_operand 0 "is_const_call_local_symbol")) + +(define_constraint "a" + "@internal + A constant call global and noplt address." + (match_operand 0 "is_const_call_global_noplt_symbol")) + +(define_constraint "h" + "@internal + A constant call plt address." + (match_operand 0 "is_const_call_plt_symbol")) + +(define_constraint "t" + "@internal + A constant call weak address." + (match_operand 0 "is_const_call_weak_symbol")) + +(define_register_constraint "d" "GR_REGS" + "A general-purpose register. This is equivalent to @code{r}.") + +(define_register_constraint "e" "JALR_REGS" + "@internal") + +(define_register_constraint "q" "LVZ_REGS" + "A general-purpose register except for $r0 and $r1 for lvz.") + +(define_register_constraint "f" "TARGET_HARD_FLOAT ? FP_REGS : NO_REGS" + "A floating-point register (if available).") + +(define_register_constraint "b" "ALL_REGS" + "@internal") + +(define_register_constraint "j" "SIBCALL_REGS" + "@internal") + +(define_constraint "l" + "A signed 16-bit constant ." + (and (match_code "const_int") + (match_test "IMM16_OPERAND (ival)"))) + +(define_register_constraint "y" "GR_REGS" + "Equivalent to @code{r}; retained for backwards compatibility.") + +(define_register_constraint "z" "ST_REGS" + "A floating-point condition code register.") + +;; Integer constraints + +(define_constraint "I" + "A signed 12-bit constant (for arithmetic instructions)." + (and (match_code "const_int") + (match_test "SMALL_OPERAND (ival)"))) + +(define_constraint "J" + "Integer zero." + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "K" + "An unsigned 12-bit constant (for logic instructions)." + (and (match_code "const_int") + (match_test "SMALL_OPERAND_UNSIGNED (ival)"))) + +(define_constraint "u" + "An unsigned 12-bit constant (for logic instructions)." + (and (match_code "const_int") + (match_test "LU32I_OPERAND (ival)"))) + +(define_constraint "v" + "An unsigned 12-bit constant (for logic instructions)." + (and (match_code "const_int") + (match_test "LU52I_OPERAND (ival)"))) + +(define_constraint "L" + "A signed 32-bit constant in which the lower 12 bits are zero. + Such constants can be loaded using @code{lui}." + (and (match_code "const_int") + (match_test "LUI_OPERAND (ival)"))) + +(define_constraint "M" + "A constant that cannot be loaded using @code{lui}, @code{addiu} + or @code{ori}." 
+ (and (match_code "const_int") + (not (match_test "SMALL_OPERAND (ival)")) + (not (match_test "SMALL_OPERAND_UNSIGNED (ival)")) + (not (match_test "LUI_OPERAND (ival)")))) + +(define_constraint "N" + "A constant in the range -65535 to -1 (inclusive)." + (and (match_code "const_int") + (match_test "ival >= -0xffff && ival < 0"))) + +(define_constraint "O" + "A signed 15-bit constant." + (and (match_code "const_int") + (match_test "ival >= -0x4000 && ival < 0x4000"))) + +(define_constraint "P" + "A constant in the range 1 to 65535 (inclusive)." + (and (match_code "const_int") + (match_test "ival > 0 && ival < 0x10000"))) + +;; Floating-point constraints + +(define_constraint "G" + "Floating-point zero." + (and (match_code "const_double") + (match_test "op == CONST0_RTX (mode)"))) + +;; General constraints + +(define_constraint "Q" + "@internal" + (match_operand 0 "const_arith_operand")) + +(define_memory_constraint "R" + "An address that can be used in a non-macro load or store." + (and (match_code "mem") + (match_test "loongarch_address_insns (XEXP (op, 0), mode, false) == 1"))) + +(define_memory_constraint "m" + "A memory operand whose address is formed by a base register and offset + that is suitable for use in instructions with the same addressing mode + as @code{st.w} and @code{ld.w}." + (and (match_code "mem") + (match_test "loongarch_12bit_offset_address_p (XEXP (op, 0), mode)"))) + +(define_constraint "S" + "@internal + A constant call address." + (and (match_operand 0 "call_insn_operand") + (match_test "CONSTANT_P (op)"))) + +(define_memory_constraint "W" + "@internal + A memory address based on a member of @code{BASE_REG_CLASS}. This is + true for allreferences (although it can sometimes be implicit + if @samp{!TARGET_EXPLICIT_RELOCS})." + (and (match_code "mem") + (match_operand 0 "memory_operand") + (and (not (match_operand 0 "stack_operand")) + (not (match_test "CONSTANT_P (XEXP (op, 0))"))))) + +(define_constraint "Yb" + "@internal" + (match_operand 0 "qi_mask_operand")) + +(define_constraint "Yd" + "@internal + A constant @code{move_operand} that can be safely loaded into @code{$25} + using @code{la}." + (and (match_operand 0 "move_operand") + (match_test "CONSTANT_P (op)"))) + +(define_constraint "Yh" + "@internal" + (match_operand 0 "hi_mask_operand")) + +(define_constraint "Yw" + "@internal" + (match_operand 0 "si_mask_operand")) + +(define_constraint "Yx" + "@internal" + (match_operand 0 "low_bitmask_operand")) + +(define_memory_constraint "ZC" + "A memory operand whose address is formed by a base register and offset + that is suitable for use in instructions with the same addressing mode + as @code{ll.w} and @code{sc.w}." + (and (match_code "mem") + (match_test "loongarch_14bit_shifted_offset_address_p (XEXP (op, 0), mode)"))) + +;;(define_address_constraint "ZD" +;; "An address suitable for a @code{prefetch} instruction, or for any other +;; instruction with the same addressing mode as @code{prefetch}." +;; (if_then_else (match_test "ISA_HAS_9BIT_DISPLACEMENT") +;; (match_test "loongarch_9bit_offset_address_p (op, mode)") +;; (match_test "loongarch_address_insns (op, mode, false)"))) + + +(define_memory_constraint "ZB" + "@internal + An address that is held in a general-purpose register. 
+ The offset is zero" + (and (match_code "mem") + (match_test "GET_CODE(XEXP(op,0)) == REG"))) + diff --git a/gcc/config/loongarch/driver-native.c b/gcc/config/loongarch/driver-native.c new file mode 100644 index 0000000000000000000000000000000000000000..e4f26e49586959f21e9d3a4f8d143338c275bdb3 --- /dev/null +++ b/gcc/config/loongarch/driver-native.c @@ -0,0 +1,82 @@ +/* Subroutines for the gcc driver. + Copyright (C) 2008-2018 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" + + +/* This function must set to noinline. Otherwise the arg can not be passed. */ +int loongson_cpucfg (int arg) +{ + int ret; + __asm__ __volatile__ ("cpucfg %0,%1\n\t" /* cpucfg $2,$4. */ + :"=r"(ret) + :"r"(arg) + :); + return ret; +} + +/* This will be called by the spec parser in gcc.c when it sees + a %:local_cpu_detect(args) construct. Currently it will be called + with either "arch" or "tune" as argument depending on if -march=native + or -mtune=native is to be substituted. + + It returns a string containing new command line parameters to be + put at the place of the above two options, depending on what CPU + this is executed. E.g. "-march=loongson2f" on a Loongson 2F for + -march=native. If the routine can't detect a known processor, + the -march or -mtune option is discarded. + + ARGC and ARGV are set depending on the actual arguments given + in the spec. */ +const char * +host_detect_local_cpu (int argc, const char **argv) +{ + const char *cpu = NULL; + bool arch; + int cpucfg_arg; + int cpucfg_ret; + + if (argc < 1) + return NULL; + + arch = strcmp (argv[0], "arch") == 0; + if (!arch && strcmp (argv[0], "tune")) + return NULL; + + cpucfg_arg = 0; + cpucfg_ret = loongson_cpucfg (cpucfg_arg); + if (((cpucfg_ret >> 16) & 0xff) == 0x14) + { + if (((cpucfg_ret >> 8) & 0xff) == 0xc0) + cpu = "gs464v"; + else + cpu = NULL; + } + + + if (cpu == NULL) + return NULL; + + return concat ("-m", argv[0], "=", cpu, NULL); +} diff --git a/gcc/config/loongarch/elf.h b/gcc/config/loongarch/elf.h new file mode 100644 index 0000000000000000000000000000000000000000..b7f938e319ff65a23e469c28401734e82001a02b --- /dev/null +++ b/gcc/config/loongarch/elf.h @@ -0,0 +1,50 @@ +/* Target macros for loongarch*-elf targets. + Copyright (C) 1994-2018 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* LARCH assemblers don't have the usual .set foo,bar construct; + .set is used for assembler options instead. */ +#undef SET_ASM_OP +#define ASM_OUTPUT_DEF(FILE, LABEL1, LABEL2) \ + do \ + { \ + fputc ('\t', FILE); \ + assemble_name (FILE, LABEL1); \ + fputs (" = ", FILE); \ + assemble_name (FILE, LABEL2); \ + fputc ('\n', FILE); \ + } \ + while (0) + +#undef ASM_DECLARE_OBJECT_NAME +#define ASM_DECLARE_OBJECT_NAME loongarch_declare_object_name + +#undef ASM_FINISH_DECLARE_OBJECT +#define ASM_FINISH_DECLARE_OBJECT loongarch_finish_declare_object + +/* Leave the linker script to choose the appropriate libraries. */ +#undef LIB_SPEC +#define LIB_SPEC "" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "crti%O%s crtbegin%O%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend%O%s crtn%O%s" + +#define NO_IMPLICIT_EXTERN_C 1 diff --git a/gcc/config/loongarch/frame-header-opt.c b/gcc/config/loongarch/frame-header-opt.c new file mode 100644 index 0000000000000000000000000000000000000000..08151bbd2f187943d625b007bb42d521fa11c1ae --- /dev/null +++ b/gcc/config/loongarch/frame-header-opt.c @@ -0,0 +1,292 @@ +/* Analyze functions to determine if callers need to allocate a frame header + on the stack. The frame header is used by callees to save their arguments. + This optimization is specific to TARGET_OLDABI targets. For TARGET_NEWABI + targets, if a frame header is required, it is allocated by the callee. + + + Copyright (C) 2015-2018 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +GCC is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "context.h" +#include "coretypes.h" +#include "tree.h" +#include "tree-core.h" +#include "tree-pass.h" +#include "target.h" +#include "target-globals.h" +#include "profile-count.h" +#include "function.h" +#include "cfg.h" +#include "cgraph.h" +#include "basic-block.h" +#include "gimple.h" +#include "gimple-iterator.h" +#include "gimple-walk.h" + +static unsigned int frame_header_opt (void); + +namespace { + +const pass_data pass_data_ipa_frame_header_opt = +{ + IPA_PASS, /* type */ + "frame-header-opt", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_CGRAPHOPT, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_ipa_frame_header_opt : public ipa_opt_pass_d +{ +public: + pass_ipa_frame_header_opt (gcc::context *ctxt) + : ipa_opt_pass_d (pass_data_ipa_frame_header_opt, ctxt, + NULL, /* generate_summary */ + NULL, /* write_summary */ + NULL, /* read_summary */ + NULL, /* write_optimization_summary */ + NULL, /* read_optimization_summary */ + NULL, /* stmt_fixup */ + 0, /* function_transform_todo_flags_start */ + NULL, /* function_transform */ + NULL) /* variable_transform */ + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + /* This optimization has no affect if TARGET_NEWABI. If optimize + is not at least 1 then the data needed for the optimization is + not available and nothing will be done anyway. */ + return TARGET_OLDABI && flag_frame_header_optimization && optimize > 0; + } + + virtual unsigned int execute (function *) { return frame_header_opt (); } + +}; // class pass_ipa_frame_header_opt + +} // anon namespace + +static ipa_opt_pass_d * +make_pass_ipa_frame_header_opt (gcc::context *ctxt) +{ + return new pass_ipa_frame_header_opt (ctxt); +} + +void +loongarch_register_frame_header_opt (void) +{ + opt_pass *p = make_pass_ipa_frame_header_opt (g); + struct register_pass_info f = { p, "comdats", 1, PASS_POS_INSERT_AFTER }; + register_pass (&f); +} + + +/* Return true if it is certain that this is a leaf function. False if it is + not a leaf function or if it is impossible to tell. */ + +static bool +is_leaf_function (function *fn) +{ + basic_block bb; + gimple_stmt_iterator gsi; + + /* If we do not have a cfg for this function be conservative and assume + it is not a leaf function. */ + if (fn->cfg == NULL) + return false; + + FOR_EACH_BB_FN (bb, fn) + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + if (is_gimple_call (gsi_stmt (gsi))) + return false; + return true; +} + +/* Return true if this function has inline assembly code or if we cannot + be certain that it does not. False if we know that there is no inline + assembly. */ + +static bool +has_inlined_assembly (function *fn) +{ + basic_block bb; + gimple_stmt_iterator gsi; + + /* If we do not have a cfg for this function be conservative and assume + it is may have inline assembly. */ + if (fn->cfg == NULL) + return true; + + FOR_EACH_BB_FN (bb, fn) + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + if (gimple_code (gsi_stmt (gsi)) == GIMPLE_ASM) + return true; + + return false; +} + +/* Return true if this function will use the stack space allocated by its + caller or if we cannot determine for certain that it does not. 
*/ + +static bool +needs_frame_header_p (function *fn) +{ + tree t; + + if (fn->decl == NULL) + return true; + + if (fn->stdarg) + return true; + + for (t = DECL_ARGUMENTS (fn->decl); t; t = TREE_CHAIN (t)) + { + if (!use_register_for_decl (t)) + return true; + + /* Some 64-bit types may get copied to general registers using the frame + header, see loongarch_output_64bit_xfer. Checking for SImode only may be + overly restrictive but it is guaranteed to be safe. */ + if (DECL_MODE (t) != SImode) + return true; + } + + return false; +} + +/* Return true if the argument stack space allocated by function FN is used. + Return false if the space is needed or if the need for the space cannot + be determined. */ + +static bool +callees_functions_use_frame_header (function *fn) +{ + basic_block bb; + gimple_stmt_iterator gsi; + gimple *stmt; + tree called_fn_tree; + function *called_fn; + + if (fn->cfg == NULL) + return true; + + FOR_EACH_BB_FN (bb, fn) + { + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + stmt = gsi_stmt (gsi); + if (is_gimple_call (stmt)) + { + called_fn_tree = gimple_call_fndecl (stmt); + if (called_fn_tree != NULL) + { + called_fn = DECL_STRUCT_FUNCTION (called_fn_tree); + if (called_fn == NULL + || DECL_WEAK (called_fn_tree) + || has_inlined_assembly (called_fn) + || !is_leaf_function (called_fn) + || !called_fn->machine->does_not_use_frame_header) + return true; + } + else + return true; + } + } + } + return false; +} + +/* Set the callers_may_not_allocate_frame flag for any function which + function FN calls because FN may not allocate a frame header. */ + +static void +set_callers_may_not_allocate_frame (function *fn) +{ + basic_block bb; + gimple_stmt_iterator gsi; + gimple *stmt; + tree called_fn_tree; + function *called_fn; + + if (fn->cfg == NULL) + return; + + FOR_EACH_BB_FN (bb, fn) + { + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + stmt = gsi_stmt (gsi); + if (is_gimple_call (stmt)) + { + called_fn_tree = gimple_call_fndecl (stmt); + if (called_fn_tree != NULL) + { + called_fn = DECL_STRUCT_FUNCTION (called_fn_tree); + if (called_fn != NULL) + called_fn->machine->callers_may_not_allocate_frame = true; + } + } + } + } + return; +} + +/* Scan each function to determine those that need its frame headers. Perform + a second scan to determine if the allocation can be skipped because none of + their callees require the frame header. */ + +static unsigned int +frame_header_opt () +{ + struct cgraph_node *node; + function *fn; + + FOR_EACH_DEFINED_FUNCTION (node) + { + fn = node->get_fun (); + if (fn != NULL) + fn->machine->does_not_use_frame_header = !needs_frame_header_p (fn); + } + + FOR_EACH_DEFINED_FUNCTION (node) + { + fn = node->get_fun (); + if (fn != NULL) + fn->machine->optimize_call_stack + = !callees_functions_use_frame_header (fn) && !is_leaf_function (fn); + } + + FOR_EACH_DEFINED_FUNCTION (node) + { + fn = node->get_fun (); + if (fn != NULL && fn->machine->optimize_call_stack) + set_callers_may_not_allocate_frame (fn); + } + + return 0; +} diff --git a/gcc/config/loongarch/generic.md b/gcc/config/loongarch/generic.md new file mode 100644 index 0000000000000000000000000000000000000000..321b8e5614071ebe63722709ddf507044ce446f0 --- /dev/null +++ b/gcc/config/loongarch/generic.md @@ -0,0 +1,109 @@ +;; Generic DFA-based pipeline description for LARCH targets +;; Copyright (C) 2004-2018 Free Software Foundation, Inc. +;; +;; This file is part of GCC. 
+ +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;; This file is derived from the old define_function_unit description. +;; Each reservation can be overridden on a processor-by-processor basis. + +(define_insn_reservation "generic_alu" 1 + (eq_attr "type" "unknown,prefetch,prefetchx,condmove,const,arith, + shift,slt,clz,trap,multi,nop,logical,signext,move") + "alu") + +(define_insn_reservation "generic_load" 3 + (eq_attr "type" "load,fpload,fpidxload") + "alu") + +(define_insn_reservation "generic_store" 1 + (eq_attr "type" "store,fpstore,fpidxstore") + "alu") + +(define_insn_reservation "generic_xfer" 2 + (eq_attr "type" "mftg,mgtf") + "alu") + +(define_insn_reservation "generic_branch" 1 + (eq_attr "type" "branch,jump,call") + "alu") + +(define_insn_reservation "generic_imul" 17 + (eq_attr "type" "imul,imul3") + "imuldiv*17") + +(define_insn_reservation "generic_fcvt" 1 + (eq_attr "type" "fcvt") + "alu") + +(define_insn_reservation "generic_fmove" 2 + (eq_attr "type" "fabs,fneg,fmove") + "alu") + +(define_insn_reservation "generic_fcmp" 3 + (eq_attr "type" "fcmp") + "alu") + +(define_insn_reservation "generic_fadd" 4 + (eq_attr "type" "fadd") + "alu") + +(define_insn_reservation "generic_fmul_single" 7 + (and (eq_attr "type" "fmul,fmadd") + (eq_attr "mode" "SF")) + "alu") + +(define_insn_reservation "generic_fmul_double" 8 + (and (eq_attr "type" "fmul,fmadd") + (eq_attr "mode" "DF")) + "alu") + +(define_insn_reservation "generic_fdiv_single" 23 + (and (eq_attr "type" "fdiv,frdiv") + (eq_attr "mode" "SF")) + "alu") + +(define_insn_reservation "generic_fdiv_double" 36 + (and (eq_attr "type" "fdiv,frdiv") + (eq_attr "mode" "DF")) + "alu") + +(define_insn_reservation "generic_fsqrt_single" 54 + (and (eq_attr "type" "fsqrt,frsqrt") + (eq_attr "mode" "SF")) + "alu") + +(define_insn_reservation "generic_fsqrt_double" 112 + (and (eq_attr "type" "fsqrt,frsqrt") + (eq_attr "mode" "DF")) + "alu") + +(define_insn_reservation "generic_atomic" 10 + (eq_attr "type" "atomic") + "alu") + +;; Sync loop consists of (in order) +;; (1) optional sync, +;; (2) LL instruction, +;; (3) branch and 1-2 ALU instructions, +;; (4) SC instruction, +;; (5) branch and ALU instruction. +;; The net result of this reservation is a big delay with a flush of +;; ALU pipeline. +(define_insn_reservation "generic_sync_loop" 40 + (eq_attr "type" "syncloop") + "alu*39") diff --git a/gcc/config/loongarch/genopt.sh b/gcc/config/loongarch/genopt.sh new file mode 100644 index 0000000000000000000000000000000000000000..23ae1a99e0255c341a9b79d848534f5c9f4c7a57 --- /dev/null +++ b/gcc/config/loongarch/genopt.sh @@ -0,0 +1,123 @@ +#!/bin/sh +# Generate loongarch-tables.opt from the list of CPUs in loongarch-cpus.def. +# Copyright (C) 2011-2018 Free Software Foundation, Inc. +# +# This file is part of GCC. 
+# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +cat <. + +Enum +Name(loongarch_arch_opt_value) Type(int) +Known LARCH CPUs (for use with the -march= and -mtune= options): + +Enum +Name(loongarch_loongarch_opt_value) Type(int) +Known LARCH ISA levels (for use with the -loongarch option): + +EnumValue +Enum(loongarch_arch_opt_value) String(from-abi) Value(LARCH_ARCH_OPTION_FROM_ABI) + +EnumValue +Enum(loongarch_arch_opt_value) String(native) Value(LARCH_ARCH_OPTION_NATIVE) DriverOnly + +EOF + +awk -F'[(, ]+' ' +BEGIN { + value = 0 +} + +# Write an entry for a single string accepted as a -march= argument. + +function write_one_arch_value(name, value, flags) +{ + print "EnumValue" + print "Enum(loongarch_arch_opt_value) String(" name ") Value(" value ")" flags + print "" + if (name ~ "^loongarch") { + sub("^loongarch", "", name) + print "EnumValue" + print "Enum(loongarch_loongarch_opt_value) String(" name ") Value(" value ")" + print "" + } +} + +# The logic for matching CPU name variants should be the same as in GAS. + +# Write an entry for a single string accepted as a -march= argument, +# plus any variant with a final "000" replaced by "k". + +function write_arch_value_maybe_k(name, value, flags) +{ + write_one_arch_value(name, value, flags) + if (name ~ "000$") { + sub("000$", "k", name) + write_one_arch_value(name, value, "") + } +} + +# Write all the entries for a -march= argument. In addition to +# replacement of a final "000" with "k", an argument starting with +# "vr", "rm" or "r" followed by a number, or just a plain number, +# matches a plain number or "r" followed by a plain number. + +function write_all_arch_values(name, value) +{ + write_arch_value_maybe_k(name, value, " Canonical") + cname = name + if (cname ~ "^vr") { + sub("^vr", "", cname) + } else if (cname ~ "^rm") { + sub("^rm", "", cname) + } else if (cname ~ "^r") { + sub("^r", "", cname) + } + if (cname ~ "^[0-9]") { + if (cname != name) + write_arch_value_maybe_k(cname, value, "") + rname = "r" cname + if (rname != name) + write_arch_value_maybe_k(rname, value, "") + } +} + +/^LARCH_CPU/ { + name = $2 + gsub("\"", "", name) + write_all_arch_values(name, value) + value++ +}' $1/loongarch-cpus.def diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h new file mode 100644 index 0000000000000000000000000000000000000000..9a52bc1e64a351bc1937a43a176b9ff9be689801 --- /dev/null +++ b/gcc/config/loongarch/gnu-user.h @@ -0,0 +1,132 @@ +/* Definitions for LARCH systems using GNU userspace. + Copyright (C) 1998-2018 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +#undef ASM_DECLARE_OBJECT_NAME +#define ASM_DECLARE_OBJECT_NAME loongarch_declare_object_name + +/* If we don't set MASK_ABICALLS, we can't default to PIC. */ +/* #undef TARGET_DEFAULT */ +/* #define TARGET_DEFAULT MASK_ABICALLS */ + +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + GNU_USER_TARGET_OS_CPP_BUILTINS(); \ + /* The GNU C++ standard library requires this. */ \ + if (c_dialect_cxx ()) \ + builtin_define ("_GNU_SOURCE"); \ + } while (0) + +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}" + +/* A standard GNU/Linux mapping. On most targets, it is included in + CC1_SPEC itself by config/linux.h, but loongarch.h overrides CC1_SPEC + and provides this hook instead. */ +#undef SUBTARGET_CC1_SPEC +#define SUBTARGET_CC1_SPEC GNU_USER_TARGET_CC1_SPEC + +/* -G is incompatible with -KPIC which is the default, so only allow objects + in the small data section if the user explicitly asks for it. */ +#undef LARCH_DEFAULT_GVALUE +#define LARCH_DEFAULT_GVALUE 0 + +#undef GNU_USER_TARGET_LINK_SPEC +#define GNU_USER_TARGET_LINK_SPEC "\ + %{G*} %{EB} %{EL} %{shared} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + %{mabi=lp32: -dynamic-linker " GNU_USER_DYNAMIC_LINKERLP32 "} \ + %{mabi=lp64: -dynamic-linker " GNU_USER_DYNAMIC_LINKERLP64 "}} \ + %{static}} \ + %{mabi=lp32:-m" GNU_USER_LINK_EMULATION32 "} \ + %{mabi=lp64:-m" GNU_USER_LINK_EMULATION64 "}" + +#undef LINK_SPEC +#define LINK_SPEC GNU_USER_TARGET_LINK_SPEC + +/* The LARCH assembler has different syntax for .set. We set it to + .dummy to trap any errors. */ +#undef SET_ASM_OP +#define SET_ASM_OP "\t.dummy\t" + +#undef ASM_OUTPUT_DEF +#define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2) \ + do { \ + fputc ( '\t', FILE); \ + assemble_name (FILE, LABEL1); \ + fputs ( " = ", FILE); \ + assemble_name (FILE, LABEL2); \ + fputc ( '\n', FILE); \ + } while (0) + +/* The glibc _mcount stub will save $v0 for us. Don't mess with saving + it, since ASM_OUTPUT_REG_PUSH/ASM_OUTPUT_REG_POP do not work in the + presence of $gp-relative calls. */ +#undef ASM_OUTPUT_REG_PUSH +#undef ASM_OUTPUT_REG_POP + +#undef LIB_SPEC +#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC + +#define NO_SHARED_SPECS "" + +/* -march=native handling only makes sense with compiler running on + a LARCH chip. */ +#if defined(__loongarch__) +extern const char *host_detect_local_cpu (int argc, const char **argv); +# define EXTRA_SPEC_FUNCTIONS \ + { "local_cpu_detect", host_detect_local_cpu }, + +# define MARCH_MTUNE_NATIVE_SPECS \ + " %{march=native:%. 
*/ + +#ifndef _GCC_LOONGARCH_BASE_INTRIN_H +#define _GCC_LOONGARCH_BASE_INTRIN_H + +#ifdef __cplusplus +extern "C"{ +#endif + +typedef struct drdtime{ + unsigned long dvalue; + unsigned long dtimeid; +} __drdtime_t; + +typedef struct rdtime{ + unsigned int value; + unsigned int timeid; +} __rdtime_t; + +#ifdef __loongarch64 +extern __inline __drdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__builtin_loongarch_rdtime_d (void) +{ + __drdtime_t drdtime; + __asm__ volatile ( + "rdtime.d\t%[val],%[tid]\n\t" + : [val]"=&r"(drdtime.dvalue),[tid]"=&r"(drdtime.dtimeid) + : + ); + return drdtime; +} +#define __rdtime_d __builtin_loongarch_rdtime_d +#endif + +extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__builtin_loongarch_rdtimeh_w (void) +{ + __rdtime_t rdtime; + __asm__ volatile ( + "rdtimeh.w\t%[val],%[tid]\n\t" + : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) + : + ); + return rdtime; +} +#define __rdtimel_w __builtin_loongarch_rdtimel_w + +extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__builtin_loongarch_rdtimel_w (void) +{ + __rdtime_t rdtime; + __asm__ volatile ( + "rdtimel.w\t%[val],%[tid]\n\t" + : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) + : + ); + return rdtime; +} +#define __rdtimeh_w __builtin_loongarch_rdtimeh_w + +/* Assembly instruction format: rj, fcsr */ +/* Data types in instruction templates: USI, UQI */ +#define __movfcsr2gr(/*ui5*/_1) __builtin_loongarch_movfcsr2gr((_1)); + +/* Assembly instruction format: 0, fcsr, rj */ +/* Data types in instruction templates: VOID, UQI, USI */ +#define __movgr2fcsr(/*ui5*/ _1, _2) __builtin_loongarch_movgr2fcsr((unsigned short)_1, (unsigned int)_2); + +#ifdef __loongarch32 +/* Assembly instruction format: ui5, rj, si12 */ +/* Data types in instruction templates: VOID, USI, USI, SI */ +#define __cacop(/*ui5*/ _1, /*unsigned int*/ _2, /*si12*/ _3) ((void)__builtin_loongarch_cacop((_1), (unsigned int)(_2), (_3))) +#elif defined __loongarch64 +/* Assembly instruction format: ui5, rj, si12 */ +/* Data types in instruction templates: VOID, USI, UDI, SI */ +#define __dcacop(/*ui5*/ _1, /*unsigned long int*/ _2, /*si12*/ _3) ((void)__builtin_loongarch_dcacop((_1), (unsigned long int)(_2), (_3))) +#else +# error "Don't support this ABI." 
+#endif + +/* Assembly instruction format: rd, rj */ +/* Data types in instruction templates: USI, USI */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +unsigned int __cpucfg(unsigned int _1) +{ + return (unsigned int)__builtin_loongarch_cpucfg((unsigned int)_1); +} + +#ifdef __loongarch64 +/* Assembly instruction format: rd, rj */ +/* Data types in instruction templates: DI, DI */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +void __asrtle_d(long int _1, long int _2) +{ + __builtin_loongarch_asrtle_d((long int)_1, (long int)_2); +} + +/* Assembly instruction format: rd, rj */ +/* Data types in instruction templates: DI, DI */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +void __asrtgt_d(long int _1, long int _2) +{ + __builtin_loongarch_asrtgt_d((long int)_1, (long int)_2); +} +#endif + +#ifdef __loongarch32 +/* Assembly instruction format: rd, rj, ui5 */ +/* Data types in instruction templates: SI, SI, UQI */ +#define __lddir(/*int*/ _1, /*ui5*/ _2) ((int)__builtin_loongarch_lddir((int)(_1), (_2))) +#elif defined __loongarch64 +/* Assembly instruction format: rd, rj, ui5 */ +/* Data types in instruction templates: DI, DI, UQI */ +#define __dlddir(/*long int*/ _1, /*ui5*/ _2) ((long int)__builtin_loongarch_dlddir((long int)(_1), (_2))) +#else +# error "Don't support this ABI." +#endif + +#ifdef __loongarch32 +/* Assembly instruction format: rj, ui5 */ +/* Data types in instruction templates: VOID, SI, UQI */ +#define __ldpte(/*int*/ _1, /*ui5*/ _2) ((void)__builtin_loongarch_ldpte((int)(_1), (_2))) +#elif defined __loongarch64 +/* Assembly instruction format: rj, ui5 */ +/* Data types in instruction templates: VOID, DI, UQI */ +#define __dldpte(/*long int*/ _1, /*ui5*/ _2) ((void)__builtin_loongarch_dldpte((long int)(_1), (_2))) +#else +# error "Don't support this ABI." 
+#endif + +/* Assembly instruction format: rd, rj, rk */ +/* Data types in instruction templates: SI, QI, SI */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +int __crc_w_b_w(char _1, int _2) +{ + return (int)__builtin_loongarch_crc_w_b_w((char)_1, (int)_2); +} + +/* Assembly instruction format: rd, rj, rk */ +/* Data types in instruction templates: SI, HI, SI */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +int __crc_w_h_w(short _1, int _2) +{ + return (int)__builtin_loongarch_crc_w_h_w((short)_1, (int)_2); +} + +/* Assembly instruction format: rd, rj, rk */ +/* Data types in instruction templates: SI, SI, SI */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +int __crc_w_w_w(int _1, int _2) +{ + return (int)__builtin_loongarch_crc_w_w_w((int)_1, (int)_2); +} + +#ifdef __loongarch64 +/* Assembly instruction format: rd, rj, rk */ +/* Data types in instruction templates: SI, DI, SI */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +int __crc_w_d_w(long int _1, int _2) +{ + return (int)__builtin_loongarch_crc_w_d_w((long int)_1, (int)_2); +} +#endif + +/* Assembly instruction format: rd, rj, rk */ +/* Data types in instruction templates: SI, QI, SI */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +int __crcc_w_b_w(char _1, int _2) +{ + return (int)__builtin_loongarch_crcc_w_b_w((char)_1, (int)_2); +} + +/* Assembly instruction format: rd, rj, rk */ +/* Data types in instruction templates: SI, HI, SI */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +int __crcc_w_h_w(short _1, int _2) +{ + return (int)__builtin_loongarch_crcc_w_h_w((short)_1, (int)_2); +} + +/* Assembly instruction format: rd, rj, rk */ +/* Data types in instruction templates: SI, SI, SI */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +int __crcc_w_w_w(int _1, int _2) +{ + return (int)__builtin_loongarch_crcc_w_w_w((int)_1, (int)_2); +} + +#ifdef __loongarch64 +/* Assembly instruction format: rd, rj, rk */ +/* Data types in instruction templates: SI, DI, SI */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +int __crcc_w_d_w(long int _1, int _2) +{ + return (int)__builtin_loongarch_crcc_w_d_w((long int)_1, (int)_2); +} +#endif + +/* Assembly instruction format: rd, ui14 */ +/* Data types in instruction templates: USI, USI */ +#define __csrrd(/*ui14*/ _1) ((unsigned int)__builtin_loongarch_csrrd((_1))) + +/* Assembly instruction format: rd, ui14 */ +/* Data types in instruction templates: USI, USI, USI */ +#define __csrwr(/*unsigned int*/ _1, /*ui14*/ _2) ((unsigned int)__builtin_loongarch_csrwr((unsigned int)(_1), (_2))) + +/* Assembly instruction format: rd, rj, ui14 */ +/* Data types in instruction templates: USI, USI, USI, USI */ +#define __csrxchg(/*unsigned int*/ _1, /*unsigned int*/ _2, /*ui14*/ _3) ((unsigned int)__builtin_loongarch_csrxchg((unsigned int)(_1), (unsigned int)(_2), (_3))) + +#ifdef __loongarch64 +/* Assembly instruction format: rd, ui14 */ +/* Data types in instruction templates: UDI, USI */ +#define __dcsrrd(/*ui14*/ _1) ((unsigned long int)__builtin_loongarch_dcsrrd((_1))) + +/* Assembly instruction format: rd, ui14 */ +/* Data types in instruction templates: UDI, UDI, USI */ +#define __dcsrwr(/*unsigned long int*/ _1, /*ui14*/ _2) ((unsigned long int)__builtin_loongarch_dcsrwr((unsigned long int)(_1), 
(_2))) + +/* Assembly instruction format: rd, rj, ui14 */ +/* Data types in instruction templates: UDI, UDI, UDI, USI */ +#define __dcsrxchg(/*unsigned long int*/ _1, /*unsigned long int*/ _2, /*ui14*/ _3) ((unsigned long int)__builtin_loongarch_dcsrxchg((unsigned long int)(_1), (unsigned long int)(_2), (_3))) +#endif + +/* Assembly instruction format: rd, rj */ +/* Data types in instruction templates: UQI, USI */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +unsigned char __iocsrrd_b(unsigned int _1) +{ + return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); +} + +/* Assembly instruction format: rd, rj */ +/* Data types in instruction templates: UHI, USI */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +unsigned short __iocsrrd_h(unsigned int _1) +{ + return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); +} + +/* Assembly instruction format: rd, rj */ +/* Data types in instruction templates: USI, USI */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +unsigned int __iocsrrd_w(unsigned int _1) +{ + return (unsigned int)__builtin_loongarch_iocsrrd_w((unsigned int)_1); +} + +#ifdef __loongarch64 +/* Assembly instruction format: rd, rj */ +/* Data types in instruction templates: UDI, USI */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +unsigned long int __iocsrrd_d(unsigned int _1) +{ + return (unsigned long int)__builtin_loongarch_iocsrrd_d((unsigned int)_1); +} +#endif + +/* Assembly instruction format: rd, rj */ +/* Data types in instruction templates: VOID, UQI, USI */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +void __iocsrwr_b(unsigned char _1, unsigned int _2) +{ + return (void)__builtin_loongarch_iocsrwr_b((unsigned char)_1, (unsigned int)_2); +} + +/* Assembly instruction format: rd, rj */ +/* Data types in instruction templates: VOID, UHI, USI */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +void __iocsrwr_h(unsigned short _1, unsigned int _2) +{ + return (void)__builtin_loongarch_iocsrwr_h((unsigned short)_1, (unsigned int)_2); +} + +/* Assembly instruction format: rd, rj */ +/* Data types in instruction templates: VOID, USI, USI */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +void __iocsrwr_w(unsigned int _1, unsigned int _2) +{ + return (void)__builtin_loongarch_iocsrwr_w((unsigned int)_1, (unsigned int)_2); +} + +#ifdef __loongarch64 +/* Assembly instruction format: rd, rj */ +/* Data types in instruction templates: VOID, UDI, USI */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +void __iocsrwr_d(unsigned long int _1, unsigned int _2) +{ + return (void)__builtin_loongarch_iocsrwr_d((unsigned long int)_1, (unsigned int)_2); +} +#endif + +/* Assembly instruction format: ui15 */ +/* Data types in instruction templates: UQI */ +#define __dbar(/*ui15*/ _1) __builtin_loongarch_dbar((_1)) + +/* Assembly instruction format: ui15 */ +/* Data types in instruction templates: UQI */ +#define __ibar(/*ui15*/ _1) __builtin_loongarch_ibar((_1)) + +#define __builtin_loongarch_syscall(a) \ +{ \ + __asm__ volatile ("syscall %0\n\t" \ + ::"I"(a)); \ +} +#define __syscall __builtin_loongarch_syscall + +#define __builtin_loongarch_break(a) \ +{ \ + __asm__ volatile ("break %0\n\t" \ + ::"I"(a)); \ +} +#define __break __builtin_loongarch_break + + +extern 
__inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__builtin_loongarch_tlbsrch (void) +{ + __asm__ volatile ("tlbsrch\n\t"); +} +#define __tlbsrch __builtin_loongarch_tlbsrch + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__builtin_loongarch_tlbrd (void) +{ + __asm__ volatile ("tlbrd\n\t"); +} +#define __tlbrd __builtin_loongarch_tlbrd + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__builtin_loongarch_tlbwr (void) +{ + __asm__ volatile ("tlbwr\n\t"); +} +#define __tlbwr __builtin_loongarch_tlbwr + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__builtin_loongarch_tlbfill (void) +{ + __asm__ volatile ("tlbfill\n\t"); +} +#define __tlbfill __builtin_loongarch_tlbfill + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__builtin_loongarch_tlbclr (void) +{ + __asm__ volatile ("tlbclr\n\t"); +} +#define __tlbclr __builtin_loongarch_tlbclr + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__builtin_loongarch_tlbflush (void) +{ + __asm__ volatile ("tlbflush\n\t"); +} +#define __tlbflush __builtin_loongarch_tlbflush + + +#ifdef __cplusplus +} +#endif +#endif /* _GCC_LOONGARCH_BASE_INTRIN_H */ diff --git a/gcc/config/loongarch/linux-common.h b/gcc/config/loongarch/linux-common.h new file mode 100644 index 0000000000000000000000000000000000000000..9e1a1b50f65779879fb4c990768acab6c309f617 --- /dev/null +++ b/gcc/config/loongarch/linux-common.h @@ -0,0 +1,68 @@ +/* Definitions for LARCH running Linux-based GNU systems with ELF format. + Copyright (C) 2012-2018 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + GNU_USER_TARGET_OS_CPP_BUILTINS(); \ + /* The GNU C++ standard library requires this. 
*/ \ + if (c_dialect_cxx ()) \ + builtin_define ("_GNU_SOURCE"); \ + ANDROID_TARGET_OS_CPP_BUILTINS(); \ + } while (0) + +#define EXTRA_TARGET_D_OS_VERSIONS() \ + ANDROID_TARGET_D_OS_VERSIONS(); + +#undef LINK_SPEC +#define LINK_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LINK_SPEC, \ + GNU_USER_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC) + +#undef SUBTARGET_CC1_SPEC +#define SUBTARGET_CC1_SPEC \ + LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC, \ + GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC) + +#undef CC1PLUS_SPEC +#define CC1PLUS_SPEC \ + LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC) + +#undef LIB_SPEC +#define LIB_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ + GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC) + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC) + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_MATHFILE_SPEC " " \ + GNU_USER_TARGET_ENDFILE_SPEC, \ + GNU_USER_TARGET_MATHFILE_SPEC " " \ + ANDROID_ENDFILE_SPEC) + +/* Define this to be nonzero if static stack checking is supported. */ +#define STACK_CHECK_STATIC_BUILTIN 1 + +/* FIXME*/ +/* The default value isn't sufficient in 64-bit mode. */ +#define STACK_CHECK_PROTECT (TARGET_64BIT ? 16 * 1024 : 12 * 1024) diff --git a/gcc/config/loongarch/linux.h b/gcc/config/loongarch/linux.h new file mode 100644 index 0000000000000000000000000000000000000000..1b786a0feb0f7743f3a8c6a64eb9c1e317e186c2 --- /dev/null +++ b/gcc/config/loongarch/linux.h @@ -0,0 +1,29 @@ +/* Definitions for LARCH running Linux-based GNU systems with ELF format. + Copyright (C) 1998-2018 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define GNU_USER_LINK_EMULATION32 "elf32loongarch" +#define GNU_USER_LINK_EMULATION64 "elf64loongarch" + +#define GLIBC_DYNAMIC_LINKERLP32 \ + "/lib32/ld.so.1" +#define GLIBC_DYNAMIC_LINKERLP64 \ + "/lib64/ld.so.1" + +#define GNU_USER_DYNAMIC_LINKERLP32 GLIBC_DYNAMIC_LINKERLP32 +#define GNU_USER_DYNAMIC_LINKERLP64 GLIBC_DYNAMIC_LINKERLP64 diff --git a/gcc/config/loongarch/loongarch-builtins.c b/gcc/config/loongarch/loongarch-builtins.c new file mode 100644 index 0000000000000000000000000000000000000000..cfe9c16cbd1d6f6f7d64dca77a281cbb70d38570 --- /dev/null +++ b/gcc/config/loongarch/loongarch-builtins.c @@ -0,0 +1,593 @@ +/* Subroutines used for expanding LOONGARCH builtins. + Copyright (C) 2011-2018 Free Software Foundation, Inc. + Contributed by Andrew Waterman (andrew@sifive.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "target.h" +#include "rtl.h" +#include "tree.h" +#include "memmodel.h" +#include "gimple.h" +#include "tm_p.h" +#include "optabs.h" +#include "recog.h" +#include "diagnostic.h" +#include "fold-const.h" +#include "expr.h" +#include "langhooks.h" + +/* Macros to create an enumeration identifier for a function prototype. */ +#define LARCH_FTYPE_NAME1(A, B) LARCH_##A##_FTYPE_##B +#define LARCH_FTYPE_NAME2(A, B, C) LARCH_##A##_FTYPE_##B##_##C +#define LARCH_FTYPE_NAME3(A, B, C, D) LARCH_##A##_FTYPE_##B##_##C##_##D +#define LARCH_FTYPE_NAME4(A, B, C, D, E) LARCH_##A##_FTYPE_##B##_##C##_##D##_##E + +/* Classifies the prototype of a built-in function. */ +enum loongarch_function_type { +#define DEF_LARCH_FTYPE(NARGS, LIST) LARCH_FTYPE_NAME##NARGS LIST, +#include "config/loongarch/loongarch-ftypes.def" +#undef DEF_LARCH_FTYPE + LARCH_MAX_FTYPE_MAX +}; + +/* Specifies how a built-in function should be converted into rtl. */ +enum loongarch_builtin_type { + /* The function corresponds directly to an .md pattern. The return + value is mapped to operand 0 and the arguments are mapped to + operands 1 and above. */ + LARCH_BUILTIN_DIRECT, + + /* The function corresponds directly to an .md pattern. There is no return + value and the arguments are mapped to operands 0 and above. */ + LARCH_BUILTIN_DIRECT_NO_TARGET, + +}; + +/* Invoke MACRO (COND) for each C.cond.fmt condition. */ +#define LARCH_FP_CONDITIONS(MACRO) \ + MACRO (f), \ + MACRO (un), \ + MACRO (eq), \ + MACRO (ueq), \ + MACRO (olt), \ + MACRO (ult), \ + MACRO (ole), \ + MACRO (ule), \ + MACRO (sf), \ + MACRO (ngle), \ + MACRO (seq), \ + MACRO (ngl), \ + MACRO (lt), \ + MACRO (nge), \ + MACRO (le), \ + MACRO (ngt) + +/* Enumerates the codes above as LARCH_FP_COND_. */ +#define DECLARE_LARCH_COND(X) LARCH_FP_COND_ ## X +enum loongarch_fp_condition { + LARCH_FP_CONDITIONS (DECLARE_LARCH_COND) +}; +#undef DECLARE_LARCH_COND + +/* Index X provides the string representation of LARCH_FP_COND_. */ +#define STRINGIFY(X) #X +const char *const loongarch_fp_conditions[16] = { + LARCH_FP_CONDITIONS (STRINGIFY) +}; +#undef STRINGIFY + +/* Declare an availability predicate for built-in functions that require + * COND to be true. NAME is the main part of the predicate's name. */ +#define AVAIL_ALL(NAME, COND) \ + static unsigned int \ + loongarch_builtin_avail_##NAME (void) \ + { \ + return (COND) ? 1 : 0; \ + } + +static unsigned int +loongarch_builtin_avail_default (void) +{ + return 1; +} +/* This structure describes a single built-in function. */ +struct loongarch_builtin_description { + /* The code of the main .md file instruction. See loongarch_builtin_type + for more information. */ + enum insn_code icode; + + /* The floating-point comparison code to use with ICODE, if any. */ + enum loongarch_fp_condition cond; + + /* The name of the built-in function. */ + const char *name; + + /* Specifies how the function should be expanded. */ + enum loongarch_builtin_type builtin_type; + + /* The function's prototype. */ + enum loongarch_function_type function_type; + + /* Whether the function is available. 
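The prototype enum above is populated by X-macro expansion of loongarch-ftypes.def. As a standalone sketch of that technique (simplified to an in-file list macro instead of a .def include, and not GCC code), the same pattern derives an enum and a parallel name table from a single list:

    #include <stdio.h>

    // One entry per prototype, mirroring the role of DEF_LARCH_FTYPE.
    #define FTYPE_LIST(X)        \
      X (1, SI_FTYPE_SI)         \
      X (2, VOID_FTYPE_UQI_USI)

    #define AS_ENUM(NARGS, NAME) DEMO_##NAME,
    #define AS_NAME(NARGS, NAME) #NAME,

    enum demo_ftype { FTYPE_LIST (AS_ENUM) DEMO_FTYPE_MAX };
    static const char *const demo_ftype_names[] = { FTYPE_LIST (AS_NAME) };

    int
    main (void)
    {
      for (int i = 0; i < DEMO_FTYPE_MAX; i++)
        printf ("%d: %s\n", i, demo_ftype_names[i]);
      return 0;
    }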
*/ + unsigned int (*avail) (void); +}; + +AVAIL_ALL (hard_float, TARGET_HARD_FLOAT_ABI) +AVAIL_ALL (lvz, TARGET_LVZ) + +/* Construct a loongarch_builtin_description from the given arguments. + + INSN is the name of the associated instruction pattern, without the + leading CODE_FOR_loongarch_. + + CODE is the floating-point condition code associated with the + function. It can be 'f' if the field is not applicable. + + NAME is the name of the function itself, without the leading + "__builtin_loongarch_". + + BUILTIN_TYPE and FUNCTION_TYPE are loongarch_builtin_description fields. + + AVAIL is the name of the availability predicate, without the leading + loongarch_builtin_avail_. */ +#define LARCH_BUILTIN(INSN, COND, NAME, BUILTIN_TYPE, \ + FUNCTION_TYPE, AVAIL) \ + { CODE_FOR_loongarch_ ## INSN, LARCH_FP_COND_ ## COND, \ + "__builtin_loongarch_" NAME, BUILTIN_TYPE, FUNCTION_TYPE, \ + loongarch_builtin_avail_ ## AVAIL } + +/* Define __builtin_loongarch_, which is a LARCH_BUILTIN_DIRECT function + mapped to instruction CODE_FOR_loongarch_, FUNCTION_TYPE and AVAIL + are as for LARCH_BUILTIN. */ +#define DIRECT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ + LARCH_BUILTIN (INSN, f, #INSN, LARCH_BUILTIN_DIRECT, FUNCTION_TYPE, AVAIL) + +/* Define __builtin_loongarch_, which is a LARCH_BUILTIN_DIRECT_NO_TARGET + function mapped to instruction CODE_FOR_loongarch_, FUNCTION_TYPE + and AVAIL are as for LARCH_BUILTIN. */ +#define DIRECT_NO_TARGET_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ + LARCH_BUILTIN (INSN, f, #INSN, LARCH_BUILTIN_DIRECT_NO_TARGET, \ + FUNCTION_TYPE, AVAIL) + +/* Define an LVZ LARCH_BUILTIN_DIRECT function __builtin_lvz_ + for instruction CODE_FOR_lvz_. FUNCTION_TYPE is a builtin_description + field. */ +#define LVZ_BUILTIN(INSN, FUNCTION_TYPE) \ + { CODE_FOR_lvz_ ## INSN, LARCH_FP_COND_f, \ + "__builtin_lvz_" #INSN, LARCH_BUILTIN_DIRECT, \ + FUNCTION_TYPE, loongarch_builtin_avail_lvz } + + /* Loongson support loongarch64r6 */ +#define CODE_FOR_loongarch_fmax_sf CODE_FOR_smaxsf3 +#define CODE_FOR_loongarch_fmax_df CODE_FOR_smaxdf3 +#define CODE_FOR_loongarch_fmin_sf CODE_FOR_sminsf3 +#define CODE_FOR_loongarch_fmin_df CODE_FOR_smindf3 +#define CODE_FOR_loongarch_fmaxa_sf CODE_FOR_smaxasf3 +#define CODE_FOR_loongarch_fmaxa_df CODE_FOR_smaxadf3 +#define CODE_FOR_loongarch_fmina_sf CODE_FOR_sminasf3 +#define CODE_FOR_loongarch_fmina_df CODE_FOR_sminadf3 +#define CODE_FOR_loongarch_fclass_s CODE_FOR_fclass_s +#define CODE_FOR_loongarch_fclass_d CODE_FOR_fclass_d +#define CODE_FOR_loongarch_frint_s CODE_FOR_frint_s +#define CODE_FOR_loongarch_frint_d CODE_FOR_frint_d +#define CODE_FOR_loongarch_bytepick_w CODE_FOR_bytepick_w +#define CODE_FOR_loongarch_bytepick_d CODE_FOR_bytepick_d +#define CODE_FOR_loongarch_bitrev_4b CODE_FOR_bitrev_4b +#define CODE_FOR_loongarch_bitrev_8b CODE_FOR_bitrev_8b + +/* Loongson support crc */ +#define CODE_FOR_loongarch_crc_w_b_w CODE_FOR_crc_w_b_w +#define CODE_FOR_loongarch_crc_w_h_w CODE_FOR_crc_w_h_w +#define CODE_FOR_loongarch_crc_w_w_w CODE_FOR_crc_w_w_w +#define CODE_FOR_loongarch_crc_w_d_w CODE_FOR_crc_w_d_w +#define CODE_FOR_loongarch_crcc_w_b_w CODE_FOR_crcc_w_b_w +#define CODE_FOR_loongarch_crcc_w_h_w CODE_FOR_crcc_w_h_w +#define CODE_FOR_loongarch_crcc_w_w_w CODE_FOR_crcc_w_w_w +#define CODE_FOR_loongarch_crcc_w_d_w CODE_FOR_crcc_w_d_w + +/* Privileged state instruction */ +#define CODE_FOR_loongarch_cpucfg CODE_FOR_cpucfg +#define CODE_FOR_loongarch_asrtle_d CODE_FOR_asrtle_d +#define CODE_FOR_loongarch_asrtgt_d CODE_FOR_asrtgt_d +#define 
CODE_FOR_loongarch_csrrd CODE_FOR_csrrd +#define CODE_FOR_loongarch_dcsrrd CODE_FOR_dcsrrd +#define CODE_FOR_loongarch_csrwr CODE_FOR_csrwr +#define CODE_FOR_loongarch_dcsrwr CODE_FOR_dcsrwr +#define CODE_FOR_loongarch_csrxchg CODE_FOR_csrxchg +#define CODE_FOR_loongarch_dcsrxchg CODE_FOR_dcsrxchg +#define CODE_FOR_loongarch_iocsrrd_b CODE_FOR_iocsrrd_b +#define CODE_FOR_loongarch_iocsrrd_h CODE_FOR_iocsrrd_h +#define CODE_FOR_loongarch_iocsrrd_w CODE_FOR_iocsrrd_w +#define CODE_FOR_loongarch_iocsrrd_d CODE_FOR_iocsrrd_d +#define CODE_FOR_loongarch_iocsrwr_b CODE_FOR_iocsrwr_b +#define CODE_FOR_loongarch_iocsrwr_h CODE_FOR_iocsrwr_h +#define CODE_FOR_loongarch_iocsrwr_w CODE_FOR_iocsrwr_w +#define CODE_FOR_loongarch_iocsrwr_d CODE_FOR_iocsrwr_d +#define CODE_FOR_loongarch_lddir CODE_FOR_lddir +#define CODE_FOR_loongarch_dlddir CODE_FOR_dlddir +#define CODE_FOR_loongarch_ldpte CODE_FOR_ldpte +#define CODE_FOR_loongarch_dldpte CODE_FOR_dldpte +#define CODE_FOR_loongarch_cacop CODE_FOR_cacop +#define CODE_FOR_loongarch_dcacop CODE_FOR_dcacop +#define CODE_FOR_loongarch_dbar CODE_FOR_dbar +#define CODE_FOR_loongarch_ibar CODE_FOR_ibar + +static const struct loongarch_builtin_description loongarch_builtins[] = { +#define LARCH_MOVFCSR2GR 0 + DIRECT_BUILTIN (movfcsr2gr, LARCH_USI_FTYPE_UQI, hard_float), +#define LARCH_MOVGR2FCSR 1 + DIRECT_NO_TARGET_BUILTIN (movgr2fcsr, LARCH_VOID_FTYPE_UQI_USI, hard_float), + + DIRECT_NO_TARGET_BUILTIN (cacop, LARCH_VOID_FTYPE_USI_USI_SI, default), + DIRECT_NO_TARGET_BUILTIN (dcacop, LARCH_VOID_FTYPE_USI_UDI_SI, default), + DIRECT_NO_TARGET_BUILTIN (dbar, LARCH_VOID_FTYPE_USI, default), + DIRECT_NO_TARGET_BUILTIN (ibar, LARCH_VOID_FTYPE_USI, default), + + DIRECT_BUILTIN (fmax_sf, LARCH_SF_FTYPE_SF_SF, hard_float), + DIRECT_BUILTIN (fmax_df, LARCH_DF_FTYPE_DF_DF, hard_float), + DIRECT_BUILTIN (fmin_sf, LARCH_SF_FTYPE_SF_SF, hard_float), + DIRECT_BUILTIN (fmin_df, LARCH_DF_FTYPE_DF_DF, hard_float), + DIRECT_BUILTIN (fmaxa_sf, LARCH_SF_FTYPE_SF_SF, hard_float), + DIRECT_BUILTIN (fmaxa_df, LARCH_DF_FTYPE_DF_DF, hard_float), + DIRECT_BUILTIN (fmina_sf, LARCH_SF_FTYPE_SF_SF, hard_float), + DIRECT_BUILTIN (fmina_df, LARCH_DF_FTYPE_DF_DF, hard_float), + DIRECT_BUILTIN (fclass_s, LARCH_SF_FTYPE_SF, hard_float), + DIRECT_BUILTIN (fclass_d, LARCH_DF_FTYPE_DF, hard_float), + DIRECT_BUILTIN (frint_s, LARCH_SF_FTYPE_SF, hard_float), + DIRECT_BUILTIN (frint_d, LARCH_DF_FTYPE_DF, hard_float), + DIRECT_BUILTIN (bytepick_w, LARCH_SI_FTYPE_SI_SI_QI, default), + DIRECT_BUILTIN (bytepick_d, LARCH_DI_FTYPE_DI_DI_QI, default), + DIRECT_BUILTIN (bitrev_4b, LARCH_SI_FTYPE_SI, default), + DIRECT_BUILTIN (bitrev_8b, LARCH_DI_FTYPE_DI, default), + DIRECT_BUILTIN (cpucfg, LARCH_USI_FTYPE_USI, default), + DIRECT_BUILTIN (asrtle_d, LARCH_VOID_FTYPE_DI_DI, default), + DIRECT_BUILTIN (asrtgt_d, LARCH_VOID_FTYPE_DI_DI, default), + DIRECT_BUILTIN (dlddir, LARCH_DI_FTYPE_DI_UQI, default), + DIRECT_BUILTIN (lddir, LARCH_SI_FTYPE_SI_UQI, default), + DIRECT_NO_TARGET_BUILTIN (dldpte, LARCH_VOID_FTYPE_DI_UQI, default), + DIRECT_NO_TARGET_BUILTIN (ldpte, LARCH_VOID_FTYPE_SI_UQI, default), + + /* CRC Instrinsic */ + + DIRECT_BUILTIN (crc_w_b_w, LARCH_SI_FTYPE_QI_SI, default), + DIRECT_BUILTIN (crc_w_h_w, LARCH_SI_FTYPE_HI_SI, default), + DIRECT_BUILTIN (crc_w_w_w, LARCH_SI_FTYPE_SI_SI, default), + DIRECT_BUILTIN (crc_w_d_w, LARCH_SI_FTYPE_DI_SI, default), + DIRECT_BUILTIN (crcc_w_b_w, LARCH_SI_FTYPE_QI_SI, default), + DIRECT_BUILTIN (crcc_w_h_w, LARCH_SI_FTYPE_HI_SI, default), + DIRECT_BUILTIN 
(crcc_w_w_w, LARCH_SI_FTYPE_SI_SI, default), + DIRECT_BUILTIN (crcc_w_d_w, LARCH_SI_FTYPE_DI_SI, default), + + /* Built-in functions for LVZ. */ + LVZ_BUILTIN (gcsrrd, LARCH_USI_FTYPE_USI), + LVZ_BUILTIN (gcsrwr, LARCH_USI_FTYPE_USI_USI), + LVZ_BUILTIN (gcsrxchg, LARCH_USI_FTYPE_USI_USI_USI), + LVZ_BUILTIN (dgcsrrd, LARCH_UDI_FTYPE_USI), + LVZ_BUILTIN (dgcsrwr, LARCH_UDI_FTYPE_UDI_USI), + LVZ_BUILTIN (dgcsrxchg, LARCH_UDI_FTYPE_UDI_UDI_USI), + + DIRECT_BUILTIN (csrrd, LARCH_USI_FTYPE_USI, default), + DIRECT_BUILTIN (dcsrrd, LARCH_UDI_FTYPE_USI, default), + DIRECT_BUILTIN (csrwr, LARCH_USI_FTYPE_USI_USI, default), + DIRECT_BUILTIN (dcsrwr, LARCH_UDI_FTYPE_UDI_USI, default), + DIRECT_BUILTIN (csrxchg, LARCH_USI_FTYPE_USI_USI_USI, default), + DIRECT_BUILTIN (dcsrxchg, LARCH_UDI_FTYPE_UDI_UDI_USI, default), + DIRECT_BUILTIN (iocsrrd_b, LARCH_UQI_FTYPE_USI, default), + DIRECT_BUILTIN (iocsrrd_h, LARCH_UHI_FTYPE_USI, default), + DIRECT_BUILTIN (iocsrrd_w, LARCH_USI_FTYPE_USI, default), + DIRECT_BUILTIN (iocsrrd_d, LARCH_UDI_FTYPE_USI, default), + DIRECT_NO_TARGET_BUILTIN (iocsrwr_b, LARCH_VOID_FTYPE_UQI_USI, default), + DIRECT_NO_TARGET_BUILTIN (iocsrwr_h, LARCH_VOID_FTYPE_UHI_USI, default), + DIRECT_NO_TARGET_BUILTIN (iocsrwr_w, LARCH_VOID_FTYPE_USI_USI, default), + DIRECT_NO_TARGET_BUILTIN (iocsrwr_d, LARCH_VOID_FTYPE_UDI_USI, default), +}; + +/* Index I is the function declaration for loongarch_builtins[I], or null if the + function isn't defined on this target. */ +static GTY(()) tree loongarch_builtin_decls[ARRAY_SIZE (loongarch_builtins)]; +/* Get the index I of the function declaration for loongarch_builtin_decls[I] + using the instruction code or return null if not defined for the target. */ +static GTY(()) int loongarch_get_builtin_decl_index[NUM_INSN_CODES]; + +/* Return a type for 'const volatile void *'. */ + +static tree +loongarch_build_cvpointer_type (void) +{ + static tree cache; + + if (cache == NULL_TREE) + cache = build_pointer_type (build_qualified_type + (void_type_node, + TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE)); + return cache; +} + +/* Source-level argument types. */ +#define LARCH_ATYPE_VOID void_type_node +#define LARCH_ATYPE_INT integer_type_node +#define LARCH_ATYPE_POINTER ptr_type_node +#define LARCH_ATYPE_CVPOINTER loongarch_build_cvpointer_type () + +/* Standard mode-based argument types. */ +#define LARCH_ATYPE_QI intQI_type_node +#define LARCH_ATYPE_UQI unsigned_intQI_type_node +#define LARCH_ATYPE_HI intHI_type_node +#define LARCH_ATYPE_UHI unsigned_intHI_type_node +#define LARCH_ATYPE_SI intSI_type_node +#define LARCH_ATYPE_USI unsigned_intSI_type_node +#define LARCH_ATYPE_DI intDI_type_node +#define LARCH_ATYPE_UDI unsigned_intDI_type_node +#define LARCH_ATYPE_SF float_type_node +#define LARCH_ATYPE_DF double_type_node + +/* LARCH_FTYPE_ATYPESN takes N LARCH_FTYPES-like type codes and lists + their associated LARCH_ATYPEs. */ +#define LARCH_FTYPE_ATYPES1(A, B) \ + LARCH_ATYPE_##A, LARCH_ATYPE_##B + +#define LARCH_FTYPE_ATYPES2(A, B, C) \ + LARCH_ATYPE_##A, LARCH_ATYPE_##B, LARCH_ATYPE_##C + +#define LARCH_FTYPE_ATYPES3(A, B, C, D) \ + LARCH_ATYPE_##A, LARCH_ATYPE_##B, LARCH_ATYPE_##C, LARCH_ATYPE_##D + +#define LARCH_FTYPE_ATYPES4(A, B, C, D, E) \ + LARCH_ATYPE_##A, LARCH_ATYPE_##B, LARCH_ATYPE_##C, LARCH_ATYPE_##D, \ + LARCH_ATYPE_##E + +/* Return the function type associated with function prototype TYPE. 
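A usage sketch for the CRC builtins registered in the table above (the buffer and seed are example data; the reduction semantics are those of the underlying crc.w.b.w instruction and are not restated here):

    // Fold a byte buffer into a CRC accumulator one byte at a time, using the
    // builtin registered by the DIRECT_BUILTIN (crc_w_b_w, ...) entry above.
    // Compile for a LoongArch target; illustration only.
    static int
    crc_bytes (const unsigned char *buf, unsigned long len, int crc)
    {
      for (unsigned long i = 0; i < len; i++)
        crc = __builtin_loongarch_crc_w_b_w ((char) buf[i], crc);
      return crc;
    }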
*/ + +static tree +loongarch_build_function_type (enum loongarch_function_type type) +{ + static tree types[(int) LARCH_MAX_FTYPE_MAX]; + + if (types[(int) type] == NULL_TREE) + switch (type) + { +#define DEF_LARCH_FTYPE(NUM, ARGS) \ + case LARCH_FTYPE_NAME##NUM ARGS: \ + types[(int) type] \ + = build_function_type_list (LARCH_FTYPE_ATYPES##NUM ARGS, \ + NULL_TREE); \ + break; +#include "config/loongarch/loongarch-ftypes.def" +#undef DEF_LARCH_FTYPE + default: + gcc_unreachable (); + } + + return types[(int) type]; +} + +/* Implement TARGET_INIT_BUILTINS. */ + +void +loongarch_init_builtins (void) +{ + const struct loongarch_builtin_description *d; + unsigned int i; + + /* Iterate through all of the bdesc arrays, initializing all of the + builtin functions. */ + for (i = 0; i < ARRAY_SIZE (loongarch_builtins); i++) + { + d = &loongarch_builtins[i]; + if (d->avail ()) + { + loongarch_builtin_decls[i] + = add_builtin_function (d->name, + loongarch_build_function_type (d->function_type), + i, BUILT_IN_MD, NULL, NULL); + loongarch_get_builtin_decl_index[d->icode] = i; + } + } +} + +/* Implement TARGET_BUILTIN_DECL. */ + +tree +loongarch_builtin_decl (unsigned int code, bool initialize_p ATTRIBUTE_UNUSED) +{ + if (code >= ARRAY_SIZE (loongarch_builtins)) + return error_mark_node; + return loongarch_builtin_decls[code]; +} + +/* Take argument ARGNO from EXP's argument list and convert it into + an expand operand. Store the operand in *OP. */ + +static void +loongarch_prepare_builtin_arg (struct expand_operand *op, tree exp, + unsigned int argno) +{ + tree arg; + rtx value; + + arg = CALL_EXPR_ARG (exp, argno); + value = expand_normal (arg); + create_input_operand (op, value, TYPE_MODE (TREE_TYPE (arg))); +} + +/* Expand instruction ICODE as part of a built-in function sequence. + Use the first NOPS elements of OPS as the instruction's operands. + HAS_TARGET_P is true if operand 0 is a target; it is false if the + instruction has no target. + + Return the target rtx if HAS_TARGET_P, otherwise return const0_rtx. */ + +static rtx +loongarch_expand_builtin_insn (enum insn_code icode, unsigned int nops, + struct expand_operand *ops, bool has_target_p) +{ + int error_opno = 0, rangelo = 0, rangehi =0 ; + + switch(icode){ + case CODE_FOR_csrrd: + case CODE_FOR_dcsrrd: + case CODE_FOR_csrwr: + case CODE_FOR_dcsrwr: + case CODE_FOR_csrxchg: + case CODE_FOR_dcsrxchg: + case CODE_FOR_iocsrrd_b: + case CODE_FOR_iocsrrd_h: + case CODE_FOR_iocsrrd_w: + case CODE_FOR_iocsrrd_d: + case CODE_FOR_iocsrwr_b: + case CODE_FOR_iocsrwr_h: + case CODE_FOR_iocsrwr_w: + case CODE_FOR_iocsrwr_d: + if (!maybe_expand_insn (icode, nops, ops)) + { + error ("invalid argument to built-in function"); + return has_target_p ? gen_reg_rtx (ops[0].mode) : const0_rtx; + } + emit_barrier(); + break; + default: + break; + } + + if (error_opno != 0) + { + error ("argument %d to the built-in must be a constant" + " in range %d to %d", error_opno, rangelo, rangehi); + return has_target_p ? gen_reg_rtx (ops[0].mode) : const0_rtx; + } + else if (!maybe_expand_insn (icode, nops, ops)) + { + error ("invalid argument to built-in function"); + return has_target_p ? gen_reg_rtx (ops[0].mode) : const0_rtx; + } + + return has_target_p ? ops[0].value : const0_rtx; +} + +/* Expand a LARCH_BUILTIN_DIRECT or LARCH_BUILTIN_DIRECT_NO_TARGET function; + HAS_TARGET_P says which. EXP is the CALL_EXPR that calls the function + and ICODE is the code of the associated .md pattern. TARGET, if nonnull, + suggests a good place to put the result. 
*/ + +static rtx +loongarch_expand_builtin_direct (enum insn_code icode, rtx target, tree exp, + bool has_target_p) +{ + struct expand_operand ops[MAX_RECOG_OPERANDS]; + int opno, argno; + + /* Map any target to operand 0. */ + opno = 0; + if (has_target_p) + create_output_operand (&ops[opno++], target, TYPE_MODE (TREE_TYPE (exp))); + + /* Map the arguments to the other operands. */ + gcc_assert (opno + call_expr_nargs (exp) + == insn_data[icode].n_generator_args); + for (argno = 0; argno < call_expr_nargs (exp); argno++) + loongarch_prepare_builtin_arg (&ops[opno++], exp, argno); + + return loongarch_expand_builtin_insn (icode, opno, ops, has_target_p); +} + +/* Implement TARGET_EXPAND_BUILTIN. */ + +rtx +loongarch_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, + machine_mode mode, int ignore) +{ + tree fndecl; + unsigned int fcode, avail; + const struct loongarch_builtin_description *d; + + fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + fcode = DECL_FUNCTION_CODE (fndecl); + gcc_assert (fcode < ARRAY_SIZE (loongarch_builtins)); + d = &loongarch_builtins[fcode]; + avail = d->avail (); + gcc_assert (avail != 0); + switch (d->builtin_type) + { + case LARCH_BUILTIN_DIRECT: + return loongarch_expand_builtin_direct (d->icode, target, exp, true); + + case LARCH_BUILTIN_DIRECT_NO_TARGET: + return loongarch_expand_builtin_direct (d->icode, target, exp, false); + + } + gcc_unreachable (); +} + +/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */ + +void +loongarch_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) +{ + if (!TARGET_HARD_FLOAT_ABI) + return; + tree exceptions_var = create_tmp_var_raw (LARCH_ATYPE_USI); + tree fcsr_orig_var = create_tmp_var_raw (LARCH_ATYPE_USI); + tree fcsr_mod_var = create_tmp_var_raw (LARCH_ATYPE_USI); + tree const0 = build_int_cst (LARCH_ATYPE_UQI, 0); + tree get_fcsr = loongarch_builtin_decls[LARCH_MOVFCSR2GR]; + tree set_fcsr = loongarch_builtin_decls[LARCH_MOVGR2FCSR]; + tree get_fcsr_hold_call = build_call_expr (get_fcsr, 1, const0); + tree hold_assign_orig = build2 (MODIFY_EXPR, LARCH_ATYPE_USI, + fcsr_orig_var, get_fcsr_hold_call); + tree hold_mod_val = build2 (BIT_AND_EXPR, LARCH_ATYPE_USI, fcsr_orig_var, + build_int_cst (LARCH_ATYPE_USI, 0xffe0ffe0)); + tree hold_assign_mod = build2 (MODIFY_EXPR, LARCH_ATYPE_USI, + fcsr_mod_var, hold_mod_val); + tree set_fcsr_hold_call = build_call_expr (set_fcsr, 2, const0, fcsr_mod_var); + tree hold_all = build2 (COMPOUND_EXPR, LARCH_ATYPE_USI, + hold_assign_orig, hold_assign_mod); + *hold = build2 (COMPOUND_EXPR, void_type_node, hold_all, + set_fcsr_hold_call); + + *clear = build_call_expr (set_fcsr, 2, const0, fcsr_mod_var); + + tree get_fcsr_update_call = build_call_expr (get_fcsr, 1, const0); + *update = build2 (MODIFY_EXPR, LARCH_ATYPE_USI, + exceptions_var, get_fcsr_update_call); + tree set_fcsr_update_call = build_call_expr (set_fcsr, 2, const0, fcsr_orig_var); + *update = build2 (COMPOUND_EXPR, void_type_node, *update, + set_fcsr_update_call); + tree atomic_feraiseexcept + = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); + tree int_exceptions_var = fold_convert (integer_type_node, + exceptions_var); + tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept, + 1, int_exceptions_var); + *update = build2 (COMPOUND_EXPR, void_type_node, *update, + atomic_feraiseexcept_call); +} + +/* Implement TARGET_BUILTIN_VA_LIST. 
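The hold/clear/update trees built above correspond roughly to the following C-level sequence around an atomic floating-point compound assignment; a sketch only, not a drop-in implementation. The movfcsr2gr/movgr2fcsr builtins and the 0xffe0ffe0 mask come from the code above, and __atomic_feraiseexcept is the libatomic helper the generated call resolves to.

    extern void __atomic_feraiseexcept (int);   // libatomic helper used by *update

    static void
    atomic_fp_fenv_sketch (void)
    {
      unsigned int fcsr_orig = __builtin_loongarch_movfcsr2gr (0);  // save FCSR0
      unsigned int fcsr_mod  = fcsr_orig & 0xffe0ffe0u;             // mask enables + flags
      __builtin_loongarch_movgr2fcsr (0, fcsr_mod);                 // *hold
      __builtin_loongarch_movgr2fcsr (0, fcsr_mod);                 // *clear, at the top of each retry
      // ... the atomic floating-point operation is expanded here ...
      unsigned int raised = __builtin_loongarch_movfcsr2gr (0);     // flags raised by the attempt
      __builtin_loongarch_movgr2fcsr (0, fcsr_orig);                // restore original FCSR0
      __atomic_feraiseexcept ((int) raised);                        // *update: re-raise the flags
    }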
*/ + +tree +loongarch_build_builtin_va_list (void) +{ + return ptr_type_node; +} + diff --git a/gcc/config/loongarch/loongarch-c.c b/gcc/config/loongarch/loongarch-c.c new file mode 100644 index 0000000000000000000000000000000000000000..4d3b0c8ca4e021c1ead18c5c795610c8bfa80e3e --- /dev/null +++ b/gcc/config/loongarch/loongarch-c.c @@ -0,0 +1,117 @@ +/* LOONGARCH-specific code for C family languages. + Copyright (C) 2011-2018 Free Software Foundation, Inc. + Contributed by Andrew Waterman (zhouyingkun@mail.loongson.cn). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "c-family/c-common.h" +#include "cpplib.h" + +#define preprocessing_asm_p() (cpp_get_options (pfile)->lang == CLK_ASM) +#define builtin_define(TXT) cpp_define (pfile, TXT) +#define builtin_assert(TXT) cpp_assert (pfile, TXT) + +/* TODO: what is the pfile technique ??? !!! */ + +void loongarch_cpu_cpp_builtins (cpp_reader *pfile) +{ + builtin_assert ("machine=loongarch"); + builtin_assert ("cpu=loongarch"); + builtin_define ("__loongarch__"); + + if (TARGET_FLOAT64) + builtin_define ("__loongarch_fpr=64"); + else + builtin_define ("__loongarch_fpr=32"); + + LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_ARCH", loongarch_arch_info); + LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_TUNE", loongarch_tune_info); + + + switch (loongarch_abi) + { + case ABILP32: + builtin_define ("_ABILP32=1"); + builtin_define ("_LOONGARCH_SIM=_ABILP32"); + builtin_define ("__loongarch32"); + break; + + case ABILPX32: + builtin_define ("_ABILPX32=2"); + builtin_define ("_LOONGARCH_SIM=_ABILPX32"); + break; + + case ABILP64: + builtin_define ("_ABILP64=3"); + builtin_define ("_LOONGARCH_SIM=_ABILP64"); + builtin_define ("__loongarch64"); + builtin_define ("__loongarch64__"); + break; + } + + builtin_define_with_int_value ("_LOONGARCH_SZINT", INT_TYPE_SIZE); + builtin_define_with_int_value ("_LOONGARCH_SZLONG", LONG_TYPE_SIZE); + builtin_define_with_int_value ("_LOONGARCH_SZPTR", POINTER_SIZE); + builtin_define_with_int_value ("_LOONGARCH_FPSET", + 32 / MAX_FPRS_PER_FMT); + builtin_define_with_int_value ("_LOONGARCH_SPFPSET", + 32); + + /* These defines reflect the ABI in use, not whether the + FPU is directly accessible. */ + if (TARGET_NO_FLOAT) + builtin_define ("__loongarch_no_float"); + else if (TARGET_HARD_FLOAT_ABI) + builtin_define ("__loongarch_hard_float"); + else + builtin_define ("__loongarch_soft_float"); + + if (TARGET_SINGLE_FLOAT) + builtin_define ("__loongarch_single_float"); + + /* Macros dependent on the C dialect. 
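As an illustration of consuming the predefined macros above from target-conditional C code (the typedef and helper macro names here are invented for the example):

    #if !defined (__loongarch__)
    #  error "LoongArch-specific translation unit"
    #endif

    #if defined (__loongarch64)
    typedef unsigned long larch_reg_t;   // LP64 ABI selected (_ABILP64)
    #else
    typedef unsigned int  larch_reg_t;   // 32-bit ABI
    #endif

    #if defined (__loongarch_hard_float) && __loongarch_fpr == 64
    #  define LARCH_HAVE_FP64 1          // hard-float ABI with 64-bit FPRs
    #else
    #  define LARCH_HAVE_FP64 0
    #endif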
*/ + if (preprocessing_asm_p ()) + { + builtin_define_std ("LANGUAGE_ASSEMBLY"); + builtin_define ("_LANGUAGE_ASSEMBLY"); + } + else if (c_dialect_cxx ()) + { + builtin_define ("_LANGUAGE_C_PLUS_PLUS"); + builtin_define ("__LANGUAGE_C_PLUS_PLUS"); + builtin_define ("__LANGUAGE_C_PLUS_PLUS__"); + } + else + { + builtin_define_std ("LANGUAGE_C"); + builtin_define ("_LANGUAGE_C"); + } + if (c_dialect_objc ()) + { + builtin_define ("_LANGUAGE_OBJECTIVE_C"); + builtin_define ("__LANGUAGE_OBJECTIVE_C"); + /* Bizarre, but retained for backwards compatibility. */ + builtin_define_std ("LANGUAGE_C"); + builtin_define ("_LANGUAGE_C"); + } +} diff --git a/gcc/config/loongarch/loongarch-cpus.def b/gcc/config/loongarch/loongarch-cpus.def new file mode 100644 index 0000000000000000000000000000000000000000..71b749b2dce15dde79f95db1d9995dae4f0c0926 --- /dev/null +++ b/gcc/config/loongarch/loongarch-cpus.def @@ -0,0 +1,39 @@ +/* LARCH CPU names. + Copyright (C) 1989-2018 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* A table describing all the processors GCC knows about. The first + mention of an ISA level is taken as the canonical name for that + ISA. + + To ease comparison, please keep this table in the same order + as GAS's loongarch_cpu_info_table. Please also make sure that + LARCH_ISA_LEVEL_SPEC and LARCH_ARCH_FLOAT_SPEC handle all -march + options correctly. + + Before including this file, define a macro: + + LARCH_CPU (NAME, CPU, ISA, FLAGS) + + where the arguments are the fields of struct loongarch_cpu_info. */ + +/* Entries for generic ISAs. */ +LARCH_CPU ("loongarch", PROCESSOR_LOONGARCH64, 65, 0) +LARCH_CPU ("loongarch64", PROCESSOR_LOONGARCH64, 65, 0) +LARCH_CPU ("gs464v", PROCESSOR_GS464V, 65, 0) + diff --git a/gcc/config/loongarch/loongarch-d.c b/gcc/config/loongarch/loongarch-d.c new file mode 100644 index 0000000000000000000000000000000000000000..971e5d33eec427fec344873ae1287f289f989700 --- /dev/null +++ b/gcc/config/loongarch/loongarch-d.c @@ -0,0 +1,31 @@ +/* Subroutines for the D front end on the LARCH architecture. + Copyright (C) 2017 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "d/d-target.h" +#include "d/d-target-def.h" + +/* Implement TARGET_D_CPU_VERSIONS for LARCH targets. 
*/ + +void +loongarch_d_target_versions (void) +{ + // need to be improved !! +} diff --git a/gcc/config/loongarch/loongarch-ftypes.def b/gcc/config/loongarch/loongarch-ftypes.def new file mode 100644 index 0000000000000000000000000000000000000000..c000fedeb8f475c24315c3727d470e7f0116c591 --- /dev/null +++ b/gcc/config/loongarch/loongarch-ftypes.def @@ -0,0 +1,93 @@ +/* Definitions of prototypes for LARCH built-in functions. -*- C -*- + Copyright (C) 2007-2018 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Invoke DEF_LARCH_FTYPE (NARGS, LIST) for each prototype used by + LARCH built-in functions, where: + + NARGS is the number of arguments. + LIST contains the return-type code followed by the codes for each + argument type. + + Argument- and return-type codes are either modes or one of the following: + + VOID for void_type_node + INT for integer_type_node + POINTER for ptr_type_node + + (we don't use PTR because that's a ANSI-compatibillity macro). + + Please keep this list lexicographically sorted by the LIST argument. */ + +DEF_LARCH_FTYPE (1, (DF, DF)) +DEF_LARCH_FTYPE (2, (DF, DF, DF)) +DEF_LARCH_FTYPE (1, (DI, DI)) +DEF_LARCH_FTYPE (1, (DI, SI)) +DEF_LARCH_FTYPE (1, (DI, UQI)) +DEF_LARCH_FTYPE (1, (UDI, USI)) +DEF_LARCH_FTYPE (1, (UQI, USI)) +DEF_LARCH_FTYPE (1, (USI, UQI)) +DEF_LARCH_FTYPE (1, (UHI, USI)) +DEF_LARCH_FTYPE (2, (DI, DI, DI)) +DEF_LARCH_FTYPE (2, (DI, DI, SI)) +DEF_LARCH_FTYPE (2, (DI, DI, UQI)) +DEF_LARCH_FTYPE (2, (VOID, DI, UQI)) +DEF_LARCH_FTYPE (2, (VOID, SI, UQI)) +DEF_LARCH_FTYPE (2, (UDI, UDI, USI)) +DEF_LARCH_FTYPE (3, (DI, DI, SI, SI)) +DEF_LARCH_FTYPE (3, (DI, DI, USI, USI)) +DEF_LARCH_FTYPE (3, (DI, DI, DI, QI)) +DEF_LARCH_FTYPE (3, (UDI, UDI, UDI, USI)) +DEF_LARCH_FTYPE (2, (DI, POINTER, SI)) +DEF_LARCH_FTYPE (2, (DI, SI, SI)) +DEF_LARCH_FTYPE (2, (DI, USI, USI)) +DEF_LARCH_FTYPE (2, (INT, DF, DF)) +DEF_LARCH_FTYPE (2, (INT, SF, SF)) +DEF_LARCH_FTYPE (1, (SF, SF)) +DEF_LARCH_FTYPE (2, (SF, SF, SF)) +DEF_LARCH_FTYPE (2, (SI, DI, SI)) +DEF_LARCH_FTYPE (2, (SI, POINTER, SI)) +DEF_LARCH_FTYPE (1, (SI, SI)) +DEF_LARCH_FTYPE (1, (USI, USI)) +DEF_LARCH_FTYPE (1, (SI, UDI)) +DEF_LARCH_FTYPE (2, (QI, QI, QI)) +DEF_LARCH_FTYPE (2, (HI, HI, HI)) +DEF_LARCH_FTYPE (2, (SI, SI, SI)) +DEF_LARCH_FTYPE (2, (SI, QI, SI)) +DEF_LARCH_FTYPE (2, (SI, HI, SI)) +DEF_LARCH_FTYPE (2, (SI, SI, UQI)) +DEF_LARCH_FTYPE (2, (USI, USI, USI)) +DEF_LARCH_FTYPE (3, (SI, SI, SI, SI)) +DEF_LARCH_FTYPE (3, (SI, SI, SI, QI)) +DEF_LARCH_FTYPE (3, (USI, USI, USI, USI)) +DEF_LARCH_FTYPE (1, (SI, UQI)) +DEF_LARCH_FTYPE (1, (SI, VOID)) +DEF_LARCH_FTYPE (2, (UDI, UDI, UDI)) +DEF_LARCH_FTYPE (1, (USI, VOID)) +DEF_LARCH_FTYPE (2, (VOID, SI, CVPOINTER)) +DEF_LARCH_FTYPE (2, (VOID, SI, SI)) +DEF_LARCH_FTYPE (2, (VOID, DI, DI)) +DEF_LARCH_FTYPE (2, (VOID, UQI, SI)) +DEF_LARCH_FTYPE (1, (VOID, USI)) +DEF_LARCH_FTYPE (2, (VOID, USI, UQI)) +DEF_LARCH_FTYPE (2, (VOID, UQI, USI)) +DEF_LARCH_FTYPE (2, (VOID, UHI, USI)) 
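For reference, how one entry in this list becomes a user-visible prototype, traced for the type that the iocsrwr_b builtin above is registered with:

    // DEF_LARCH_FTYPE (2, (VOID, UQI, USI))
    //   -> enum identifier  LARCH_VOID_FTYPE_UQI_USI        (via LARCH_FTYPE_NAME2)
    //   -> function type    build_function_type_list (void_type_node,
    //                         unsigned_intQI_type_node,
    //                         unsigned_intSI_type_node, NULL_TREE)
    //   -> C prototype      void __builtin_loongarch_iocsrwr_b (unsigned char,
    //                                                           unsigned int);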
+DEF_LARCH_FTYPE (2, (VOID, USI, USI)) +DEF_LARCH_FTYPE (2, (VOID, UDI, USI)) +DEF_LARCH_FTYPE (3, (VOID, USI, USI, SI)) +DEF_LARCH_FTYPE (3, (VOID, USI, UDI, SI)) diff --git a/gcc/config/loongarch/loongarch-modes.def b/gcc/config/loongarch/loongarch-modes.def new file mode 100644 index 0000000000000000000000000000000000000000..325db8dde23a069739b98f7b27d04900735acb83 --- /dev/null +++ b/gcc/config/loongarch/loongarch-modes.def @@ -0,0 +1,31 @@ +/* LARCH extra machine modes. + Copyright (C) 2003-2018 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +FLOAT_MODE (TF, 16, ieee_quad_format); + +VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ + + +INT_MODE (OI, 32); + +/* Keep the OI modes from confusing the compiler into thinking + that these modes could actually be used for computation. They are + only holders for vectors during data movement. */ +#define MAX_BITSIZE_MODE_ANY_INT (128) + diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h new file mode 100644 index 0000000000000000000000000000000000000000..35bb6a407508d2fd742c15e7c397c8b69bf7c373 --- /dev/null +++ b/gcc/config/loongarch/loongarch-opts.h @@ -0,0 +1,38 @@ +/* Definitions for option handling for LARCH. + Copyright (C) 1989-2018 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef LARCH_OPTS_H +#define LARCH_OPTS_H + +/* No enumeration is defined to index the -march= values (entries in + loongarch_cpu_info_table), with the type int being used instead, but we + need to distinguish the special "from-abi" and "native" values. */ +#define LARCH_ARCH_OPTION_FROM_ABI -1 +#define LARCH_ARCH_OPTION_NATIVE -2 + + +enum loongarch_code_model { + LARCH_CMODEL_NORMAL, + LARCH_CMODEL_TINY, + LARCH_CMODEL_TINY_STATIC, + LARCH_CMODEL_LARGE, + LARCH_CMODEL_EXTREME +}; + +#endif diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h new file mode 100644 index 0000000000000000000000000000000000000000..2fec3e91872673a129c7a4a9c9ff0be103eb5289 --- /dev/null +++ b/gcc/config/loongarch/loongarch-protos.h @@ -0,0 +1,276 @@ +/* Prototypes of target machine for GNU compiler. LARCH version. + Copyright (C) 1989-2018 Free Software Foundation, Inc. + Contributed by A. Lichnewsky (lich@inria.inria.fr). + Changed by Michael Meissner (meissner@osf.org). 
+ 64-bit r4000 support by Ian Lance Taylor (ian@cygnus.com) and + Brendan Eich (brendan@microunity.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef GCC_LARCH_PROTOS_H +#define GCC_LARCH_PROTOS_H + +/* Describes how a symbol is used. + + SYMBOL_CONTEXT_CALL + The symbol is used as the target of a call instruction. + + SYMBOL_CONTEXT_LEA + The symbol is used in a load-address operation. + + SYMBOL_CONTEXT_MEM + The symbol is used as the address in a MEM. */ +enum loongarch_symbol_context { + SYMBOL_CONTEXT_CALL, + SYMBOL_CONTEXT_LEA, + SYMBOL_CONTEXT_MEM +}; + +/* Classifies a SYMBOL_REF, LABEL_REF or UNSPEC address. + + SYMBOL_GOT_DISP + The symbol's value will be loaded directly from the GOT. + + SYMBOL_TLS + A thread-local symbol. + + SYMBOL_TLSGD + SYMBOL_TLSLDM + UNSPEC wrappers around SYMBOL_TLS, corresponding to the + thread-local storage relocation operators. + */ +enum loongarch_symbol_type { + SYMBOL_GOT_DISP, + SYMBOL_TLS, + SYMBOL_TLSGD, + SYMBOL_TLSLDM +}; +#define NUM_SYMBOL_TYPES (SYMBOL_TLSLDM + 1) + +/* Classifies a type of call. + + LARCH_CALL_NORMAL + A normal call or call_value pattern. + + LARCH_CALL_SIBCALL + A sibcall or sibcall_value pattern. + + LARCH_CALL_EPILOGUE + A call inserted in the epilogue. */ +enum loongarch_call_type { + LARCH_CALL_NORMAL, + LARCH_CALL_SIBCALL, + LARCH_CALL_EPILOGUE +}; + +/* Controls the conditions under which certain instructions are split. + + SPLIT_IF_NECESSARY + Only perform splits that are necessary for correctness + (because no unsplit version exists). + + SPLIT_FOR_SPEED + Perform splits that are necessary for correctness or + beneficial for code speed. + + SPLIT_FOR_SIZE + Perform splits that are necessary for correctness or + beneficial for code size. */ +enum loongarch_split_type { + SPLIT_IF_NECESSARY, + SPLIT_FOR_SPEED, + SPLIT_FOR_SIZE +}; + +extern const char *const loongarch_fp_conditions[16]; + +/***********************/ +/* N_LARCH-PORT */ +/***********************/ +/* Routines implemented in n_loongarch.c. 
*/ +extern rtx n_loongarch_emit_move (rtx, rtx); +extern const char *n_loongarch_output_gpr_save (unsigned); +extern void n_loongarch_set_return_address (rtx, rtx); +extern HOST_WIDE_INT n_loongarch_initial_elimination_offset (int, int); +extern void n_loongarch_expand_prologue (void); +extern void n_loongarch_expand_epilogue (bool); +extern bool n_loongarch_can_use_return_insn (void); +extern rtx n_loongarch_function_value (const_tree, const_tree, enum machine_mode); +/***********************/ +/* N_LARCH-PORT */ +/***********************/ + +extern bool loongarch_symbolic_constant_p (rtx, enum loongarch_symbol_context, + enum loongarch_symbol_type *); +extern int loongarch_regno_mode_ok_for_base_p (int, machine_mode, bool); +extern bool loongarch_stack_address_p (rtx, machine_mode); +extern int loongarch_address_insns (rtx, machine_mode, bool); +extern int loongarch_const_insns (rtx); +extern int loongarch_split_const_insns (rtx); +extern int loongarch_split_128bit_const_insns (rtx); +extern int loongarch_load_store_insns (rtx, rtx_insn *); +extern int loongarch_idiv_insns (machine_mode); +extern rtx_insn *loongarch_emit_move (rtx, rtx); +#ifdef RTX_CODE +extern void loongarch_emit_binary (enum rtx_code, rtx, rtx, rtx); +#endif +extern rtx loongarch_pic_base_register (rtx); +extern bool loongarch_split_symbol (rtx, rtx, machine_mode, rtx *); +extern rtx loongarch_unspec_address (rtx, enum loongarch_symbol_type); +extern rtx loongarch_strip_unspec_address (rtx); +extern void loongarch_move_integer (rtx, rtx, unsigned HOST_WIDE_INT); +extern bool loongarch_legitimize_move (machine_mode, rtx, rtx); +extern rtx loongarch_legitimize_call_address (rtx); + +extern rtx loongarch_subword (rtx, bool); +extern bool loongarch_split_move_p (rtx, rtx, enum loongarch_split_type); +extern void loongarch_split_move (rtx, rtx, enum loongarch_split_type, rtx); +extern bool loongarch_split_move_insn_p (rtx, rtx, rtx); +extern void loongarch_split_move_insn (rtx, rtx, rtx); +extern const char *loongarch_output_move (rtx, rtx); +extern bool loongarch_cfun_has_cprestore_slot_p (void); +extern bool loongarch_cprestore_address_p (rtx, bool); +#ifdef RTX_CODE +extern void loongarch_expand_scc (rtx *); +extern void loongarch_expand_conditional_branch (rtx *); +extern bool loongarch_expand_conditional_move (rtx *); +extern void loongarch_expand_conditional_trap (rtx); +#endif +extern bool loongarch_get_pic_call_symbol (rtx *, int); +extern void loongarch_set_return_address (rtx, rtx); +extern bool loongarch_move_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int); +extern bool loongarch_store_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int); +extern bool loongarch_expand_block_move (rtx, rtx, rtx); + +extern void loongarch_init_cumulative_args (CUMULATIVE_ARGS *, tree); +extern bool loongarch_pad_reg_upward (machine_mode, tree); + +extern bool loongarch_expand_ext_as_unaligned_load (rtx, rtx, HOST_WIDE_INT, + HOST_WIDE_INT, bool); +extern bool loongarch_expand_ins_as_unaligned_store (rtx, rtx, HOST_WIDE_INT, + HOST_WIDE_INT); +extern bool loongarch_mem_fits_mode_p (machine_mode mode, rtx x); +extern HOST_WIDE_INT loongarch_debugger_offset (rtx, HOST_WIDE_INT); + +extern void loongarch_push_asm_switch (struct loongarch_asm_switch *); +extern void loongarch_pop_asm_switch (struct loongarch_asm_switch *); +extern void loongarch_output_external (FILE *, tree, const char *); +extern void loongarch_output_ascii (FILE *, const char *, size_t); +extern void loongarch_output_aligned_decl_common (FILE *, tree, const char *, 
+ unsigned HOST_WIDE_INT, + unsigned int); +extern void loongarch_declare_common_object (FILE *, const char *, + const char *, unsigned HOST_WIDE_INT, + unsigned int, bool); +extern void loongarch_declare_object (FILE *, const char *, const char *, + const char *, ...) ATTRIBUTE_PRINTF_4; +extern void loongarch_declare_object_name (FILE *, const char *, tree); +extern void loongarch_finish_declare_object (FILE *, tree, int, int); +extern void loongarch_set_text_contents_type (FILE *, const char *, + unsigned long, bool); + +extern bool loongarch_small_data_pattern_p (rtx); +extern rtx loongarch_rewrite_small_data (rtx); +extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int); +extern rtx loongarch_return_addr (int, rtx); +extern bool loongarch_must_initialize_gp_p (void); +extern void loongarch_emit_save_slot_move (rtx, rtx, rtx); +extern void loongarch_expand_prologue (void); +extern void loongarch_expand_epilogue (bool); +extern bool loongarch_can_use_return_insn (void); + +extern enum reg_class loongarch_secondary_reload_class (enum reg_class, + machine_mode, + rtx, bool); +extern int loongarch_class_max_nregs (enum reg_class, machine_mode); + +extern machine_mode loongarch_hard_regno_caller_save_mode (unsigned int, + unsigned int, + machine_mode); +extern int loongarch_adjust_insn_length (rtx_insn *, int); +extern const char *loongarch_output_conditional_branch (rtx_insn *, rtx *, + const char *, const char *); +extern const char *loongarch_output_order_conditional_branch (rtx_insn *, rtx *, + bool); +extern const char *loongarch_output_equal_conditional_branch (rtx_insn *, rtx *, + bool); +extern const char *loongarch_output_division (const char *, rtx *); +extern const char *loongarch_output_probe_stack_range (rtx, rtx); +extern bool loongarch_hard_regno_rename_ok (unsigned int, unsigned int); +extern bool loongarch_linked_madd_p (rtx_insn *, rtx_insn *); +extern bool loongarch_store_data_bypass_p (rtx_insn *, rtx_insn *); +extern int loongarch_dspalu_bypass_p (rtx, rtx); +extern rtx loongarch_prefetch_cookie (rtx, rtx); + +extern bool loongarch_global_symbol_p (const_rtx); +extern bool loongarch_global_symbol_noweak_p (const_rtx); +extern bool loongarch_weak_symbol_p (const_rtx); +extern bool loongarch_symbol_binds_local_p (const_rtx); + +extern const char *current_section_name (void); +extern unsigned int current_section_flags (void); +extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); + +extern bool and_operands_ok (machine_mode, rtx, rtx); +extern bool loongarch_fmadd_bypass (rtx_insn *, rtx_insn *); + +union loongarch_gen_fn_ptrs +{ + rtx (*fn_8) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx); + rtx (*fn_7) (rtx, rtx, rtx, rtx, rtx, rtx, rtx); + rtx (*fn_6) (rtx, rtx, rtx, rtx, rtx, rtx); + rtx (*fn_5) (rtx, rtx, rtx, rtx, rtx); + rtx (*fn_4) (rtx, rtx, rtx, rtx); +}; + +extern void loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs, + rtx, rtx, rtx, rtx, rtx); + +extern bool loongarch_signed_immediate_p (unsigned HOST_WIDE_INT, int, int); +extern bool loongarch_unsigned_immediate_p (unsigned HOST_WIDE_INT, int, int); +extern bool loongarch_load_store_pair_p (bool, rtx *); +extern bool loongarch_movep_target_p (rtx, rtx); +extern bool loongarch_12bit_offset_address_p (rtx, machine_mode); +extern bool loongarch_14bit_shifted_offset_address_p (rtx, machine_mode); +extern bool loongarch_9bit_offset_address_p (rtx, machine_mode); +extern rtx loongarch_expand_thread_pointer (rtx); + +extern bool loongarch_eh_uses (unsigned int); +extern bool 
loongarch_epilogue_uses (unsigned int); +extern int loongarch_trampoline_code_size (void); +extern bool loongarch_load_store_bonding_p (rtx *, machine_mode, bool); +extern bool loongarch_split_symbol_type (enum loongarch_symbol_type); + +typedef rtx (*mulsidi3_gen_fn) (rtx, rtx, rtx); + +extern void loongarch_register_frame_header_opt (void); + +extern void loongarch_declare_function_name(FILE *, const char *, tree); +/* Routines implemented in loongarch-d.c */ +extern void loongarch_d_target_versions (void); + +/* Routines implemented in loongarch-c.c. */ +void loongarch_cpu_cpp_builtins (cpp_reader *); + +extern void loongarch_init_builtins (void); +extern void loongarch_atomic_assign_expand_fenv (tree *, tree *, tree *); +extern tree loongarch_builtin_decl (unsigned int, bool); +extern rtx loongarch_expand_builtin (tree, rtx, rtx subtarget ATTRIBUTE_UNUSED, + machine_mode, int); +extern tree loongarch_build_builtin_va_list (void); + +#endif /* ! GCC_LARCH_PROTOS_H */ diff --git a/gcc/config/loongarch/loongarch-tables.opt b/gcc/config/loongarch/loongarch-tables.opt new file mode 100644 index 0000000000000000000000000000000000000000..19b781917950ac97d1c09735a6a43a4e0a1a641c --- /dev/null +++ b/gcc/config/loongarch/loongarch-tables.opt @@ -0,0 +1,50 @@ +; -*- buffer-read-only: t -*- +; Generated automatically by genopt.sh from loongarch-cpus.def. + +; Copyright (C) 2011-2018 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +Enum +Name(loongarch_arch_opt_value) Type(int) +Known LARCH CPUs (for use with the -march= and -mtune= options): + +Enum +Name(loongarch_loongarch_opt_value) Type(int) +Known LARCH ISA levels (for use with the -loongarch option): + +EnumValue +Enum(loongarch_arch_opt_value) String(from-abi) Value(LARCH_ARCH_OPTION_FROM_ABI) + +EnumValue +Enum(loongarch_arch_opt_value) String(native) Value(LARCH_ARCH_OPTION_NATIVE) DriverOnly + +EnumValue +Enum(loongarch_arch_opt_value) String(loongarch) Value(0) Canonical + +EnumValue +Enum(loongarch_loongarch_opt_value) String() Value(0) + +EnumValue +Enum(loongarch_arch_opt_value) String(loongarch64) Value(1) Canonical + +EnumValue +Enum(loongarch_loongarch_opt_value) String(64) Value(1) + +EnumValue +Enum(loongarch_arch_opt_value) String(gs464v) Value(2) Canonical + diff --git a/gcc/config/loongarch/loongarch.c b/gcc/config/loongarch/loongarch.c new file mode 100644 index 0000000000000000000000000000000000000000..22a8d776f0a707da62c2338076ce6d5b213a1866 --- /dev/null +++ b/gcc/config/loongarch/loongarch.c @@ -0,0 +1,9636 @@ +/* Subroutines used for LARCH code generation. + Copyright (C) 1989-2018 Free Software Foundation, Inc. + Contributed by A. Lichnewsky, lich@inria.inria.fr. + Changes by Michael Meissner, meissner@osf.org. + 64-bit r4000 support by Ian Lance Taylor, ian@cygnus.com, and + Brendan Eich, brendan@microunity.com. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "target.h" +#include "rtl.h" +#include "tree.h" +#include "memmodel.h" +#include "gimple.h" +#include "cfghooks.h" +#include "df.h" +#include "tm_p.h" +#include "stringpool.h" +#include "attribs.h" +#include "optabs.h" +#include "regs.h" +#include "emit-rtl.h" +#include "recog.h" +#include "cgraph.h" +#include "diagnostic.h" +#include "insn-attr.h" +#include "output.h" +#include "alias.h" +#include "fold-const.h" +#include "varasm.h" +#include "stor-layout.h" +#include "calls.h" +#include "explow.h" +#include "expr.h" +#include "libfuncs.h" +#include "reload.h" +#include "common/common-target.h" +#include "langhooks.h" +#include "cfgrtl.h" +#include "cfganal.h" +#include "sched-int.h" +#include "gimplify.h" +#include "target-globals.h" +#include "tree-pass.h" +#include "context.h" +#include "builtins.h" +#include "rtl-iter.h" + +/* This file should be included last. */ +#include "target-def.h" + +/* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF. */ +#define UNSPEC_ADDRESS_P(X) \ + (GET_CODE (X) == UNSPEC \ + && XINT (X, 1) >= UNSPEC_ADDRESS_FIRST \ + && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES) + +/* Extract the symbol or label from UNSPEC wrapper X. */ +#define UNSPEC_ADDRESS(X) \ + XVECEXP (X, 0, 0) + +/* Extract the symbol type from UNSPEC wrapper X. */ +#define UNSPEC_ADDRESS_TYPE(X) \ + ((enum loongarch_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST)) + +/* The maximum distance between the top of the stack frame and the + value $sp has when we save and restore registers. +*/ +#define LARCH_MAX_FIRST_STACK_STEP 0x7f0 + +/* True if INSN is a loongarch.md pattern or asm statement. */ +/* ??? This test exists through the compiler, perhaps it should be + moved to rtl.h. */ +#define USEFUL_INSN_P(INSN) \ + (NONDEBUG_INSN_P (INSN) \ + && GET_CODE (PATTERN (INSN)) != USE \ + && GET_CODE (PATTERN (INSN)) != CLOBBER) + +/* If INSN is a delayed branch sequence, return the first instruction + in the sequence, otherwise return INSN itself. */ +#define SEQ_BEGIN(INSN) \ + (INSN_P (INSN) && GET_CODE (PATTERN (INSN)) == SEQUENCE \ + ? as_a (XVECEXP (PATTERN (INSN), 0, 0)) \ + : (INSN)) + +/* Likewise for the last instruction in a delayed branch sequence. */ +#define SEQ_END(INSN) \ + (INSN_P (INSN) && GET_CODE (PATTERN (INSN)) == SEQUENCE \ + ? as_a (XVECEXP (PATTERN (INSN), \ + 0, \ + XVECLEN (PATTERN (INSN), 0) - 1)) \ + : (INSN)) + +/* Execute the following loop body with SUBINSN set to each instruction + between SEQ_BEGIN (INSN) and SEQ_END (INSN) inclusive. */ +#define FOR_EACH_SUBINSN(SUBINSN, INSN) \ + for ((SUBINSN) = SEQ_BEGIN (INSN); \ + (SUBINSN) != NEXT_INSN (SEQ_END (INSN)); \ + (SUBINSN) = NEXT_INSN (SUBINSN)) + +/* True if bit BIT is set in VALUE. */ +#define BITSET_P(VALUE, BIT) (((VALUE) & (1 << (BIT))) != 0) + +/* Classifies an address. 
+ + ADDRESS_REG + A natural register + offset address. The register satisfies + loongarch_valid_base_register_p and the offset is a const_arith_operand. + + ADDRESS_CONST_INT + A signed 16-bit constant address. + + ADDRESS_SYMBOLIC: + A constant symbolic address. */ +enum loongarch_address_type { + ADDRESS_REG, + ADDRESS_CONST_INT, + ADDRESS_SYMBOLIC +}; + +/* A class used to control a comdat-style stub that we output in each + translation unit that needs it. */ +class loongarch_one_only_stub { +public: + virtual ~loongarch_one_only_stub () {} + + /* Return the name of the stub. */ + virtual const char *get_name () = 0; + + /* Output the body of the function to asm_out_file. */ + virtual void output_body () = 0; +}; + +/* Tuning information that is automatically derived from other sources + (such as the scheduler). */ +static struct { + /* The architecture and tuning settings that this structure describes. */ + enum processor arch; + enum processor tune; + + /* True if the structure has been initialized. */ + bool initialized_p; + +} loongarch_tuning_info; + +/* Information about an address described by loongarch_address_type. + + ADDRESS_CONST_INT + No fields are used. + + ADDRESS_REG + REG is the base register and OFFSET is the constant offset. + + ADDRESS_SYMBOLIC + SYMBOL_TYPE is the type of symbol that the address references. */ +struct loongarch_address_info { + enum loongarch_address_type type; + rtx reg; + rtx offset; + enum loongarch_symbol_type symbol_type; +}; + +/* Method to load immediate number fields. + + METHOD_NORMAL: + load immediate number 0-31 bit + + METHOD_LU32I: + load imm 32-51 bit + + METHOD_LU52I: + load imm 52-63 bit + + METHOD_INSV: + imm 0xfff00000fffffxxx + */ +enum loongarch_load_imm_method { + METHOD_NORMAL, + METHOD_LU32I, + METHOD_LU52I, + METHOD_INSV +}; + +/* One stage in a constant building sequence. These sequences have + the form: + + A = VALUE[0] + A = A CODE[1] VALUE[1] + A = A CODE[2] VALUE[2] + ... + + where A is an accumulator, each CODE[i] is a binary rtl operation + and each VALUE[i] is a constant integer. CODE[0] is undefined. */ +struct loongarch_integer_op { + enum rtx_code code; + unsigned HOST_WIDE_INT value; + enum loongarch_load_imm_method method; +}; + +/* The largest number of operations needed to load an integer constant. + The worst accepted case for 64-bit constants is LUI,ORI,SLL,ORI,SLL,ORI. + When the lowest bit is clear, we can try, but reject a sequence with + an extra SLL at the end. */ +#define LARCH_MAX_INTEGER_OPS 9 + +/* Costs of various operations on the different architectures. */ + +struct loongarch_rtx_cost_data +{ + unsigned short fp_add; + unsigned short fp_mult_sf; + unsigned short fp_mult_df; + unsigned short fp_div_sf; + unsigned short fp_div_df; + unsigned short int_mult_si; + unsigned short int_mult_di; + unsigned short int_div_si; + unsigned short int_div_di; + unsigned short branch_cost; + unsigned short memory_latency; +}; + +/* Global variables for machine-dependent things. */ + +/* The -G setting, or the configuration's default small-data limit if + no -G option is given. */ +static unsigned int loongarch_small_data_threshold; + +/* The number of file directives written by loongarch_output_filename. */ +int num_source_filenames; + +/* The name that appeared in the last .file directive written by + loongarch_output_filename, or "" if loongarch_output_filename hasn't + written anything yet. */ +const char *current_function_file = ""; + +/* Arrays that map GCC register numbers to debugger register numbers. 
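An illustration of the constant-building sequence described above for one representative 64-bit constant; the instruction spellings follow the LoongArch base ISA, and the exact sequence the compiler picks for a given value may differ:

    // Loading 0x123456789abcdef0, one loongarch_integer_op per step:
    //
    //   lu12i.w  $r12, 0x9abcd          // METHOD_NORMAL: bits 12..31
    //   ori      $r12, $r12, 0xef0      // METHOD_NORMAL: bits 0..11
    //   lu32i.d  $r12, 0x45678          // METHOD_LU32I:  bits 32..51
    //   lu52i.d  $r12, $r12, 0x123      // METHOD_LU52I:  bits 52..63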
*/ +int loongarch_dbx_regno[FIRST_PSEUDO_REGISTER]; +int loongarch_dwarf_regno[FIRST_PSEUDO_REGISTER]; + +/* Information about the current function's epilogue, used only while + expanding it. */ +static struct { + /* A list of queued REG_CFA_RESTORE notes. */ + rtx cfa_restores; + + /* The CFA is currently defined as CFA_REG + CFA_OFFSET. */ + rtx cfa_reg; + HOST_WIDE_INT cfa_offset; + + /* The offset of the CFA from the stack pointer while restoring + registers. */ + HOST_WIDE_INT cfa_restore_sp_offset; +} loongarch_epilogue; + + +/* The current instruction-set architecture. */ +enum processor loongarch_arch; +const struct loongarch_cpu_info *loongarch_arch_info; + +/* The processor that we should tune the code for. */ +enum processor loongarch_tune; +const struct loongarch_cpu_info *loongarch_tune_info; + +/* The ISA level associated with loongarch_arch. */ +int loongarch_isa; + +/* The ISA revision level. */ +int loongarch_isa_rev; + +/* The architecture selected by -loongarchN, or null if -loongarchN wasn't used. */ +static const struct loongarch_cpu_info *loongarch_isa_option_info; + +/* Which cost information to use. */ +static const struct loongarch_rtx_cost_data *loongarch_cost; + +/* The ambient target flags. */ +static int loongarch_base_target_flags; + +/* The default compression mode. */ +unsigned int loongarch_base_compression_flags; + +/* The ambient values of other global variables. */ +static int loongarch_base_schedule_insns; /* flag_schedule_insns */ +static int loongarch_base_reorder_blocks_and_partition; /* flag_reorder... */ +static int loongarch_base_move_loop_invariants; /* flag_move_loop_invariants */ +static const char *loongarch_base_align_loops; /* flag_align_loops */ +static const char *loongarch_base_align_jumps; /* flag_align_jumps */ +static const char *loongarch_base_align_functions; /* str_align_functions */ + +/* Index [M][R] is true if register R is allowed to hold a value of mode M. */ +static bool loongarch_hard_regno_mode_ok_p[MAX_MACHINE_MODE][FIRST_PSEUDO_REGISTER]; + +/* Index C is true if character C is a valid PRINT_OPERAND punctation + character. */ +static bool loongarch_print_operand_punct[256]; + +static GTY (()) int loongarch_output_filename_first_time = 1; + +/* loongarch_use_pcrel_pool_p[X] is true if symbols of type X should be + forced into a PC-relative constant pool. */ +bool loongarch_use_pcrel_pool_p[NUM_SYMBOL_TYPES]; + +/* Cached value of can_issue_more. This is cached in loongarch_variable_issue hook + and returned from loongarch_sched_reorder2. */ +static int cached_can_issue_more; + +/* Index R is the smallest register class that contains register R. 
*/ +const enum reg_class loongarch_regno_to_class[FIRST_PSEUDO_REGISTER] = { + GR_REGS, GR_REGS, GR_REGS, GR_REGS, + JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, + JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, + SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, + SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, + SIBCALL_REGS, GR_REGS, GR_REGS, JALR_REGS, + JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, + JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS, + + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + ST_REGS, ST_REGS, ST_REGS, ST_REGS, + ST_REGS, ST_REGS, ST_REGS, ST_REGS, + FRAME_REGS, FRAME_REGS +}; + +static tree loongarch_handle_interrupt_attr (tree *, tree, tree, int, bool *); +static tree loongarch_handle_use_shadow_register_set_attr (tree *, tree, tree, int, + bool *); + +/* The value of TARGET_ATTRIBUTE_TABLE. */ +static const struct attribute_spec loongarch_attribute_table[] = { + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, + affects_type_identity, handler, exclude } */ + { "long_call", 0, 0, false, true, true, false, NULL, NULL }, + { "short_call", 0, 0, false, true, true, false, NULL, NULL }, + { "far", 0, 0, false, true, true, false, NULL, NULL }, + { "near", 0, 0, false, true, true, false, NULL, NULL }, + { "nocompression", 0, 0, true, false, false, false, NULL, NULL }, + /* Allow functions to be specified as interrupt handlers */ + { "interrupt", 0, 1, false, true, true, false, loongarch_handle_interrupt_attr, + NULL }, + { "use_shadow_register_set", 0, 1, false, true, true, false, + loongarch_handle_use_shadow_register_set_attr, NULL }, + { "keep_interrupts_masked", 0, 0, false, true, true, false, NULL, NULL }, + { "use_debug_exception_return", 0, 0, false, true, true, false, NULL, NULL }, + { NULL, 0, 0, false, false, false, false, NULL, NULL } +}; + +/* A table describing all the processors GCC knows about; see + loongarch-cpus.def for details. */ +static const struct loongarch_cpu_info loongarch_cpu_info_table[] = { +#define LARCH_CPU(NAME, CPU, ISA, FLAGS) \ + { NAME, CPU, ISA, FLAGS }, +#include "loongarch-cpus.def" +#undef LARCH_CPU +}; + +/* Default costs. If these are used for a processor we should look + up the actual costs. */ +#define DEFAULT_COSTS COSTS_N_INSNS (6), /* fp_add */ \ + COSTS_N_INSNS (7), /* fp_mult_sf */ \ + COSTS_N_INSNS (8), /* fp_mult_df */ \ + COSTS_N_INSNS (23), /* fp_div_sf */ \ + COSTS_N_INSNS (36), /* fp_div_df */ \ + COSTS_N_INSNS (10), /* int_mult_si */ \ + COSTS_N_INSNS (10), /* int_mult_di */ \ + COSTS_N_INSNS (69), /* int_div_si */ \ + COSTS_N_INSNS (69), /* int_div_di */ \ + 2, /* branch_cost */ \ + 4 /* memory_latency */ + +/* Floating-point costs for processors without an FPU. Just assume that + all floating-point libcalls are very expensive. */ +#define SOFT_FP_COSTS COSTS_N_INSNS (256), /* fp_add */ \ + COSTS_N_INSNS (256), /* fp_mult_sf */ \ + COSTS_N_INSNS (256), /* fp_mult_df */ \ + COSTS_N_INSNS (256), /* fp_div_sf */ \ + COSTS_N_INSNS (256) /* fp_div_df */ + +/* Costs to use when optimizing for size. 
*/ +static const struct loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size = { + COSTS_N_INSNS (1), /* fp_add */ + COSTS_N_INSNS (1), /* fp_mult_sf */ + COSTS_N_INSNS (1), /* fp_mult_df */ + COSTS_N_INSNS (1), /* fp_div_sf */ + COSTS_N_INSNS (1), /* fp_div_df */ + COSTS_N_INSNS (1), /* int_mult_si */ + COSTS_N_INSNS (1), /* int_mult_di */ + COSTS_N_INSNS (1), /* int_div_si */ + COSTS_N_INSNS (1), /* int_div_di */ + 2, /* branch_cost */ + 4 /* memory_latency */ +}; + +/* Costs to use when optimizing for speed, indexed by processor. */ +static const struct loongarch_rtx_cost_data + loongarch_rtx_cost_data[NUM_PROCESSOR_VALUES] = { + { /* loongarch */ + DEFAULT_COSTS + }, + { /* loongarch64 */ + DEFAULT_COSTS + }, + { /* gs464v */ + DEFAULT_COSTS + } +}; + +/* Information about a single argument. */ +struct n_loongarch_arg_info { + /* True if the argument is at least partially passed on the stack. */ + bool stack_p; + + /* The number of integer registers allocated to this argument. */ + unsigned int num_gprs; + + /* The offset of the first register used, provided num_gprs is nonzero. + If passed entirely on the stack, the value is MAX_ARGS_IN_REGISTERS. */ + unsigned int gpr_offset; + + /* The number of floating-point registers allocated to this argument. */ + unsigned int num_fprs; + + /* The offset of the first register used, provided num_fprs is nonzero. */ + unsigned int fpr_offset; +}; + + +/* Emit a move from SRC to DEST. Assume that the move expanders can + handle all moves if !can_create_pseudo_p (). The distinction is + important because, unlike emit_move_insn, the move expanders know + how to force Pmode objects into the constant pool even when the + constant pool address is not itself legitimate. */ + +rtx +n_loongarch_emit_move (rtx dest, rtx src) +{ + return (can_create_pseudo_p () + ? emit_move_insn (dest, src) + : emit_move_insn_1 (dest, src)); +} + +/* Implement TARGET_FUNCTION_ARG_BOUNDARY. Every parameter gets at + least PARM_BOUNDARY bits of alignment, but will be given anything up + to PREFERRED_STACK_BOUNDARY bits if the type requires it. */ + +static unsigned int +n_loongarch_function_arg_boundary (machine_mode mode, const_tree type) +{ + unsigned int alignment; + + /* Use natural alignment if the type is not aggregate data. */ + if (type && !AGGREGATE_TYPE_P (type)) + alignment = TYPE_ALIGN (TYPE_MAIN_VARIANT (type)); + else + alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode); + + return MIN (PREFERRED_STACK_BOUNDARY, MAX (PARM_BOUNDARY, alignment)); +} + +/* If MODE represents an argument that can be passed or returned in + floating-point registers, return the number of registers, else 0. */ + +static unsigned +n_loongarch_pass_mode_in_fpr_p (machine_mode mode) +{ + if (GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FP_ARG) + { + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + return 1; + + if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) + return 2; + } + + return 0; +} + +typedef struct { + const_tree type; + HOST_WIDE_INT offset; +} n_loongarch_aggregate_field; + +/* Identify subfields of aggregates that are candidates for passing in + floating-point registers. */ + +static int +n_loongarch_flatten_aggregate_field (const_tree type, + n_loongarch_aggregate_field fields[2], + int n, HOST_WIDE_INT offset) +{ + switch (TREE_CODE (type)) + { + case RECORD_TYPE: + /* Can't handle incomplete types nor sizes that are not fixed. 
*/ + if (!COMPLETE_TYPE_P (type) + || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST + || !tree_fits_uhwi_p (TYPE_SIZE (type))) + return -1; + + for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f)) + if (TREE_CODE (f) == FIELD_DECL) + { + if (!TYPE_P (TREE_TYPE (f))) + return -1; + + HOST_WIDE_INT pos = offset + int_byte_position (f); + n = n_loongarch_flatten_aggregate_field (TREE_TYPE (f), fields, n, pos); + if (n < 0) + return -1; + } + return n; + + case ARRAY_TYPE: + { + HOST_WIDE_INT n_elts; + n_loongarch_aggregate_field subfields[2]; + tree index = TYPE_DOMAIN (type); + tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type)); + int n_subfields = n_loongarch_flatten_aggregate_field (TREE_TYPE (type), + subfields, 0, offset); + + /* Can't handle incomplete types nor sizes that are not fixed. */ + if (n_subfields <= 0 + || !COMPLETE_TYPE_P (type) + || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST + || !index + || !TYPE_MAX_VALUE (index) + || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index)) + || !TYPE_MIN_VALUE (index) + || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index)) + || !tree_fits_uhwi_p (elt_size)) + return -1; + + n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index)) + - tree_to_uhwi (TYPE_MIN_VALUE (index)); + gcc_assert (n_elts >= 0); + + for (HOST_WIDE_INT i = 0; i < n_elts; i++) + for (int j = 0; j < n_subfields; j++) + { + if (n >= 2) + return -1; + + fields[n] = subfields[j]; + fields[n++].offset += i * tree_to_uhwi (elt_size); + } + + return n; + } + + case COMPLEX_TYPE: + { + /* Complex type need consume 2 field, so n must be 0. */ + if (n != 0) + return -1; + + HOST_WIDE_INT elt_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type))); + + if (elt_size <= UNITS_PER_FP_ARG) + { + fields[0].type = TREE_TYPE (type); + fields[0].offset = offset; + fields[1].type = TREE_TYPE (type); + fields[1].offset = offset + elt_size; + + return 2; + } + + return -1; + } + + default: + if (n < 2 + && ((SCALAR_FLOAT_TYPE_P (type) + && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_FP_ARG) + || (INTEGRAL_TYPE_P (type) + && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_WORD))) + { + fields[n].type = type; + fields[n].offset = offset; + return n + 1; + } + else + return -1; + } +} + +/* Identify candidate aggregates for passing in floating-point registers. + Candidates have at most two fields after flattening. */ + +static int +n_loongarch_flatten_aggregate_argument (const_tree type, + n_loongarch_aggregate_field fields[2]) +{ + if (!type || TREE_CODE (type) != RECORD_TYPE) + return -1; + + return n_loongarch_flatten_aggregate_field (type, fields, 0, 0); +} + +/* See whether TYPE is a record whose fields should be returned in one or + two floating-point registers. If so, populate FIELDS accordingly. */ + +static unsigned +n_loongarch_pass_aggregate_in_fpr_pair_p (const_tree type, + n_loongarch_aggregate_field fields[2]) +{ + int n = n_loongarch_flatten_aggregate_argument (type, fields); + + for (int i = 0; i < n; i++) + if (!SCALAR_FLOAT_TYPE_P (fields[i].type)) + return 0; + + return n > 0 ? n : 0; +} + +/* See whether TYPE is a record whose fields should be returned in one or + floating-point register and one integer register. If so, populate + FIELDS accordingly. 
*/ + +static bool +n_loongarch_pass_aggregate_in_fpr_and_gpr_p (const_tree type, + n_loongarch_aggregate_field fields[2]) +{ + unsigned num_int = 0, num_float = 0; + int n = n_loongarch_flatten_aggregate_argument (type, fields); + + for (int i = 0; i < n; i++) + { + num_float += SCALAR_FLOAT_TYPE_P (fields[i].type); + num_int += INTEGRAL_TYPE_P (fields[i].type); + } + + return num_int == 1 && num_float == 1; +} + +/* Return the representation of an argument passed or returned in an FPR + when the value has mode VALUE_MODE and the type has TYPE_MODE. The + two modes may be different for structures like: + + struct __attribute__((packed)) foo { float f; } + + where the SFmode value "f" is passed in REGNO but the struct itself + has mode BLKmode. */ + +static rtx +n_loongarch_pass_fpr_single (machine_mode type_mode, unsigned regno, + machine_mode value_mode) +{ + rtx x = gen_rtx_REG (value_mode, regno); + + if (type_mode != value_mode) + { + x = gen_rtx_EXPR_LIST (VOIDmode, x, const0_rtx); + x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x)); + } + return x; +} + +/* Pass or return a composite value in the FPR pair REGNO and REGNO + 1. + MODE is the mode of the composite. MODE1 and OFFSET1 are the mode and + byte offset for the first value, likewise MODE2 and OFFSET2 for the + second value. */ + +static rtx +n_loongarch_pass_fpr_pair (machine_mode mode, unsigned regno1, + machine_mode mode1, HOST_WIDE_INT offset1, + unsigned regno2, machine_mode mode2, + HOST_WIDE_INT offset2) +{ + return gen_rtx_PARALLEL + (mode, + gen_rtvec (2, + gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (mode1, regno1), + GEN_INT (offset1)), + gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (mode2, regno2), + GEN_INT (offset2)))); +} + +/* Fill INFO with information about a single argument, and return an + RTL pattern to pass or return the argument. CUM is the cumulative + state for earlier arguments. MODE is the mode of this argument and + TYPE is its type (if known). NAMED is true if this is a named + (fixed) argument rather than a variable one. RETURN_P is true if + returning the argument, or false if passing the argument. */ + +static rtx +n_loongarch_get_arg_info (struct n_loongarch_arg_info *info, const CUMULATIVE_ARGS *cum, + machine_mode mode, const_tree type, bool named, + bool return_p) +{ + unsigned num_bytes, num_words; + unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST; + unsigned gpr_base = return_p ? GP_RETURN : GP_ARG_FIRST; + unsigned alignment = n_loongarch_function_arg_boundary (mode, type); + + memset (info, 0, sizeof (*info)); + info->gpr_offset = cum->num_gprs; + info->fpr_offset = cum->num_fprs; + + if (named) + { + n_loongarch_aggregate_field fields[2]; + unsigned fregno = fpr_base + info->fpr_offset; + unsigned gregno = gpr_base + info->gpr_offset; + + /* Pass one- or two-element floating-point aggregates in FPRs. */ + if ((info->num_fprs = n_loongarch_pass_aggregate_in_fpr_pair_p (type, fields)) + && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS) + switch (info->num_fprs) + { + case 1: + return n_loongarch_pass_fpr_single (mode, fregno, + TYPE_MODE (fields[0].type)); + + case 2: + return n_loongarch_pass_fpr_pair (mode, fregno, + TYPE_MODE (fields[0].type), + fields[0].offset, + fregno + 1, + TYPE_MODE (fields[1].type), + fields[1].offset); + + default: + gcc_unreachable (); + } + + /* Pass real and complex floating-point numbers in FPRs. 
*/ + if ((info->num_fprs = n_loongarch_pass_mode_in_fpr_p (mode)) + && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS) + switch (GET_MODE_CLASS (mode)) + { + case MODE_FLOAT: + return gen_rtx_REG (mode, fregno); + + case MODE_COMPLEX_FLOAT: + return n_loongarch_pass_fpr_pair (mode, fregno, GET_MODE_INNER (mode), 0, + fregno + 1, GET_MODE_INNER (mode), + GET_MODE_UNIT_SIZE (mode)); + + default: + gcc_unreachable (); + } + + /* Pass structs with one float and one integer in an FPR and a GPR. */ + if (n_loongarch_pass_aggregate_in_fpr_and_gpr_p (type, fields) + && info->gpr_offset < MAX_ARGS_IN_REGISTERS + && info->fpr_offset < MAX_ARGS_IN_REGISTERS) + { + info->num_gprs = 1; + info->num_fprs = 1; + + if (!SCALAR_FLOAT_TYPE_P (fields[0].type)) + std::swap (fregno, gregno); + + return n_loongarch_pass_fpr_pair (mode, fregno, TYPE_MODE (fields[0].type), + fields[0].offset, + gregno, TYPE_MODE (fields[1].type), + fields[1].offset); + } + } + + /* Work out the size of the argument. */ + num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode); + num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + + /* Doubleword-aligned varargs start on an even register boundary. */ + if (!named && num_bytes != 0 && alignment > BITS_PER_WORD) + info->gpr_offset += info->gpr_offset & 1; + + /* Partition the argument between registers and stack. */ + info->num_fprs = 0; + info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset); + info->stack_p = (num_words - info->num_gprs) != 0; + + if (info->num_gprs || return_p) + return gen_rtx_REG (mode, gpr_base + info->gpr_offset); + + return NULL_RTX; +} + +/* Implement TARGET_FUNCTION_ARG. */ + +static rtx +n_loongarch_function_arg (cumulative_args_t cum_v, const function_arg_info &arg) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + struct n_loongarch_arg_info info; + + if (arg.end_marker_p ()) + return NULL; + + return n_loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false); +} + +/* Implement TARGET_FUNCTION_ARG_ADVANCE. */ + +static void +n_loongarch_function_arg_advance (cumulative_args_t cum_v, + const function_arg_info &arg) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + struct n_loongarch_arg_info info; + + n_loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false); + + /* Advance the register count. This has the effect of setting + num_gprs to MAX_ARGS_IN_REGISTERS if a doubleword-aligned + argument required us to skip the final GPR and pass the whole + argument on the stack. */ + cum->num_fprs = info.fpr_offset + info.num_fprs; + cum->num_gprs = info.gpr_offset + info.num_gprs; +} + +/* Implement TARGET_ARG_PARTIAL_BYTES. */ + +static int +n_loongarch_arg_partial_bytes (cumulative_args_t cum, + const function_arg_info &generic_arg) +{ + struct n_loongarch_arg_info arg; + + n_loongarch_get_arg_info (&arg, get_cumulative_args (cum), generic_arg.mode, + generic_arg.type, generic_arg.named, false); + return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0; +} + +/* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls, + VALTYPE is the return type and MODE is VOIDmode. For libcalls, + VALTYPE is null and MODE is the mode of the return value. 
*/
+
+rtx
+n_loongarch_function_value (const_tree type, const_tree func, machine_mode mode)
+{
+ struct n_loongarch_arg_info info;
+ CUMULATIVE_ARGS args;
+
+ if (type)
+ {
+ int unsigned_p = TYPE_UNSIGNED (type);
+
+ mode = TYPE_MODE (type);
+
+ /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
+ return values, promote the mode here too. */
+ mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
+ }
+
+ memset (&args, 0, sizeof args);
+ return n_loongarch_get_arg_info (&info, &args, mode, type, true, true);
+}
+
+/* Implement TARGET_PASS_BY_REFERENCE. */
+
+static bool
+n_loongarch_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
+{
+ HOST_WIDE_INT size = arg.type_size_in_bytes ();
+ struct n_loongarch_arg_info info;
+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+
+ /* ??? std_gimplify_va_arg_expr passes NULL for cum. Fortunately, we
+ never pass variadic arguments in floating-point registers, so we can
+ avoid the call to n_loongarch_get_arg_info in this case. */
+ if (cum != NULL)
+ {
+ /* Don't pass by reference if we can use a floating-point register. */
+ n_loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
+ if (info.num_fprs)
+ return false;
+ }
+
+ /* Pass by reference if the data do not fit in two integer registers. */
+ return !IN_RANGE (size, 0, 2 * UNITS_PER_WORD);
+}
+
+/* Implement TARGET_RETURN_IN_MEMORY. */
+
+static bool
+n_loongarch_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
+{
+ CUMULATIVE_ARGS args;
+ cumulative_args_t cum = pack_cumulative_args (&args);
+
+ /* The rules for returning in memory are the same as for passing the
+ first named argument by reference. */
+ memset (&args, 0, sizeof args);
+ function_arg_info arg (const_cast <tree> (type), /*named=*/true);
+ return n_loongarch_pass_by_reference (cum, arg);
+}
+
+/* Implement TARGET_SETUP_INCOMING_VARARGS. */
+
+static void
+n_loongarch_setup_incoming_varargs (cumulative_args_t cum,
+ const function_arg_info &arg,
+ int *pretend_size ATTRIBUTE_UNUSED,
+ int no_rtl)
+{
+ CUMULATIVE_ARGS local_cum;
+ int gp_saved;
+
+ /* The caller has advanced CUM up to, but not beyond, the last named
+ argument. Advance a local copy of CUM past the last "real" named
+ argument, to find out how many registers are left over. */
+ local_cum = *get_cumulative_args (cum);
+ n_loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg);
+
+ /* Find out how many registers we need to save. */
+ gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs;
+
+ if (!no_rtl && gp_saved > 0)
+ {
+ rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
+ REG_PARM_STACK_SPACE (cfun->decl)
+ - gp_saved * UNITS_PER_WORD);
+ rtx mem = gen_frame_mem (BLKmode, ptr);
+ set_mem_alias_set (mem, get_varargs_alias_set ());
+
+ move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST,
+ mem, gp_saved);
+ }
+ if (REG_PARM_STACK_SPACE (cfun->decl) == 0)
+ cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD;
+}
+
+/* Make the last instruction frame-related and note that it performs
+ the operation described by FRAME_PATTERN. */
+
+static void
+n_loongarch_set_frame_expr (rtx frame_pattern)
+{
+ rtx insn;
+
+ insn = get_last_insn ();
+ RTX_FRAME_RELATED_P (insn) = 1;
+ REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+ frame_pattern,
+ REG_NOTES (insn));
+}
+
+/* Return a frame-related rtx that stores REG at MEM.
+ REG must be a single register.
*/ + +static rtx +n_loongarch_frame_set (rtx mem, rtx reg) +{ + rtx set = gen_rtx_SET (mem, reg); + RTX_FRAME_RELATED_P (set) = 1; + return set; +} + +/* Return true if the current function must save register REGNO. */ + +static bool +n_loongarch_save_reg_p (unsigned int regno) +{ + bool call_saved = !global_regs[regno] && !call_used_regs[regno]; + bool might_clobber = crtl->saves_all_registers + || df_regs_ever_live_p (regno); + + if (call_saved && might_clobber) + return true; + + if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed) + return true; + + if (regno == RETURN_ADDR_REGNUM && crtl->calls_eh_return) + return true; + + return false; +} + +/* Determine whether to call GPR save/restore routines. */ +static bool +n_loongarch_use_save_libcall (const struct loongarch_frame_info *frame) +{ + // FIXME: if (!TARGET_SAVE_RESTORE || crtl->calls_eh_return || frame_pointer_needed) + return false; + +} + +/* Determine which GPR save/restore routine to call. */ + +static unsigned +n_loongarch_save_libcall_count (unsigned mask) +{ + for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--) + if (BITSET_P (mask, n)) + return CALLEE_SAVED_REG_NUMBER (n) + 1; + abort (); +} + +/* Populate the current function's loongarch_frame_info structure. + + LARCH stack frames grown downward. High addresses are at the top. + + +-------------------------------+ + | | + | incoming stack arguments | + | | + +-------------------------------+ <-- incoming stack pointer + | | + | callee-allocated save area | + | for arguments that are | + | split between registers and | + | the stack | + | | + +-------------------------------+ <-- arg_pointer_rtx + | | + | callee-allocated save area | + | for register varargs | + | | + +-------------------------------+ <-- hard_frame_pointer_rtx; + | | stack_pointer_rtx + gp_sp_offset + | GPR save area | + UNITS_PER_WORD + | | + +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset + | | + UNITS_PER_HWVALUE + | FPR save area | + | | + +-------------------------------+ <-- frame_pointer_rtx (virtual) + | | + | local variables | + | | + P +-------------------------------+ + | | + | outgoing stack arguments | + | | + +-------------------------------+ <-- stack_pointer_rtx + + Dynamic stack allocations such as alloca insert data at point P. + They decrease stack_pointer_rtx but leave frame_pointer_rtx and + hard_frame_pointer_rtx unchanged. */ + +static void +n_loongarch_compute_frame_info (void) +{ + struct loongarch_frame_info *frame; + HOST_WIDE_INT offset; + unsigned int regno, i, num_x_saved = 0, num_f_saved = 0; + + frame = &cfun->machine->frame; + memset (frame, 0, sizeof (*frame)); + + /* Find out which GPRs we need to save. */ + for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) + if (n_loongarch_save_reg_p (regno)) + frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++; + + /* If this function calls eh_return, we must also save and restore the + EH data registers. */ + if (crtl->calls_eh_return) + for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++) + frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++; + + /* Find out which FPRs we need to save. This loop must iterate over + the same space as its companion in n_loongarch_for_each_saved_reg. */ + if (TARGET_HARD_FLOAT) + for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) + if (n_loongarch_save_reg_p (regno)) + frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++; + + /* At the bottom of the frame are any outgoing stack arguments. 
*/ + offset = LARCH_STACK_ALIGN (crtl->outgoing_args_size); + /* Next are local stack variables. */ + offset += LARCH_STACK_ALIGN (get_frame_size ()); + /* The virtual frame pointer points above the local variables. */ + frame->frame_pointer_offset = offset; + /* Next are the callee-saved FPRs. */ + if (frame->fmask) + offset += LARCH_STACK_ALIGN (num_f_saved * UNITS_PER_FP_REG); + frame->fp_sp_offset = offset - UNITS_PER_FP_REG; + /* Next are the callee-saved GPRs. */ + if (frame->mask) + { + unsigned x_save_size = LARCH_STACK_ALIGN (num_x_saved * UNITS_PER_WORD); + unsigned num_save_restore = 1 + n_loongarch_save_libcall_count (frame->mask); + + /* Only use save/restore routines if they don't alter the stack size. */ + if (LARCH_STACK_ALIGN (num_save_restore * UNITS_PER_WORD) == x_save_size) + frame->save_libcall_adjustment = x_save_size; + + offset += x_save_size; + } + frame->gp_sp_offset = offset - UNITS_PER_WORD; + /* The hard frame pointer points above the callee-saved GPRs. */ + frame->hard_frame_pointer_offset = offset; + /* Above the hard frame pointer is the callee-allocated varags save area. */ + offset += LARCH_STACK_ALIGN (cfun->machine->varargs_size); + /* Next is the callee-allocated area for pretend stack arguments. */ + offset += LARCH_STACK_ALIGN (crtl->args.pretend_args_size); + /* Arg pointer must be below pretend args, but must be above alignment + padding. */ + frame->arg_pointer_offset = offset - crtl->args.pretend_args_size; + frame->total_size = offset; + /* Next points the incoming stack pointer and any incoming arguments. */ + + /* Only use save/restore routines when the GPRs are atop the frame. */ + if (frame->hard_frame_pointer_offset != frame->total_size) + frame->save_libcall_adjustment = 0; +} + +/* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer + or argument pointer. TO is either the stack pointer or hard frame + pointer. */ + +HOST_WIDE_INT +n_loongarch_initial_elimination_offset (int from, int to) +{ + HOST_WIDE_INT src, dest; + + n_loongarch_compute_frame_info (); + + if (to == HARD_FRAME_POINTER_REGNUM) + dest = cfun->machine->frame.hard_frame_pointer_offset; + else if (to == STACK_POINTER_REGNUM) + dest = 0; /* The stack pointer is the base of all offsets, hence 0. */ + else + gcc_unreachable (); + + if (from == FRAME_POINTER_REGNUM) + src = cfun->machine->frame.frame_pointer_offset; + else if (from == ARG_POINTER_REGNUM) + src = cfun->machine->frame.arg_pointer_offset; + else + gcc_unreachable (); + + return src - dest; +} + +/* A function to save or store a register. The first argument is the + register and the second is the stack slot. */ +typedef void (*n_loongarch_save_restore_fn) (rtx, rtx); + +/* Use FN to save or restore register REGNO. MODE is the register's + mode and OFFSET is the offset of its save slot from the current + stack pointer. */ + +static void +n_loongarch_save_restore_reg (machine_mode mode, int regno, + HOST_WIDE_INT offset, n_loongarch_save_restore_fn fn) +{ + rtx mem; + + mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, offset)); + fn (gen_rtx_REG (mode, regno), mem); +} + +/* Call FN for each register that is saved by the current function. + SP_OFFSET is the offset of the current stack pointer from the start + of the frame. */ + +static void +n_loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, n_loongarch_save_restore_fn fn) +{ + HOST_WIDE_INT offset; + + /* Save the link register and s-registers. 
*/ + offset = cfun->machine->frame.gp_sp_offset - sp_offset; + for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) + if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) + { + n_loongarch_save_restore_reg (word_mode, regno, offset, fn); + offset -= UNITS_PER_WORD; + } + + /* This loop must iterate over the same space as its companion in + n_loongarch_compute_frame_info. */ + offset = cfun->machine->frame.fp_sp_offset - sp_offset; + for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) + if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST)) + { + machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode; + + n_loongarch_save_restore_reg (mode, regno, offset, fn); + offset -= GET_MODE_SIZE (mode); + } +} + +/* Save register REG to MEM. Make the instruction frame-related. */ + +static void +n_loongarch_save_reg (rtx reg, rtx mem) +{ + n_loongarch_emit_move (mem, reg); + n_loongarch_set_frame_expr (n_loongarch_frame_set (mem, reg)); +} + +/* Restore register REG from MEM. */ + +static void +n_loongarch_restore_reg (rtx reg, rtx mem) +{ + rtx insn = n_loongarch_emit_move (reg, mem); + rtx dwarf = NULL_RTX; + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + REG_NOTES (insn) = dwarf; + + RTX_FRAME_RELATED_P (insn) = 1; +} + +/* Return the code to invoke the GPR save routine. */ + +const char * +n_loongarch_output_gpr_save (unsigned mask) +{ + static char s[32]; + unsigned n = n_loongarch_save_libcall_count (mask); + + ssize_t bytes = snprintf (s, sizeof (s), "call\tt0,__n_loongarch_save_%u", n); + gcc_assert ((size_t) bytes < sizeof (s)); + + return s; +} + +#define IMM_BITS 12 + +#define IMM_REACH (1LL << IMM_BITS) + +/* For stack frames that can't be allocated with a single ADDI instruction, + compute the best value to initially allocate. It must at a minimum + allocate enough space to spill the callee-saved registers. If TARGET_RVC, + try to pick a value that will allow compression of the register saves + without adding extra instructions. */ + +static HOST_WIDE_INT +n_loongarch_first_stack_step (struct loongarch_frame_info *frame) +{ + if (SMALL_OPERAND (frame->total_size)) + return frame->total_size; + + HOST_WIDE_INT min_first_step = + LARCH_STACK_ALIGN (frame->total_size - frame->fp_sp_offset); + HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8; + HOST_WIDE_INT min_second_step = frame->total_size - max_first_step; + gcc_assert (min_first_step <= max_first_step); + + /* As an optimization, use the least-significant bits of the total frame + size, so that the second adjustment step is just LUI + ADD. */ + if (!SMALL_OPERAND (min_second_step) + && frame->total_size % IMM_REACH < IMM_REACH / 2 + && frame->total_size % IMM_REACH >= min_first_step) + return frame->total_size % IMM_REACH; + + return max_first_step; +} + +static rtx +n_loongarch_adjust_libcall_cfi_prologue () +{ + rtx dwarf = NULL_RTX; + rtx adjust_sp_rtx, reg, mem, insn; + int saved_size = cfun->machine->frame.save_libcall_adjustment; + int offset; + + for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) + if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) + { + /* The save order is ra, s0 to s8. 
*/ + if (regno == RETURN_ADDR_REGNUM) + offset = saved_size - UNITS_PER_WORD; + else + offset = saved_size - ((regno - S0_REGNUM + 2) * UNITS_PER_WORD); + + reg = gen_rtx_REG (SImode, regno); + mem = gen_frame_mem (SImode, plus_constant (Pmode, + stack_pointer_rtx, + offset)); + + insn = gen_rtx_SET (mem, reg); + dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf); + } + + /* Debug info for adjust sp. */ + adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx, + stack_pointer_rtx, GEN_INT (-saved_size)); + dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, + dwarf); + return dwarf; +} + +static void +n_loongarch_emit_stack_tie (void) +{ + if (Pmode == SImode) + emit_insn (gen_stack_tiesi (stack_pointer_rtx, hard_frame_pointer_rtx)); + else + emit_insn (gen_stack_tiedi (stack_pointer_rtx, hard_frame_pointer_rtx)); +} + +/* Expand the "prologue" pattern. */ + +void +n_loongarch_expand_prologue (void) +{ + struct loongarch_frame_info *frame = &cfun->machine->frame; + HOST_WIDE_INT size = frame->total_size; + unsigned mask = frame->mask; + rtx insn; + + if (flag_stack_usage_info) + current_function_static_stack_size = size; + + /* When optimizing for size, call a subroutine to save the registers. */ + if (n_loongarch_use_save_libcall (frame)) + { + rtx dwarf = NULL_RTX; + dwarf = n_loongarch_adjust_libcall_cfi_prologue (); + + frame->mask = 0; /* Temporarily fib that we need not save GPRs. */ + size -= frame->save_libcall_adjustment; + insn = emit_insn (gen_gpr_save (GEN_INT (mask))); + + RTX_FRAME_RELATED_P (insn) = 1; + REG_NOTES (insn) = dwarf; + } + + /* Save the registers. */ + if ((frame->mask | frame->fmask) != 0) + { + HOST_WIDE_INT step1 = MIN (size, n_loongarch_first_stack_step (frame)); + + insn = gen_add3_insn (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (-step1)); + RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; + size -= step1; + n_loongarch_for_each_saved_reg (size, n_loongarch_save_reg); + } + + frame->mask = mask; /* Undo the above fib. */ + + /* Set up the frame pointer, if we're using one. */ + if (frame_pointer_needed) + { + insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, + GEN_INT (frame->hard_frame_pointer_offset - size)); + RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; + + n_loongarch_emit_stack_tie (); + } + + /* Allocate the rest of the frame. */ + if (size > 0) + { + if (SMALL_OPERAND (-size)) + { + insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-size)); + RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; + } + else + { + n_loongarch_emit_move (N_LARCH_PROLOGUE_TEMP (Pmode), GEN_INT (-size)); + emit_insn (gen_add3_insn (stack_pointer_rtx, + stack_pointer_rtx, + N_LARCH_PROLOGUE_TEMP (Pmode))); + + /* Describe the effect of the previous instructions. */ + insn = plus_constant (Pmode, stack_pointer_rtx, -size); + insn = gen_rtx_SET (stack_pointer_rtx, insn); + n_loongarch_set_frame_expr (insn); + } + } +} + +/* Return nonzero if this function is known to have a null epilogue. + This allows the optimizer to omit jumps to jumps if no stack + was created. */ + +bool +n_loongarch_can_use_return_insn (void) +{ + return reload_completed && cfun->machine->frame.total_size == 0; +} + +static rtx +n_loongarch_adjust_libcall_cfi_epilogue () +{ + rtx dwarf = NULL_RTX; + rtx adjust_sp_rtx, reg; + int saved_size = cfun->machine->frame.save_libcall_adjustment; + + /* Debug info for adjust sp. 
*/ + adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx, + stack_pointer_rtx, GEN_INT (saved_size)); + dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, + dwarf); + + for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) + if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) + { + reg = gen_rtx_REG (SImode, regno); + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + } + + return dwarf; +} + +/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P + says which. */ + +void +n_loongarch_expand_epilogue (bool sibcall_p) +{ + /* Split the frame into two. STEP1 is the amount of stack we should + deallocate before restoring the registers. STEP2 is the amount we + should deallocate afterwards. + + Start off by assuming that no registers need to be restored. */ + struct loongarch_frame_info *frame = &cfun->machine->frame; + unsigned mask = frame->mask; + HOST_WIDE_INT step1 = frame->total_size; + HOST_WIDE_INT step2 = 0; + bool use_restore_libcall = !sibcall_p && n_loongarch_use_save_libcall (frame); + rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); + rtx insn; + + /* We need to add memory barrier to prevent read from deallocated stack. */ + bool need_barrier_p = (get_frame_size () + + cfun->machine->frame.arg_pointer_offset) != 0; + + if (!sibcall_p && n_loongarch_can_use_return_insn ()) + { + emit_jump_insn (gen_return ()); + return; + } + + /* Move past any dynamic stack allocations. */ + if (cfun->calls_alloca) + { + /* Emit a barrier to prevent loads from a deallocated stack. */ + n_loongarch_emit_stack_tie (); + need_barrier_p = false; + + rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset); + if (!SMALL_OPERAND (INTVAL (adjust))) + { + n_loongarch_emit_move (N_LARCH_PROLOGUE_TEMP (Pmode), adjust); + adjust = N_LARCH_PROLOGUE_TEMP (Pmode); + } + + insn = emit_insn ( + gen_add3_insn (stack_pointer_rtx, hard_frame_pointer_rtx, + adjust)); + + rtx dwarf = NULL_RTX; + rtx cfa_adjust_value = gen_rtx_PLUS ( + Pmode, hard_frame_pointer_rtx, + GEN_INT (-frame->hard_frame_pointer_offset)); + rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value); + dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf); + RTX_FRAME_RELATED_P (insn) = 1; + + REG_NOTES (insn) = dwarf; + } + + /* If we need to restore registers, deallocate as much stack as + possible in the second step without going out of range. */ + if ((frame->mask | frame->fmask) != 0) + { + step2 = n_loongarch_first_stack_step (frame); + step1 -= step2; + } + + /* Set TARGET to BASE + STEP1. */ + if (step1 > 0) + { + /* Emit a barrier to prevent loads from a deallocated stack. */ + n_loongarch_emit_stack_tie (); + need_barrier_p = false; + + /* Get an rtx for STEP1 that we can add to BASE. */ + rtx adjust = GEN_INT (step1); + if (!SMALL_OPERAND (step1)) + { + n_loongarch_emit_move (N_LARCH_PROLOGUE_TEMP (Pmode), adjust); + adjust = N_LARCH_PROLOGUE_TEMP (Pmode); + } + + insn = emit_insn ( + gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, adjust)); + + rtx dwarf = NULL_RTX; + rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (step2)); + + dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); + RTX_FRAME_RELATED_P (insn) = 1; + + REG_NOTES (insn) = dwarf; + } + + if (use_restore_libcall) + frame->mask = 0; /* Temporarily fib that we need not save GPRs. */ + + /* Restore the registers. 
*/ + n_loongarch_for_each_saved_reg (frame->total_size - step2, n_loongarch_restore_reg); + + if (use_restore_libcall) + { + frame->mask = mask; /* Undo the above fib. */ + gcc_assert (step2 >= frame->save_libcall_adjustment); + step2 -= frame->save_libcall_adjustment; + } + + if (need_barrier_p) + n_loongarch_emit_stack_tie (); + + /* Deallocate the final bit of the frame. */ + if (step2 > 0) + { + insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (step2))); + + rtx dwarf = NULL_RTX; + rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, + const0_rtx); + dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); + RTX_FRAME_RELATED_P (insn) = 1; + + REG_NOTES (insn) = dwarf; + } + + if (use_restore_libcall) + { + rtx dwarf = n_loongarch_adjust_libcall_cfi_epilogue (); + insn = emit_insn (gen_gpr_restore (GEN_INT (n_loongarch_save_libcall_count (mask)))); + RTX_FRAME_RELATED_P (insn) = 1; + REG_NOTES (insn) = dwarf; + + emit_jump_insn (gen_gpr_restore_return (ra)); + return; + } + + /* Add in the __builtin_eh_return stack adjustment. */ + if (crtl->calls_eh_return) + emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, + EH_RETURN_STACKADJ_RTX)); + + if (!sibcall_p) + emit_jump_insn (gen_simple_return_internal (ra)); +} + + +static rtx loongarch_find_pic_call_symbol (rtx_insn *, rtx, bool); +static int loongarch_register_move_cost (machine_mode, reg_class_t, + reg_class_t); + +/* Predicates to test for presence of "near"/"short_call" and "far"/"long_call" + attributes on the given TYPE. */ + +static bool +loongarch_near_type_p (const_tree type) +{ + return (lookup_attribute ("short_call", TYPE_ATTRIBUTES (type)) != NULL + || lookup_attribute ("near", TYPE_ATTRIBUTES (type)) != NULL); +} + +static bool +loongarch_far_type_p (const_tree type) +{ + return (lookup_attribute ("long_call", TYPE_ATTRIBUTES (type)) != NULL + || lookup_attribute ("far", TYPE_ATTRIBUTES (type)) != NULL); +} + + +/* Check if the interrupt attribute is set for a function. */ + +static bool +loongarch_interrupt_type_p (tree type) +{ + return lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type)) != NULL; +} + +/* Return the compression mode that should be used for function DECL. + Return the ambient setting if DECL is null. */ + +static unsigned int +loongarch_get_compress_mode (tree decl) +{ + unsigned int flags; + + flags = loongarch_base_compression_flags; + if (decl) + { + /* Nested functions must use the same frame pointer as their + parent and must therefore use the same ISA mode. */ + tree parent = decl_function_context (decl); + if (parent) + decl = parent; + } + return flags; +} + +/* Implement TARGET_COMP_TYPE_ATTRIBUTES. */ + +static int +loongarch_comp_type_attributes (const_tree type1, const_tree type2) +{ + /* Disallow mixed near/far attributes. */ + if (loongarch_far_type_p (type1) && loongarch_near_type_p (type2)) + return 0; + if (loongarch_near_type_p (type1) && loongarch_far_type_p (type2)) + return 0; + return 1; +} + +/* Implement TARGET_MERGE_DECL_ATTRIBUTES. */ + +static tree +loongarch_merge_decl_attributes (tree olddecl, tree newdecl) +{ + return merge_attributes (DECL_ATTRIBUTES (olddecl), + DECL_ATTRIBUTES (newdecl)); +} + +/* Implement TARGET_CAN_INLINE_P. 
*/ + +static bool +loongarch_can_inline_p (tree caller, tree callee) +{ + if (loongarch_get_compress_mode (callee) != loongarch_get_compress_mode (caller)) + return false; + return default_target_can_inline_p (caller, callee); +} + +/* Handle an "interrupt" attribute with an optional argument. */ + +static tree +loongarch_handle_interrupt_attr (tree *node ATTRIBUTE_UNUSED, tree name, tree args, + int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) +{ + /* Check for an argument. */ + if (is_attribute_p ("interrupt", name) && args != NULL) + { + tree cst; + + cst = TREE_VALUE (args); + if (TREE_CODE (cst) != STRING_CST) + { + warning (OPT_Wattributes, + "%qE attribute requires a string argument", + name); + *no_add_attrs = true; + } + else if (strcmp (TREE_STRING_POINTER (cst), "eic") != 0 + && strncmp (TREE_STRING_POINTER (cst), "vector=", 7) != 0) + { + warning (OPT_Wattributes, + "argument to %qE attribute is neither eic, nor " + "vector=", name); + *no_add_attrs = true; + } + else if (strncmp (TREE_STRING_POINTER (cst), "vector=", 7) == 0) + { + const char *arg = TREE_STRING_POINTER (cst) + 7; + + /* Acceptable names are: sw0,sw1,hw0,hw1,hw2,hw3,hw4,hw5. */ + if (strlen (arg) != 3 + || (arg[0] != 's' && arg[0] != 'h') + || arg[1] != 'w' + || (arg[0] == 's' && arg[2] != '0' && arg[2] != '1') + || (arg[0] == 'h' && (arg[2] < '0' || arg[2] > '5'))) + { + warning (OPT_Wattributes, + "interrupt vector to %qE attribute is not " + "vector=(sw0|sw1|hw0|hw1|hw2|hw3|hw4|hw5)", + name); + *no_add_attrs = true; + } + } + + return NULL_TREE; + } + + return NULL_TREE; +} + +/* Handle a "use_shadow_register_set" attribute with an optional argument. */ + +static tree +loongarch_handle_use_shadow_register_set_attr (tree *node ATTRIBUTE_UNUSED, + tree name, tree args, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + /* Check for an argument. */ + if (is_attribute_p ("use_shadow_register_set", name) && args != NULL) + { + tree cst; + + cst = TREE_VALUE (args); + if (TREE_CODE (cst) != STRING_CST) + { + warning (OPT_Wattributes, + "%qE attribute requires a string argument", + name); + *no_add_attrs = true; + } + else if (strcmp (TREE_STRING_POINTER (cst), "intstack") != 0) + { + warning (OPT_Wattributes, + "argument to %qE attribute is not intstack", name); + *no_add_attrs = true; + } + + return NULL_TREE; + } + + return NULL_TREE; +} + +/* If X is a PLUS of a CONST_INT, return the two terms in *BASE_PTR + and *OFFSET_PTR. Return X in *BASE_PTR and 0 in *OFFSET_PTR otherwise. */ + +static void +loongarch_split_plus (rtx x, rtx *base_ptr, HOST_WIDE_INT *offset_ptr) +{ + if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))) + { + *base_ptr = XEXP (x, 0); + *offset_ptr = INTVAL (XEXP (x, 1)); + } + else + { + *base_ptr = x; + *offset_ptr = 0; + } +} + +static unsigned int loongarch_build_integer (struct loongarch_integer_op *, + unsigned HOST_WIDE_INT); + +/* Fill CODES with a sequence of rtl operations to load VALUE. + Return the number of operations needed. + Split interger in loongarch_output_move. 
*/
+
+static unsigned int
+loongarch_build_integer (struct loongarch_integer_op *codes,
+ unsigned HOST_WIDE_INT value)
+{
+ uint32_t hi32, lo32;
+ char all0_bit_vec, sign_bit_vec, allf_bit_vec, partial_is_sext_of_prev;
+ unsigned int cost = 0;
+
+ lo32 = value & 0xffffffff;
+ hi32 = value >> 32;
+
+ all0_bit_vec = (((hi32 & 0xfff00000) == 0) << 3)
+ | (((hi32 & 0x000fffff) == 0) << 2)
+ | (((lo32 & 0xfffff000) == 0) << 1)
+ | ((lo32 & 0x00000fff) == 0);
+ sign_bit_vec = (((hi32 & 0x80000000) != 0) << 3)
+ | (((hi32 & 0x00080000) != 0) << 2)
+ | (((lo32 & 0x80000000) != 0) << 1)
+ | ((lo32 & 0x00000800) != 0);
+ allf_bit_vec = (((hi32 & 0xfff00000) == 0xfff00000) << 3)
+ | (((hi32 & 0x000fffff) == 0x000fffff) << 2)
+ | (((lo32 & 0xfffff000) == 0xfffff000) << 1)
+ | ((lo32 & 0x00000fff) == 0x00000fff);
+ partial_is_sext_of_prev = (all0_bit_vec ^ allf_bit_vec)
+ & (all0_bit_vec ^ (sign_bit_vec << 1));
+
+ do
+ {
+ if (partial_is_sext_of_prev == 0x7)
+ {
+ codes[0].code = UNKNOWN;
+ codes[0].method = METHOD_LU52I;
+ codes[0].value = value & 0xfff0000000000000;
+ cost++;
+ break;
+ }
+ if ((all0_bit_vec & 0x3) == 0x2)
+ {
+ codes[cost].code = UNKNOWN;
+ codes[cost].method = METHOD_NORMAL;
+ codes[cost].value = value & 0xfff;
+ cost++;
+ }
+ else
+ {
+ switch (partial_is_sext_of_prev & 0x3)
+ {
+ case 0:
+ codes[cost].code = UNKNOWN;
+ codes[cost].method = METHOD_NORMAL;
+ codes[cost].value = ((HOST_WIDE_INT)value << 32 >> 32) & 0xfffffffffffff000;
+ cost++;
+ codes[cost].code = IOR;
+ codes[cost].method = METHOD_NORMAL;
+ codes[cost].value = value & 0xfff;
+ cost++;
+ break;
+ case 1:
+ codes[cost].code = UNKNOWN;
+ codes[cost].method = METHOD_NORMAL;
+ codes[cost].value = ((HOST_WIDE_INT)value << 32 >> 32) & 0xfffffffffffff000;
+ cost++;
+ break;
+ case 2:
+ codes[cost].code = UNKNOWN;
+ codes[cost].method = METHOD_NORMAL;
+ codes[cost].value = (HOST_WIDE_INT)value << 52 >> 52;
+ cost++;
+ break;
+ case 3:
+ codes[cost].code = UNKNOWN;
+ codes[cost].method = METHOD_NORMAL;
+ codes[cost].value = 0;
+ cost++;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ if (((value & 0xfffffffffffff800) ^ 0xfff00000fffff800) == 0)
+ {
+ codes[cost].method = METHOD_INSV;
+ cost++;
+ break;
+ }
+
+ switch (partial_is_sext_of_prev >> 2)
+ {
+ case 0:
+ codes[cost].method = METHOD_LU32I;
+ codes[cost].value = ((HOST_WIDE_INT)value << 12 >> 12) & 0xffffffff00000000;
+ cost++;
+ /* Neither high part is a sign extension of the part below it,
+ so both LU32I and LU52I are needed. Fall through. */
+ case 1:
+ codes[cost].method = METHOD_LU52I;
+ codes[cost].value = value & 0xfff0000000000000;
+ cost++;
+ break;
+ case 2:
+ codes[cost].method = METHOD_LU32I;
+ codes[cost].value = ((HOST_WIDE_INT)value << 12 >> 12) & 0xffffffff00000000;
+ cost++;
+ break;
+ case 3:
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ while (0);
+
+ return cost;
+}
+
+/* Implement TARGET_LEGITIMATE_CONSTANT_P. */
+
+static bool
+loongarch_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
+{
+ return loongarch_const_insns (x) > 0;
+}
+
+
+/* Return true if X is a thread-local symbol. */
+
+static bool
+loongarch_tls_symbol_p (rtx x)
+{
+ return GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0;
+}
+
+/* Return true if SYMBOL_REF X is associated with a global symbol
+ (in the STB_GLOBAL sense). */
+
+bool
+loongarch_global_symbol_p (const_rtx x)
+{
+ if (GET_CODE (x) == LABEL_REF)
+ return false;
+
+ const_tree decl = SYMBOL_REF_DECL (x);
+
+ if (!decl)
+ return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x);
+
+ /* Weakref symbols are not TREE_PUBLIC, but their targets are global
+ or weak symbols.
Relocations in the object file will be against
+ the target symbol, so it's that symbol's binding that matters here. */
+ return DECL_P (decl) && (TREE_PUBLIC (decl) || DECL_WEAK (decl));
+}
+
+bool
+loongarch_global_symbol_noweak_p (const_rtx x)
+{
+ if (GET_CODE (x) == LABEL_REF)
+ return false;
+
+ const_tree decl = SYMBOL_REF_DECL (x);
+
+ if (!decl)
+ return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x);
+
+ /* Weakref symbols are not TREE_PUBLIC, but their targets are global
+ or weak symbols. Relocations in the object file will be against
+ the target symbol, so it's that symbol's binding that matters here. */
+ return DECL_P (decl) && TREE_PUBLIC (decl);
+}
+
+bool
+loongarch_weak_symbol_p (const_rtx x)
+{
+ const_tree decl;
+ if (GET_CODE (x) == LABEL_REF || !(decl = SYMBOL_REF_DECL (x)))
+ return false;
+ return DECL_P (decl) && DECL_WEAK (decl);
+}
+
+
+/* Return true if SYMBOL_REF X binds locally. */
+
+bool
+loongarch_symbol_binds_local_p (const_rtx x)
+{
+ if (GET_CODE (x) == LABEL_REF)
+ return false;
+
+ return (SYMBOL_REF_DECL (x)
+ ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
+ : SYMBOL_REF_LOCAL_P (x));
+}
+
+
+/* Return true if rtx constants of mode MODE should be put into a small
+ data section. */
+
+static bool
+loongarch_rtx_constant_in_small_data_p (machine_mode mode)
+{
+ return (!TARGET_EMBEDDED_DATA
+ && TARGET_LOCAL_SDATA
+ && GET_MODE_SIZE (mode) <= loongarch_small_data_threshold);
+}
+
+/* Return the method that should be used to access SYMBOL_REF or
+ LABEL_REF X in context CONTEXT. */
+
+static enum loongarch_symbol_type
+loongarch_classify_symbol (const_rtx x, enum loongarch_symbol_context context)
+{
+ if (TARGET_RTP_PIC)
+ return SYMBOL_GOT_DISP;
+
+ if (GET_CODE (x) == LABEL_REF)
+ {
+ return SYMBOL_GOT_DISP;
+ }
+
+ gcc_assert (GET_CODE (x) == SYMBOL_REF);
+
+ if (SYMBOL_REF_TLS_MODEL (x))
+ return SYMBOL_TLS;
+
+ return SYMBOL_GOT_DISP;
+}
+
+/* Return true if X is a symbolic constant that can be used in context
+ CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */
+
+bool
+loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_context context,
+ enum loongarch_symbol_type *symbol_type)
+{
+ rtx offset;
+
+ split_const (x, &x, &offset);
+ if (UNSPEC_ADDRESS_P (x))
+ {
+ *symbol_type = UNSPEC_ADDRESS_TYPE (x);
+ x = UNSPEC_ADDRESS (x);
+ }
+ else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
+ {
+ *symbol_type = loongarch_classify_symbol (x, context);
+ if (*symbol_type == SYMBOL_TLS)
+ return true;
+ }
+ else
+ return false;
+
+ if (offset == const0_rtx)
+ return true;
+
+ /* Check whether a nonzero offset is valid for the underlying
+ relocations. */
+ switch (*symbol_type)
+ {
+ case SYMBOL_GOT_DISP:
+ case SYMBOL_TLSGD:
+ case SYMBOL_TLSLDM:
+ case SYMBOL_TLS:
+ return false;
+ }
+ gcc_unreachable ();
+}
+
+/* Worker function for loongarch_symbol_insns. */
+
+static int
+loongarch_symbol_insns_1 (enum loongarch_symbol_type type, machine_mode mode)
+{
+ if (loongarch_use_pcrel_pool_p[(int) type])
+ {
+ /* The constant must be loaded and then dereferenced. */
+ return 0;
+ }
+
+ switch (type)
+ {
+ case SYMBOL_GOT_DISP:
+ /* The constant will have to be loaded from the GOT before it
+ is used in an address. */
+ if (mode != MAX_MACHINE_MODE)
+ return 0;
+
+ return 3;
+
+ case SYMBOL_TLSGD:
+ case SYMBOL_TLSLDM:
+ return 1;
+
+ case SYMBOL_TLS:
+ /* We don't treat a bare TLS symbol as a constant.
*/ + return 0; + } + gcc_unreachable (); +} + +/* If MODE is MAX_MACHINE_MODE, return the number of instructions needed + to load symbols of type TYPE into a register. Return 0 if the given + type of symbol cannot be used as an immediate operand. + + Otherwise, return the number of instructions needed to load or store + values of mode MODE to or from addresses of type TYPE. Return 0 if + the given type of symbol is not valid in addresses. + + In both cases, instruction counts are based off BASE_INSN_LENGTH. */ + +static int +loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode) +{ + return loongarch_symbol_insns_1 (type, mode) * (1); +} + +/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + +static bool +loongarch_cannot_force_const_mem (machine_mode mode, rtx x) +{ + enum loongarch_symbol_type type; + rtx base, offset; + + /* There is no assembler syntax for expressing an address-sized + high part. */ + if (GET_CODE (x) == HIGH) + return true; + + /* As an optimization, reject constants that loongarch_legitimize_move + can expand inline. + + Suppose we have a multi-instruction sequence that loads constant C + into register R. If R does not get allocated a hard register, and + R is used in an operand that allows both registers and memory + references, reload will consider forcing C into memory and using + one of the instruction's memory alternatives. Returning false + here will force it to use an input reload instead. */ + if (CONST_INT_P (x) && loongarch_legitimate_constant_p (mode, x)) + return true; + + split_const (x, &base, &offset); + if (loongarch_symbolic_constant_p (base, SYMBOL_CONTEXT_LEA, &type)) + { + /* See whether we explicitly want these symbols in the pool. */ + if (loongarch_use_pcrel_pool_p[(int) type]) + return false; + + /* The same optimization as for CONST_INT. */ + if (SMALL_INT (offset) && loongarch_symbol_insns (type, MAX_MACHINE_MODE) > 0) + return true; + + } + + /* TLS symbols must be computed by loongarch_legitimize_move. */ + if (tls_referenced_p (x)) + return true; + + return false; +} + + +/* Return true if register REGNO is a valid base register for mode MODE. + STRICT_P is true if REG_OK_STRICT is in effect. */ + +int +loongarch_regno_mode_ok_for_base_p (int regno, machine_mode mode, + bool strict_p) +{ + if (!HARD_REGISTER_NUM_P (regno)) + { + if (!strict_p) + return true; + regno = reg_renumber[regno]; + } + + /* These fake registers will be eliminated to either the stack or + hard frame pointer, both of which are usually valid base registers. + Reload deals with the cases where the eliminated form isn't valid. */ + if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM) + return true; + + + return GP_REG_P (regno); +} + +/* Return true if X is a valid base register for mode MODE. + STRICT_P is true if REG_OK_STRICT is in effect. */ + +static bool +loongarch_valid_base_register_p (rtx x, machine_mode mode, bool strict_p) +{ + if (!strict_p && GET_CODE (x) == SUBREG) + x = SUBREG_REG (x); + + return (REG_P (x) + && loongarch_regno_mode_ok_for_base_p (REGNO (x), mode, strict_p)); +} + +/* Return true if, for every base register BASE_REG, (plus BASE_REG X) + can address a value of mode MODE. 
*/ + +static bool +loongarch_valid_offset_p (rtx x, machine_mode mode) +{ + /* Check that X is a signed 12-bit number, + * or check that X is a signed 16-bit number + * and offset 4 byte aligned */ + if (!(const_arith_operand (x, Pmode) + || ((mode == E_SImode || mode == E_DImode) + && const_imm16_operand (x, Pmode) + && (loongarch_signed_immediate_p (INTVAL (x), 14, 2))))) + return false; + + /* We may need to split multiword moves, so make sure that every word + is accessible. */ + if (GET_MODE_SIZE (mode) > UNITS_PER_WORD + && !SMALL_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD)) + return false; + + return true; +} + +/* Return true if X is a valid address for machine mode MODE. If it is, + fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in + effect. */ + +static bool +loongarch_classify_address (struct loongarch_address_info *info, rtx x, + machine_mode mode, bool strict_p) +{ + switch (GET_CODE (x)) + { + case REG: + case SUBREG: + info->type = ADDRESS_REG; + info->reg = x; + info->offset = const0_rtx; + return loongarch_valid_base_register_p (info->reg, mode, strict_p); + + case PLUS: + info->type = ADDRESS_REG; + info->reg = XEXP (x, 0); + info->offset = XEXP (x, 1); + return (loongarch_valid_base_register_p (info->reg, mode, strict_p) + && loongarch_valid_offset_p (info->offset, mode)); + #if 0 + case LABEL_REF: + case SYMBOL_REF: + info->type = ADDRESS_SYMBOLIC; + return (loongarch_symbolic_constant_p (x, SYMBOL_CONTEXT_MEM, + &info->symbol_type) + && loongarch_symbol_insns (info->symbol_type, mode) > 0 + && !loongarch_split_p[info->symbol_type]); + + #endif + default: + return false; + } +} + +/* Implement TARGET_LEGITIMATE_ADDRESS_P. */ + +static bool +loongarch_legitimate_address_p (machine_mode mode, rtx x, bool strict_p) +{ + struct loongarch_address_info addr; + + return loongarch_classify_address (&addr, x, mode, strict_p); +} + +/* Return true if X is a legitimate $sp-based address for mode MODE. */ + +bool +loongarch_stack_address_p (rtx x, machine_mode mode) +{ + struct loongarch_address_info addr; + + return (loongarch_classify_address (&addr, x, mode, false) + && addr.type == ADDRESS_REG + && addr.reg == stack_pointer_rtx); +} + +/* Return true if ADDR matches the pattern for the L{B,H,W,D}{,U}X load + indexed address instruction. Note that such addresses are + not considered legitimate in the TARGET_LEGITIMATE_ADDRESS_P + sense, because their use is so restricted. */ + +static bool +loongarch_lx_address_p (rtx addr, machine_mode mode) +{ + if (GET_CODE (addr) != PLUS + || !REG_P (XEXP (addr, 0)) + || !REG_P (XEXP (addr, 1))) + return false; + return false; +} + + +/* Return the number of instructions needed to load or store a value + of mode MODE at address X, assuming that BASE_INSN_LENGTH is the + length of one instruction. Return 0 if X isn't valid for MODE. + Assume that multiword moves may need to be split into word moves + if MIGHT_SPLIT_P, otherwise assume that a single load or store is + enough. */ + +int +loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) +{ + struct loongarch_address_info addr; + int factor; + + if (!loongarch_classify_address (&addr, x, mode, false)) + return 0; + + /* BLKmode is used for single unaligned loads and stores and should + not count as a multiword mode. (GET_MODE_SIZE (BLKmode) is pretty + meaningless, so we have to single it out as a special case one way + or the other.) 
*/ + if (mode != BLKmode && might_split_p) + factor = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + else + factor = 1; + + if (loongarch_classify_address (&addr, x, mode, false)) + switch (addr.type) + { + case ADDRESS_REG: + return factor; + + case ADDRESS_CONST_INT: + return factor; + + case ADDRESS_SYMBOLIC: + return factor * loongarch_symbol_insns (addr.symbol_type, mode); + } + return 0; +} + +/* Return true if X fits within an unsigned field of BITS bits that is + shifted left SHIFT bits before being used. */ + +bool +loongarch_unsigned_immediate_p (unsigned HOST_WIDE_INT x, int bits, int shift = 0) +{ + return (x & ((1 << shift) - 1)) == 0 && x < ((unsigned) 1 << (shift + bits)); +} + +/* Return true if X fits within a signed field of BITS bits that is + shifted left SHIFT bits before being used. */ + +bool +loongarch_signed_immediate_p (unsigned HOST_WIDE_INT x, int bits, int shift = 0) +{ + x += 1 << (bits + shift - 1); + return loongarch_unsigned_immediate_p (x, bits, shift); +} + +/* Return true if X is a legitimate address with a 12-bit offset. + MODE is the mode of the value being accessed. */ + +bool +loongarch_12bit_offset_address_p (rtx x, machine_mode mode) +{ + struct loongarch_address_info addr; + + return (loongarch_classify_address (&addr, x, mode, false) + && addr.type == ADDRESS_REG + && CONST_INT_P (addr.offset) + && ULARCH_12BIT_OFFSET_P (INTVAL (addr.offset))); +} + +/* Return true if X is a legitimate address with a 9-bit offset. + MODE is the mode of the value being accessed. */ + +bool +loongarch_9bit_offset_address_p (rtx x, machine_mode mode) +{ + struct loongarch_address_info addr; + + return (loongarch_classify_address (&addr, x, mode, false) + && addr.type == ADDRESS_REG + && CONST_INT_P (addr.offset) + && LARCH_9BIT_OFFSET_P (INTVAL (addr.offset))); +} + +/* Return true if X is a legitimate address with a 14-bit offset shifted 2. + MODE is the mode of the value being accessed. */ + +bool +loongarch_14bit_shifted_offset_address_p (rtx x, machine_mode mode) +{ + struct loongarch_address_info addr; + + return (loongarch_classify_address (&addr, x, mode, false) + && addr.type == ADDRESS_REG + && CONST_INT_P (addr.offset) + && LISA_16BIT_OFFSET_P (INTVAL (addr.offset)) + && LISA_SHIFT_2_OFFSET_P (INTVAL (addr.offset))); +} + + +/* Return the number of instructions needed to load constant X, + assuming that BASE_INSN_LENGTH is the length of one instruction. + Return 0 if X isn't a valid constant. */ + +int +loongarch_const_insns (rtx x) +{ + struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS]; + enum loongarch_symbol_type symbol_type; + rtx offset; + + switch (GET_CODE (x)) + { + case CONST_INT: + return loongarch_build_integer (codes, INTVAL (x)); + + case CONST_VECTOR: + /* Fall through. */ + case CONST_DOUBLE: + /* Allow zeros for normal mode, where we can use $0. */ + return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0; + + case CONST: + /* See if we can refer to X directly. */ + if (loongarch_symbolic_constant_p (x, SYMBOL_CONTEXT_LEA, &symbol_type)) + return loongarch_symbol_insns (symbol_type, MAX_MACHINE_MODE); + + /* Otherwise try splitting the constant into a base and offset. + If the offset is a 16-bit value, we can load the base address + into a register and then use (D)ADDIU to add in the offset. + If the offset is larger, we can load the base and offset + into separate registers and add them together with (D)ADDU. 
+ However, the latter is only possible before reload; during + and after reload, we must have the option of forcing the + constant into the pool instead. */ + split_const (x, &x, &offset); + if (offset != 0) + { + int n = loongarch_const_insns (x); + if (n != 0) + { + if (SMALL_INT (offset)) + return n + 1; + else if (!targetm.cannot_force_const_mem (GET_MODE (x), x)) + return n + 1 + loongarch_build_integer (codes, INTVAL (offset)); + } + } + return 0; + + case SYMBOL_REF: + case LABEL_REF: + return loongarch_symbol_insns (loongarch_classify_symbol (x, SYMBOL_CONTEXT_LEA), + MAX_MACHINE_MODE); + + default: + return 0; + } +} + +/* X is a doubleword constant that can be handled by splitting it into + two words and loading each word separately. Return the number of + instructions required to do this, assuming that BASE_INSN_LENGTH + is the length of one instruction. */ + +int +loongarch_split_const_insns (rtx x) +{ + unsigned int low, high; + + low = loongarch_const_insns (loongarch_subword (x, false)); + high = loongarch_const_insns (loongarch_subword (x, true)); + gcc_assert (low > 0 && high > 0); + return low + high; +} + +/* Return one word of 128-bit value OP, taking into account the fixed + endianness of certain registers. BYTE selects from the byte address. */ + +rtx +loongarch_subword_at_byte (rtx op, unsigned int byte) +{ + machine_mode mode; + + mode = GET_MODE (op); + if (mode == VOIDmode) + mode = TImode; + + gcc_assert (!FP_REG_RTX_P (op)); + + if (MEM_P (op)) + return loongarch_rewrite_small_data (adjust_address (op, word_mode, byte)); + + return simplify_gen_subreg (word_mode, op, mode, byte); +} + +/* Return the number of instructions needed to implement INSN, + given that it loads from or stores to MEM. Assume that + BASE_INSN_LENGTH is the length of one instruction. */ + +int +loongarch_load_store_insns (rtx mem, rtx_insn *insn) +{ + machine_mode mode; + bool might_split_p; + rtx set; + + gcc_assert (MEM_P (mem)); + mode = GET_MODE (mem); + + /* Try to prove that INSN does not need to be split. */ + might_split_p = GET_MODE_SIZE (mode) > UNITS_PER_WORD; + if (might_split_p) + { + set = single_set (insn); + if (set && !loongarch_split_move_insn_p (SET_DEST (set), SET_SRC (set), insn)) + might_split_p = false; + } + + return loongarch_address_insns (XEXP (mem, 0), mode, might_split_p); +} + +/* Return the number of instructions needed for an integer division, + assuming that BASE_INSN_LENGTH is the length of one instruction. */ + +int +loongarch_idiv_insns (machine_mode mode) +{ + int count; + + count = 1; + if (TARGET_CHECK_ZERO_DIV) + count += 2; + + return count; +} + + +/* Emit a move from SRC to DEST. Assume that the move expanders can + handle all moves if !can_create_pseudo_p (). The distinction is + important because, unlike emit_move_insn, the move expanders know + how to force Pmode objects into the constant pool even when the + constant pool address is not itself legitimate. */ + +rtx_insn * +loongarch_emit_move (rtx dest, rtx src) +{ + return (can_create_pseudo_p () + ? emit_move_insn (dest, src) + : emit_move_insn_1 (dest, src)); +} + +/* Emit a move from SRC to DEST, splitting compound moves into individual + instructions. SPLIT_TYPE is the type of split to perform. 
*/ + +static void +loongarch_emit_move_or_split (rtx dest, rtx src, enum loongarch_split_type split_type) +{ + if (loongarch_split_move_p (dest, src, split_type)) + loongarch_split_move (dest, src, split_type, NULL); + else + loongarch_emit_move (dest, src); +} + +/* Emit an instruction of the form (set TARGET (CODE OP0 OP1)). */ + +void +loongarch_emit_binary (enum rtx_code code, rtx target, rtx op0, rtx op1) +{ + emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (code, GET_MODE (target), + op0, op1))); +} + +/* Compute (CODE OP0 OP1) and store the result in a new register + of mode MODE. Return that new register. */ + +static rtx +loongarch_force_binary (machine_mode mode, enum rtx_code code, rtx op0, rtx op1) +{ + rtx reg; + + reg = gen_reg_rtx (mode); + loongarch_emit_binary (code, reg, op0, op1); + return reg; +} + +/* Copy VALUE to a register and return that register. If new pseudos + are allowed, copy it into a new register, otherwise use DEST. */ + +static rtx +loongarch_force_temporary (rtx dest, rtx value) +{ + if (can_create_pseudo_p ()) + return force_reg (Pmode, value); + else + { + loongarch_emit_move (dest, value); + return dest; + } +} + + +/* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE, + then add CONST_INT OFFSET to the result. */ + +static rtx +loongarch_unspec_address_offset (rtx base, rtx offset, + enum loongarch_symbol_type symbol_type) +{ + base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base), + UNSPEC_ADDRESS_FIRST + symbol_type); + if (offset != const0_rtx) + base = gen_rtx_PLUS (Pmode, base, offset); + return gen_rtx_CONST (Pmode, base); +} + +/* Return an UNSPEC address with underlying address ADDRESS and symbol + type SYMBOL_TYPE. */ + +rtx +loongarch_unspec_address (rtx address, enum loongarch_symbol_type symbol_type) +{ + rtx base, offset; + + split_const (address, &base, &offset); + return loongarch_unspec_address_offset (base, offset, symbol_type); +} + +/* If OP is an UNSPEC address, return the address to which it refers, + otherwise return OP itself. */ + +rtx +loongarch_strip_unspec_address (rtx op) +{ + rtx base, offset; + + split_const (op, &base, &offset); + if (UNSPEC_ADDRESS_P (base)) + op = plus_constant (Pmode, UNSPEC_ADDRESS (base), INTVAL (offset)); + return op; +} + + +/* Return a base register that holds pic_offset_table_rtx. + TEMP, if nonnull, is a scratch Pmode base register. */ + +rtx +loongarch_pic_base_register (rtx temp) +{ + return pic_offset_table_rtx; + +} + +/* If SRC is the RHS of a load_call insn, return the underlying symbol + reference. Return NULL_RTX otherwise. */ + +static rtx +loongarch_strip_unspec_call (rtx src) +{ + if (GET_CODE (src) == UNSPEC && XINT (src, 1) == UNSPEC_LOAD_CALL) + return loongarch_strip_unspec_address (XVECEXP (src, 0, 1)); + return NULL_RTX; +} + +/* Return a legitimate address for REG + OFFSET. TEMP is as for + loongarch_force_temporary; it is only needed when OFFSET is not a + SMALL_OPERAND. */ + +static rtx +loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset) +{ + if (!SMALL_OPERAND (offset)) + { + rtx high; + + /* Leave OFFSET as a 16-bit offset and put the excess in HIGH. + The addition inside the macro CONST_HIGH_PART may cause an + overflow, so we need to force a sign-extension check. 
 */
+      high = gen_int_mode (CONST_HIGH_PART (offset), Pmode);
+      offset = CONST_LOW_PART (offset);
+      high = loongarch_force_temporary (temp, high);
+      reg = loongarch_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
+    }
+  return plus_constant (Pmode, reg, offset);
+}
+
+/* The __tls_get_addr symbol.  */
+static GTY(()) rtx loongarch_tls_symbol;
+
+/* Load an entry from the GOT for a TLS GD access.  */
+
+static rtx loongarch_got_load_tls_gd (rtx dest, rtx sym)
+{
+  if (Pmode == DImode)
+    return gen_got_load_tls_gddi (dest, sym);
+  else
+    return gen_got_load_tls_gdsi (dest, sym);
+}
+
+/* Load an entry from the GOT for a TLS LD access.  */
+
+static rtx loongarch_got_load_tls_ld (rtx dest, rtx sym)
+{
+  if (Pmode == DImode)
+    return gen_got_load_tls_lddi (dest, sym);
+  else
+    return gen_got_load_tls_ldsi (dest, sym);
+}
+
+
+/* Load an entry from the GOT for a TLS IE access.  */
+
+static rtx loongarch_got_load_tls_ie (rtx dest, rtx sym)
+{
+  if (Pmode == DImode)
+    return gen_got_load_tls_iedi (dest, sym);
+  else
+    return gen_got_load_tls_iesi (dest, sym);
+}
+
+/* Add in the thread pointer for a TLS LE access.  */
+
+static rtx loongarch_got_load_tls_le (rtx dest, rtx sym)
+{
+  if (Pmode == DImode)
+    return gen_got_load_tls_ledi (dest, sym);
+  else
+    return gen_got_load_tls_lesi (dest, sym);
+}
+
+/* Return an instruction sequence that calls __tls_get_addr.  SYM is
+   the TLS symbol we are referencing and TYPE is the symbol type to use
+   (either global dynamic or local dynamic).  V0 is an RTX for the
+   return value location.  */
+
+static rtx_insn *
+loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
+{
+  rtx loc, a0;
+  rtx_insn *insn;
+
+  a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST);
+
+  if (!loongarch_tls_symbol)
+    loongarch_tls_symbol = init_one_libfunc ("__tls_get_addr");
+
+  loc = loongarch_unspec_address (sym, type);
+
+  start_sequence ();
+
+  if (type == SYMBOL_TLSLDM)
+    emit_insn (loongarch_got_load_tls_ld (a0, loc));
+  else if (type == SYMBOL_TLSGD)
+    emit_insn (loongarch_got_load_tls_gd (a0, loc));
+  else
+    gcc_unreachable ();
+
+  insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol, const0_rtx));
+  RTL_CONST_CALL_P (insn) = 1;
+  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
+  insn = get_insns ();
+
+  end_sequence ();
+
+  return insn;
+}
+
+/* Generate the code to access LOC, a thread-local SYMBOL_REF, and return
+   its address.  The return value will be both a valid address and a valid
+   SET_SRC (either a REG or a LO_SUM).  */
+
+static rtx
+loongarch_legitimize_tls_address (rtx loc)
+{
+  rtx dest, tp, tmp;
+  enum tls_model model = SYMBOL_REF_TLS_MODEL (loc);
+  rtx_insn *insn;
+
+  /* Since we support TLS copy relocs, non-PIC TLS accesses may all use LE.
*/ + #if 0 + if (!flag_pic) + model = TLS_MODEL_LOCAL_EXEC; + #endif + + switch (model) + { + case TLS_MODEL_LOCAL_DYNAMIC: + tmp = gen_rtx_REG (Pmode, GP_RETURN); + dest = gen_reg_rtx (Pmode); + insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSLDM, tmp); + emit_libcall_block (insn, dest, tmp, loc); + break; + + case TLS_MODEL_GLOBAL_DYNAMIC: + tmp = gen_rtx_REG (Pmode, GP_RETURN); + dest = gen_reg_rtx (Pmode); + insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSGD, tmp); + emit_libcall_block (insn, dest, tmp, loc); + break; + + case TLS_MODEL_INITIAL_EXEC: + /* la.tls.ie; tp-relative add */ + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp = gen_reg_rtx (Pmode); + emit_insn (loongarch_got_load_tls_ie (tmp, loc)); + dest = gen_reg_rtx (Pmode); + emit_insn (gen_add3_insn (dest, tmp, tp)); + break; + + case TLS_MODEL_LOCAL_EXEC: + /* la.tls.le; tp-relative add */ + tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); + tmp = gen_reg_rtx (Pmode); + emit_insn (loongarch_got_load_tls_le (tmp, loc)); + dest = gen_reg_rtx (Pmode); + emit_insn (gen_add3_insn (dest, tmp, tp)); + break; + + default: + gcc_unreachable (); + } + return dest; +} + +rtx +loongarch_legitimize_call_address (rtx addr) +{ + if (!call_insn_operand (addr, VOIDmode)) + { + rtx reg = gen_reg_rtx (Pmode); + loongarch_emit_move (reg, addr); + return reg; + } + return addr; +} + +/* If X is not a valid address for mode MODE, force it into a register. */ + +static rtx +loongarch_force_address (rtx x, machine_mode mode) +{ + if (!loongarch_legitimate_address_p (mode, x, false)) + x = force_reg (Pmode, x); + return x; +} + +/* This function is used to implement LEGITIMIZE_ADDRESS. If X can + be legitimized in a way that the generic machinery might not expect, + return a new address, otherwise return NULL. MODE is the mode of + the memory being accessed. */ + +static rtx +loongarch_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, + machine_mode mode) +{ + rtx base, addr; + HOST_WIDE_INT offset; + + if (loongarch_tls_symbol_p (x)) + return loongarch_legitimize_tls_address (x); + + /* Handle BASE + OFFSET using loongarch_add_offset. */ + loongarch_split_plus (x, &base, &offset); + if (offset != 0) + { + if (!loongarch_valid_base_register_p (base, mode, false)) + base = copy_to_mode_reg (Pmode, base); + addr = loongarch_add_offset (NULL, base, offset); + return loongarch_force_address (addr, mode); + } + + return x; +} + +/* Load VALUE into DEST. TEMP is as for loongarch_force_temporary. */ + +void +loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value) +{ + struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS]; + machine_mode mode; + unsigned int i, num_ops; + rtx x; + + mode = GET_MODE (dest); + num_ops = loongarch_build_integer (codes, value); + + /* Apply each binary operation to X. Invariant: X is a legitimate + source operand for a SET pattern. 
*/ + x = GEN_INT (codes[0].value); + for (i = 1; i < num_ops; i++) + { + if (!can_create_pseudo_p ()) + { + emit_insn (gen_rtx_SET (temp, x)); + x = temp; + } + else + x = force_reg (mode, x); + switch (codes[i].method) + { + case METHOD_NORMAL: + x = gen_rtx_fmt_ee (codes[i].code, mode, x, GEN_INT (codes[i].value)); + break; + case METHOD_LU32I: + emit_insn (gen_rtx_SET (x, gen_rtx_IOR (DImode, + gen_rtx_ZERO_EXTEND (DImode, + gen_rtx_SUBREG (SImode, x, 0)), + GEN_INT (codes[i].value)))); + break; + case METHOD_LU52I: + emit_insn (gen_rtx_SET (x, + gen_rtx_UNSPEC (DImode, + gen_rtvec (2, + x, + GEN_INT (codes[i].value)), + UNSPEC_LU52I_D))); + break; + case METHOD_INSV: + emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, + x, + GEN_INT (20), + GEN_INT (32)), + gen_rtx_REG (DImode, 0))); + break; + default: + gcc_unreachable (); + } + } + + emit_insn (gen_rtx_SET (dest, x)); +} + +/* Subroutine of loongarch_legitimize_move. Move constant SRC into register + DEST given that SRC satisfies immediate_operand but doesn't satisfy + move_operand. */ + +static void +loongarch_legitimize_const_move (machine_mode mode, rtx dest, rtx src) +{ + rtx base, offset; + + /* Split moves of big integers into smaller pieces. */ + if (splittable_const_int_operand (src, mode)) + { + loongarch_move_integer (dest, dest, INTVAL (src)); + return; + } + + /* Generate the appropriate access sequences for TLS symbols. */ + if (loongarch_tls_symbol_p (src)) + { + loongarch_emit_move (dest, loongarch_legitimize_tls_address (src)); + return; + } + + /* If we have (const (plus symbol offset)), and that expression cannot + be forced into memory, load the symbol first and add in the offset. + prefer to do this even if the constant _can_ be forced into memory, + as it usually produces better code. */ + split_const (src, &base, &offset); + if (offset != const0_rtx + && (targetm.cannot_force_const_mem (mode, src) + || (can_create_pseudo_p ()))) + { + base = loongarch_force_temporary (dest, base); + loongarch_emit_move (dest, loongarch_add_offset (NULL, base, INTVAL (offset))); + return; + } + + src = force_const_mem (mode, src); + + loongarch_emit_move (dest, src); +} + +/* If (set DEST SRC) is not a valid move instruction, emit an equivalent + sequence that is valid. */ + +bool +loongarch_legitimize_move (machine_mode mode, rtx dest, rtx src) +{ + + if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode)) + { + loongarch_emit_move (dest, force_reg (mode, src)); + return true; + } + + /* Both src and dest are non-registers; one special case is supported where + the source is (const_int 0) and the store can source the zero register. + */ + if (!register_operand (dest, mode) + && !register_operand (src, mode) + && !const_0_operand (src, mode)) + { + loongarch_emit_move (dest, force_reg (mode, src)); + return true; + } + + /* We need to deal with constants that would be legitimate + immediate_operands but aren't legitimate move_operands. */ + if (CONSTANT_P (src) && !move_operand (src, mode)) + { + loongarch_legitimize_const_move (mode, dest, src); + set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (src)); + return true; + } + return false; +} + +/* Return true if OP refers to small data symbols directly, not through + a LO_SUM. CONTEXT is the context in which X appears. 
*/ + +static int +loongarch_small_data_pattern_1 (rtx x, enum loongarch_symbol_context context) +{ + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, x, ALL) + { + rtx x = *iter; + + /* Ignore things like "g" constraints in asms. We make no particular + guarantee about which symbolic constants are acceptable as asm operands + versus which must be forced into a GPR. */ + if (GET_CODE (x) == ASM_OPERANDS) + iter.skip_subrtxes (); + else if (MEM_P (x)) + { + if (loongarch_small_data_pattern_1 (XEXP (x, 0), SYMBOL_CONTEXT_MEM)) + return true; + iter.skip_subrtxes (); + } + } + return false; +} + +/* Return true if OP refers to small data symbols directly, not through + a LO_SUM. */ + +bool +loongarch_small_data_pattern_p (rtx op) +{ + return loongarch_small_data_pattern_1 (op, SYMBOL_CONTEXT_LEA); +} + +/* Rewrite *LOC so that it refers to small data using explicit + relocations. CONTEXT is the context in which *LOC appears. */ + +static void +loongarch_rewrite_small_data_1 (rtx *loc, enum loongarch_symbol_context context) +{ + subrtx_ptr_iterator::array_type array; + FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL) + { + rtx *loc = *iter; + if (MEM_P (*loc)) + { + loongarch_rewrite_small_data_1 (&XEXP (*loc, 0), SYMBOL_CONTEXT_MEM); + iter.skip_subrtxes (); + } + } +} + +/* Rewrite instruction pattern PATTERN so that it refers to small data + using explicit relocations. */ + +rtx +loongarch_rewrite_small_data (rtx pattern) +{ + pattern = copy_insn (pattern); + loongarch_rewrite_small_data_1 (&pattern, SYMBOL_CONTEXT_LEA); + return pattern; +} + +/* The cost of loading values from the constant pool. It should be + larger than the cost of any constant we want to synthesize inline. */ +#define CONSTANT_POOL_COST COSTS_N_INSNS (8) + +/* Return true if there is a instruction that implements CODE + and if that instruction accepts X as an immediate operand. */ + +static int +loongarch_immediate_operand_p (int code, HOST_WIDE_INT x) +{ + switch (code) + { + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + /* All shift counts are truncated to a valid constant. */ + return true; + + case ROTATE: + case ROTATERT: + /* Likewise rotates, if the target supports rotates at all. */ + return true; + + case AND: + case IOR: + case XOR: + /* These instructions take 12-bit unsigned immediates. */ + return SMALL_OPERAND_UNSIGNED (x); + + case PLUS: + case LT: + case LTU: + /* These instructions take 12-bit signed immediates. */ + return SMALL_OPERAND (x); + + case EQ: + case NE: + case GT: + case GTU: + /* The "immediate" forms of these instructions are really + implemented as comparisons with register 0. */ + return x == 0; + + case GE: + case GEU: + /* Likewise, meaning that the only valid immediate operand is 1. */ + return x == 1; + + case LE: + /* We add 1 to the immediate and use SLT. */ + return SMALL_OPERAND (x + 1); + + case LEU: + /* Likewise SLTU, but reject the always-true case. */ + return SMALL_OPERAND (x + 1) && x + 1 != 0; + + case SIGN_EXTRACT: + case ZERO_EXTRACT: + /* The bit position and size are immediate operands. */ + return 1; + + default: + /* By default assume that $0 can be used for 0. */ + return x == 0; + } +} + +/* Return the cost of binary operation X, given that the instruction + sequence for a word-sized or smaller operation has cost SINGLE_COST + and that the sequence of a double-word operation has cost DOUBLE_COST. + If SPEED is true, optimize for speed otherwise optimize for size. 
*/ + +static int +loongarch_binary_cost (rtx x, int single_cost, int double_cost, bool speed) +{ + int cost; + + if (GET_MODE_SIZE (GET_MODE (x)) == UNITS_PER_WORD * 2) + cost = double_cost; + else + cost = single_cost; + return (cost + + set_src_cost (XEXP (x, 0), GET_MODE (x), speed) + + rtx_cost (XEXP (x, 1), GET_MODE (x), GET_CODE (x), 1, speed)); +} + +/* Return the cost of floating-point multiplications of mode MODE. */ + +static int +loongarch_fp_mult_cost (machine_mode mode) +{ + return mode == DFmode ? loongarch_cost->fp_mult_df : loongarch_cost->fp_mult_sf; +} + +/* Return the cost of floating-point divisions of mode MODE. */ + +static int +loongarch_fp_div_cost (machine_mode mode) +{ + return mode == DFmode ? loongarch_cost->fp_div_df : loongarch_cost->fp_div_sf; +} + +/* Return the cost of sign-extending OP to mode MODE, not including the + cost of OP itself. */ + +static int +loongarch_sign_extend_cost (machine_mode mode, rtx op) +{ + if (MEM_P (op)) + /* Extended loads are as cheap as unextended ones. */ + return 0; + + if (TARGET_64BIT && mode == DImode && GET_MODE (op) == SImode) + /* A sign extension from SImode to DImode in 64-bit mode is free. */ + return 0; + + return COSTS_N_INSNS (1); +} + +/* Return the cost of zero-extending OP to mode MODE, not including the + cost of OP itself. */ + +static int +loongarch_zero_extend_cost (machine_mode mode, rtx op) +{ + if (MEM_P (op)) + /* Extended loads are as cheap as unextended ones. */ + return 0; + + if (TARGET_64BIT && mode == DImode && GET_MODE (op) == SImode) + /* We need a shift left by 32 bits and a shift right by 32 bits. */ + return COSTS_N_INSNS (2); + + /* We can use ANDI. */ + return COSTS_N_INSNS (1); +} + +/* Return the cost of moving between two registers of mode MODE, + assuming that the move will be in pieces of at most UNITS bytes. */ + +static int +loongarch_set_reg_reg_piece_cost (machine_mode mode, unsigned int units) +{ + return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units); +} + +/* Return the cost of moving between two registers of mode MODE. */ + +static int +loongarch_set_reg_reg_cost (machine_mode mode) +{ + switch (GET_MODE_CLASS (mode)) + { + case MODE_CC: + return loongarch_set_reg_reg_piece_cost (mode, GET_MODE_SIZE (CCmode)); + + case MODE_FLOAT: + case MODE_COMPLEX_FLOAT: + case MODE_VECTOR_FLOAT: + if (TARGET_HARD_FLOAT) + return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_HWFPVALUE); + /* Fall through */ + + default: + return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_WORD); + } +} + +/* Implement TARGET_RTX_COSTS. */ + +static bool +loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, + int opno ATTRIBUTE_UNUSED, int *total, bool speed) +{ + int code = GET_CODE (x); + bool float_mode_p = FLOAT_MODE_P (mode); + int cost; + rtx addr; + + /* The cost of a COMPARE is hard to define for LARCH. COMPAREs don't + appear in the instruction stream, and the cost of a comparison is + really the cost of the branch or scc condition. At the time of + writing, GCC only uses an explicit outer COMPARE code when optabs + is testing whether a constant is expensive enough to force into a + register. We want optabs to pass such constants through the LARCH + expanders instead, so make all constants very cheap here. */ + if (outer_code == COMPARE) + { + gcc_assert (CONSTANT_P (x)); + *total = 0; + return true; + } + + switch (code) + { + case CONST_INT: + /* Treat *clear_upper32-style ANDs as having zero cost in the + second operand. The cost is entirely in the first operand. 
+ + ??? This is needed because we would otherwise try to CSE + the constant operand. Although that's the right thing for + instructions that continue to be a register operation throughout + compilation, it is disastrous for instructions that could + later be converted into a memory operation. */ + if (TARGET_64BIT + && outer_code == AND + && UINTVAL (x) == 0xffffffff) + { + *total = 0; + return true; + } + + /* When not optimizing for size, we care more about the cost + of hot code, and hot code is often in a loop. If a constant + operand needs to be forced into a register, we will often be + able to hoist the constant load out of the loop, so the load + should not contribute to the cost. */ + if (speed || loongarch_immediate_operand_p (outer_code, INTVAL (x))) + { + *total = 0; + return true; + } + /* Fall through. */ + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + case CONST_DOUBLE: + if (force_to_mem_operand (x, VOIDmode)) + { + *total = COSTS_N_INSNS (1); + return true; + } + cost = loongarch_const_insns (x); + if (cost > 0) + { + /* If the constant is likely to be stored in a GPR, SETs of + single-insn constants are as cheap as register sets; we + never want to CSE them. + + Don't reduce the cost of storing a floating-point zero in + FPRs. If we have a zero in an FPR for other reasons, we + can get better cfg-cleanup and delayed-branch results by + using it consistently, rather than using $0 sometimes and + an FPR at other times. Also, moves between floating-point + registers are sometimes cheaper than MOVGR2FR.W/MOVGR2FR.D $0. */ + if (cost == 1 + && outer_code == SET + && !(float_mode_p && TARGET_HARD_FLOAT)) + cost = 0; + /* When code loads a constant N>1 times, we rarely + want to CSE the constant itself. It is usually better to + have N copies of the last operation in the sequence and one + shared copy of the other operations. + + Also, if we have a CONST_INT, we don't know whether it is + for a word or doubleword operation, so we cannot rely on + the result of loongarch_build_integer. */ + else if ((outer_code == SET || GET_MODE (x) == VOIDmode)) + cost = 1; + *total = COSTS_N_INSNS (cost); + return true; + } + /* The value will need to be fetched from the constant pool. */ + *total = CONSTANT_POOL_COST; + return true; + + case MEM: + /* If the address is legitimate, return the number of + instructions it needs. */ + addr = XEXP (x, 0); + cost = loongarch_address_insns (addr, mode, true); + if (cost > 0) + { + *total = COSTS_N_INSNS (cost + 1); + return true; + } + /* Check for a scaled indexed address. */ + if (loongarch_lx_address_p (addr, mode)) + { + *total = COSTS_N_INSNS (2); + return true; + } + /* Otherwise use the default handling. */ + return false; + + case FFS: + *total = COSTS_N_INSNS (6); + return false; + + case NOT: + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 2 : 1); + return false; + + case AND: + /* Check for a *clear_upper32 pattern and treat it like a zero + extension. See the pattern's comment for details. */ + if (TARGET_64BIT + && mode == DImode + && CONST_INT_P (XEXP (x, 1)) + && UINTVAL (XEXP (x, 1)) == 0xffffffff) + { + *total = (loongarch_zero_extend_cost (mode, XEXP (x, 0)) + + set_src_cost (XEXP (x, 0), mode, speed)); + return true; + } + /* (AND (NOT op0) (NOT op1) is a nor operation that can be done in + a single instruction. */ + if (GET_CODE (XEXP (x, 0)) == NOT + && GET_CODE (XEXP (x, 1)) == NOT) + { + cost = GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 
2 : 1; + *total = (COSTS_N_INSNS (cost) + + set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed) + + set_src_cost (XEXP (XEXP (x, 1), 0), mode, speed)); + return true; + } + + /* Fall through. */ + + case IOR: + case XOR: + /* Double-word operations use two single-word operations. */ + *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2), + speed); + return true; + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + case ROTATE: + case ROTATERT: + if (CONSTANT_P (XEXP (x, 1))) + *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4), + speed); + else + *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (12), + speed); + return true; + + case ABS: + if (float_mode_p) + *total = loongarch_cost->fp_add; + else + *total = COSTS_N_INSNS (4); + return false; + + case LT: + case LTU: + case LE: + case LEU: + case GT: + case GTU: + case GE: + case GEU: + case EQ: + case NE: + case UNORDERED: + case LTGT: + case UNGE: + case UNGT: + case UNLE: + case UNLT: + /* Branch comparisons have VOIDmode, so use the first operand's + mode instead. */ + mode = GET_MODE (XEXP (x, 0)); + if (FLOAT_MODE_P (mode)) + { + *total = loongarch_cost->fp_add; + return false; + } + *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4), + speed); + return true; + + case MINUS: + case PLUS: + if (float_mode_p) + { + *total = loongarch_cost->fp_add; + return false; + } + + /* If it's an add + mult (which is equivalent to shift left) and + it's immediate operand satisfies const_immlsa_operand predicate. */ + if (((ISA_HAS_LSA && mode == SImode) + || (ISA_HAS_DLSA && mode == DImode)) + && GET_CODE (XEXP (x, 0)) == MULT) + { + rtx op2 = XEXP (XEXP (x, 0), 1); + if (const_immlsa_operand (op2, mode)) + { + *total = (COSTS_N_INSNS (1) + + set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed) + + set_src_cost (XEXP (x, 1), mode, speed)); + return true; + } + } + + /* Double-word operations require three single-word operations and + an SLTU. */ + *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), + COSTS_N_INSNS (4), + speed); + return true; + + case NEG: + if (float_mode_p) + *total = loongarch_cost->fp_add; + else + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 4 : 1); + return false; + + case FMA: + *total = loongarch_fp_mult_cost (mode); + return false; + + case MULT: + if (float_mode_p) + *total = loongarch_fp_mult_cost (mode); + else if (mode == DImode && !TARGET_64BIT) + /* Synthesized from 2 mulsi3s, 1 mulsidi3 and two additions, + where the mulsidi3 always includes an MFHI and an MFLO. */ + // FIXED ME??? + *total = (speed + ? loongarch_cost->int_mult_si * 3 + 6 + : COSTS_N_INSNS (7)); + else if (!speed) + *total = COSTS_N_INSNS (1) + 1; + else if (mode == DImode) + *total = loongarch_cost->int_mult_di; + else + *total = loongarch_cost->int_mult_si; + return false; + + case DIV: + /* Check for a reciprocal. */ + if (float_mode_p + && ISA_HAS_FP_RECIP_RSQRT (mode) + && flag_unsafe_math_optimizations + && XEXP (x, 0) == CONST1_RTX (mode)) + { + if (outer_code == SQRT || GET_CODE (XEXP (x, 1)) == SQRT) + /* An rsqrta or rsqrtb pattern. Count the + division as being free. */ + *total = set_src_cost (XEXP (x, 1), mode, speed); + else + *total = (loongarch_fp_div_cost (mode) + + set_src_cost (XEXP (x, 1), mode, speed)); + return true; + } + /* Fall through. */ + + case SQRT: + case MOD: + if (float_mode_p) + { + *total = loongarch_fp_div_cost (mode); + return false; + } + /* Fall through. 
*/ + + case UDIV: + case UMOD: + if (!speed) + { + *total = COSTS_N_INSNS (loongarch_idiv_insns (mode)); + } + else if (mode == DImode) + *total = loongarch_cost->int_div_di; + else + *total = loongarch_cost->int_div_si; + return false; + + case SIGN_EXTEND: + *total = loongarch_sign_extend_cost (mode, XEXP (x, 0)); + return false; + + case ZERO_EXTEND: + *total = loongarch_zero_extend_cost (mode, XEXP (x, 0)); + return false; + case TRUNCATE: + /* Costings for highpart multiplies. Matching patterns of the form: + + (lshiftrt:DI (mult:DI (sign_extend:DI (...) + (sign_extend:DI (...)) + (const_int 32) + */ + if ((GET_CODE (XEXP (x, 0)) == ASHIFTRT + || GET_CODE (XEXP (x, 0)) == LSHIFTRT) + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && ((INTVAL (XEXP (XEXP (x, 0), 1)) == 32 + && GET_MODE (XEXP (x, 0)) == DImode) + || (ISA_HAS_DMUL + && INTVAL (XEXP (XEXP (x, 0), 1)) == 64 + && GET_MODE (XEXP (x, 0)) == TImode)) + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND + && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND) + || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND + && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) + == ZERO_EXTEND)))) + { + if (!speed) + *total = COSTS_N_INSNS (1) + 1; + else if (mode == DImode) + *total = loongarch_cost->int_mult_di; + else + *total = loongarch_cost->int_mult_si; + + /* Sign extension is free, zero extension costs for DImode when + on a 64bit core / when DMUL is present. */ + for (int i = 0; i < 2; ++i) + { + rtx op = XEXP (XEXP (XEXP (x, 0), 0), i); + if (ISA_HAS_DMUL + && GET_CODE (op) == ZERO_EXTEND + && GET_MODE (op) == DImode) + *total += rtx_cost (op, DImode, MULT, i, speed); + else + *total += rtx_cost (XEXP (op, 0), VOIDmode, GET_CODE (op), + 0, speed); + } + + return true; + } + return false; + + case FLOAT: + case UNSIGNED_FLOAT: + case FIX: + case FLOAT_EXTEND: + case FLOAT_TRUNCATE: + *total = loongarch_cost->fp_add; + return false; + + case SET: + if (register_operand (SET_DEST (x), VOIDmode) + && reg_or_0_operand (SET_SRC (x), VOIDmode)) + { + *total = loongarch_set_reg_reg_cost (GET_MODE (SET_DEST (x))); + return true; + } + return false; + + default: + return false; + } +} + +/* Implement TARGET_ADDRESS_COST. */ + +static int +loongarch_address_cost (rtx addr, machine_mode mode, + addr_space_t as ATTRIBUTE_UNUSED, + bool speed ATTRIBUTE_UNUSED) +{ + return loongarch_address_insns (addr, mode, false); +} + + +/* Return one word of double-word value OP, taking into account the fixed + endianness of certain registers. HIGH_P is true to select the high part, + false to select the low part. */ + +rtx +loongarch_subword (rtx op, bool high_p) +{ + unsigned int byte, offset; + machine_mode mode; + + mode = GET_MODE (op); + if (mode == VOIDmode) + mode = TARGET_64BIT ? TImode : DImode; + + if (high_p) + byte = UNITS_PER_WORD; + else + byte = 0; + + if (FP_REG_RTX_P (op)) + { + /* Paired FPRs are always ordered little-endian. */ + offset = (UNITS_PER_WORD < UNITS_PER_HWFPVALUE ? high_p : byte != 0); + return gen_rtx_REG (word_mode, REGNO (op) + offset); + } + + if (MEM_P (op)) + return loongarch_rewrite_small_data (adjust_address (op, word_mode, byte)); + + return simplify_gen_subreg (word_mode, op, mode, byte); +} + +/* Return true if a move from SRC to DEST should be split into two. + SPLIT_TYPE describes the split condition. 
*/ + +bool +loongarch_split_move_p (rtx dest, rtx src, enum loongarch_split_type split_type) +{ + /* FPR-to-FPR moves can be done in a single instruction, if they're + allowed at all. */ + unsigned int size = GET_MODE_SIZE (GET_MODE (dest)); + if (size == 8 && FP_REG_RTX_P (src) && FP_REG_RTX_P (dest)) + return false; + + /* Check for floating-point loads and stores. */ + if (size == 8) + { + if (FP_REG_RTX_P (dest) && MEM_P (src)) + return false; + if (FP_REG_RTX_P (src) && MEM_P (dest)) + return false; + } + /* Otherwise split all multiword moves. */ + return size > UNITS_PER_WORD; +} + +/* Split a move from SRC to DEST, given that loongarch_split_move_p holds. + SPLIT_TYPE describes the split condition. */ + +void +loongarch_split_move (rtx dest, rtx src, enum loongarch_split_type split_type, rtx insn_) +{ + rtx low_dest; + + gcc_checking_assert (loongarch_split_move_p (dest, src, split_type)); + if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src)) + { + if (!TARGET_64BIT && GET_MODE (dest) == DImode) + emit_insn (gen_move_doubleword_fprdi (dest, src)); + else if (!TARGET_64BIT && GET_MODE (dest) == DFmode) + emit_insn (gen_move_doubleword_fprdf (dest, src)); + else if (TARGET_64BIT && GET_MODE (dest) == TFmode) + emit_insn (gen_move_doubleword_fprtf (dest, src)); + else + gcc_unreachable (); + } + else + { + /* The operation can be split into two normal moves. Decide in + which order to do them. */ + low_dest = loongarch_subword (dest, false); + if (REG_P (low_dest) + && reg_overlap_mentioned_p (low_dest, src)) + { + loongarch_emit_move (loongarch_subword (dest, true), loongarch_subword (src, true)); + loongarch_emit_move (low_dest, loongarch_subword (src, false)); + } + else + { + loongarch_emit_move (low_dest, loongarch_subword (src, false)); + loongarch_emit_move (loongarch_subword (dest, true), loongarch_subword (src, true)); + } + } + + /* This is a hack. See if the next insn uses DEST and if so, see if we + can forward SRC for DEST. This is most useful if the next insn is a + simple store. */ + rtx_insn *insn = (rtx_insn *)insn_; + struct loongarch_address_info addr = {}; + if (insn) + { + rtx_insn *next = next_nonnote_nondebug_insn_bb (insn); + if (next) + { + rtx set = single_set (next); + if (set && SET_SRC (set) == dest) + { + if (MEM_P (src)) + { + rtx tmp = XEXP (src, 0); + loongarch_classify_address (&addr, tmp, GET_MODE (tmp), true); + if (addr.reg && !reg_overlap_mentioned_p (dest, addr.reg)) + validate_change (next, &SET_SRC (set), src, false); + } + else + validate_change (next, &SET_SRC (set), src, false); + } + } + } +} + +/* Return the split type for instruction INSN. */ + +static enum loongarch_split_type +loongarch_insn_split_type (rtx insn) +{ + basic_block bb = BLOCK_FOR_INSN (insn); + if (bb) + { + if (optimize_bb_for_speed_p (bb)) + return SPLIT_FOR_SPEED; + else + return SPLIT_FOR_SIZE; + } + /* Once CFG information has been removed, we should trust the optimization + decisions made by previous passes and only split where necessary. */ + return SPLIT_IF_NECESSARY; +} + + +/* Return true if a move from SRC to DEST in INSN should be split. */ + +bool +loongarch_split_move_insn_p (rtx dest, rtx src, rtx insn) +{ + return loongarch_split_move_p (dest, src, loongarch_insn_split_type (insn)); +} + +/* Split a move from SRC to DEST in INSN, given that loongarch_split_move_insn_p + holds. */ + +void +loongarch_split_move_insn (rtx dest, rtx src, rtx insn) +{ + loongarch_split_move (dest, src, loongarch_insn_split_type (insn), insn); +} + + +/* Forward declaration. 
Used below */ +static HOST_WIDE_INT +loongarch_constant_alignment (const_tree exp, HOST_WIDE_INT align); + +/* Return the appropriate instructions to move SRC into DEST. Assume + that SRC is operand 1 and DEST is operand 0. */ + +const char * +loongarch_output_move (rtx dest, rtx src) +{ + enum rtx_code dest_code = GET_CODE (dest); + enum rtx_code src_code = GET_CODE (src); + machine_mode mode = GET_MODE (dest); + bool dbl_p = (GET_MODE_SIZE (mode) == 8); + enum loongarch_symbol_type symbol_type; + + if (loongarch_split_move_p (dest, src, SPLIT_IF_NECESSARY)) + return "#"; + + if ((src_code == REG && GP_REG_P (REGNO (src))) + || (src == CONST0_RTX (mode))) + { + if (dest_code == REG) + { + if (GP_REG_P (REGNO (dest))) + return "or\t%0,%z1,$zero"; + + if (FP_REG_P (REGNO (dest))) + return dbl_p ? "movgr2fr.d\t%0,%z1" : "movgr2fr.w\t%0,%z1"; + } + if (dest_code == MEM) + { + rtx offset = XEXP (dest, 0); + if (GET_CODE(offset) == PLUS) + offset = XEXP(offset, 1); + switch (GET_MODE_SIZE (mode)) + { + case 1: return "st.b\t%z1,%0"; + case 2: return "st.h\t%z1,%0"; + case 4: + if (const_arith_operand (offset, Pmode)) + return "st.w\t%z1,%0"; + else + return "stptr.w\t%z1,%0"; + case 8: + if (const_arith_operand (offset, Pmode)) + return "st.d\t%z1,%0"; + else + return "stptr.d\t%z1,%0"; + default: gcc_unreachable (); + } + } + } + if (dest_code == REG && GP_REG_P (REGNO (dest))) + { + if (src_code == REG) + if (FP_REG_P (REGNO (src))) + return dbl_p ? "movfr2gr.d\t%0,%1" : "movfr2gr.s\t%0,%1"; + + if (src_code == MEM) + { + rtx offset = XEXP (src, 0); + if (GET_CODE(offset) == PLUS) + offset = XEXP(offset, 1); + switch (GET_MODE_SIZE (mode)) + { + case 1: return "ld.bu\t%0,%1"; + case 2: return "ld.hu\t%0,%1"; + case 4: + if (const_arith_operand (offset, Pmode)) + return "ld.w\t%0,%1"; + else + return "ldptr.w\t%0,%1"; + case 8: + if (const_arith_operand (offset, Pmode)) + return "ld.d\t%0,%1"; + else + return "ldptr.d\t%0,%1"; + default: gcc_unreachable (); + } + } + + if (src_code == CONST_INT) + { + if (LUI_INT (src)) + return "lu12i.w\t%0,%1>>12\t\t\t# %X1"; + else if (SMALL_INT (src)) + return "addi.w\t%0,$zero,%1\t\t\t# %X1"; + else if (SMALL_INT_UNSIGNED (src)) + return "ori\t%0,$zero,%1\t\t\t# %X1"; + else if (LU52I_INT (src)) + return "lu52i.d\t%0,$zero,%X1>>52\t\t\t# %1"; + else + gcc_unreachable (); + } + + if (symbolic_operand (src, VOIDmode)) + { + + switch (loongarch_cmodel_var) + { + case LARCH_CMODEL_TINY: + do + { + if (loongarch_global_symbol_p (src) + && !loongarch_symbol_binds_local_p (src)) + break; + case LARCH_CMODEL_TINY_STATIC: + if (loongarch_weak_symbol_p (src)) + break; + + /* The symbol must be aligned to 4 byte. 
*/ + unsigned int align; + + if (GET_CODE (src) == LABEL_REF) + align = 128 /* whatever */; + /* copy from aarch64 */ + else if (CONSTANT_POOL_ADDRESS_P (src)) + align = GET_MODE_ALIGNMENT (get_pool_mode (src)); + else if (TREE_CONSTANT_POOL_ADDRESS_P (src)) + { + tree exp = SYMBOL_REF_DECL (src); + align = TYPE_ALIGN (TREE_TYPE (exp)); + align = loongarch_constant_alignment (exp, align); + } + else if (SYMBOL_REF_DECL (src)) + align = DECL_ALIGN (SYMBOL_REF_DECL (src)); + else if (SYMBOL_REF_HAS_BLOCK_INFO_P (src) + && SYMBOL_REF_BLOCK (src) != NULL) + align = SYMBOL_REF_BLOCK (src)->alignment; + else + align = BITS_PER_UNIT; + + if (align % (4 * 8) == 0) + return "pcaddi\t%0,%%pcrel(%1)>>2"; + } + while (0); + case LARCH_CMODEL_NORMAL: + case LARCH_CMODEL_LARGE: + if (!loongarch_global_symbol_p (src) + || loongarch_symbol_binds_local_p (src)) + return "la.local\t%0,%1"; + else + return "la.global\t%0,%1"; + case LARCH_CMODEL_EXTREME: + sorry ("not support yet."); + return ""; + break; + default: + gcc_unreachable (); + } + } + } + if (src_code == REG && FP_REG_P (REGNO (src))) + { + if (dest_code == REG && FP_REG_P (REGNO (dest))) + return dbl_p ? "fmov.d\t%0,%1" : "fmov.s\t%0,%1"; + + if (dest_code == MEM) + return dbl_p ? "fst.d\t%1,%0" : "fst.s\t%1,%0"; + } + if (dest_code == REG && FP_REG_P (REGNO (dest))) + { + if (src_code == MEM) + return dbl_p ? "fld.d\t%0,%1" : "fld.s\t%0,%1"; + } + gcc_unreachable (); +} + +/* Return true if CMP1 is a suitable second operand for integer ordering + test CODE. See also the *sCC patterns in loongarch.md. */ + +static bool +loongarch_int_order_operand_ok_p (enum rtx_code code, rtx cmp1) +{ + switch (code) + { + case GT: + case GTU: + return reg_or_0_operand (cmp1, VOIDmode); + + case GE: + case GEU: + return cmp1 == const1_rtx; + + case LT: + case LTU: + return arith_operand (cmp1, VOIDmode); + + case LE: + return sle_operand (cmp1, VOIDmode); + + case LEU: + return sleu_operand (cmp1, VOIDmode); + + default: + gcc_unreachable (); + } +} + +/* Return true if *CMP1 (of mode MODE) is a valid second operand for + integer ordering test *CODE, or if an equivalent combination can + be formed by adjusting *CODE and *CMP1. When returning true, update + *CODE and *CMP1 with the chosen code and operand, otherwise leave + them alone. */ + +static bool +loongarch_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1, + machine_mode mode) +{ + HOST_WIDE_INT plus_one; + + if (loongarch_int_order_operand_ok_p (*code, *cmp1)) + return true; + + if (CONST_INT_P (*cmp1)) + switch (*code) + { + case LE: + plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode); + if (INTVAL (*cmp1) < plus_one) + { + *code = LT; + *cmp1 = force_reg (mode, GEN_INT (plus_one)); + return true; + } + break; + + case LEU: + plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode); + if (plus_one != 0) + { + *code = LTU; + *cmp1 = force_reg (mode, GEN_INT (plus_one)); + return true; + } + break; + + default: + break; + } + return false; +} + +/* Compare CMP0 and CMP1 using ordering test CODE and store the result + in TARGET. CMP0 and TARGET are register_operands. If INVERT_PTR + is nonnull, it's OK to set TARGET to the inverse of the result and + flip *INVERT_PTR instead. */ + +static void +loongarch_emit_int_order_test (enum rtx_code code, bool *invert_ptr, + rtx target, rtx cmp0, rtx cmp1) +{ + machine_mode mode; + + /* First see if there is a LARCH instruction that can do this operation. + If not, try doing the same for the inverse operation. 
If that also + fails, force CMP1 into a register and try again. */ + mode = GET_MODE (cmp0); + if (loongarch_canonicalize_int_order_test (&code, &cmp1, mode)) + loongarch_emit_binary (code, target, cmp0, cmp1); + else + { + enum rtx_code inv_code = reverse_condition (code); + if (!loongarch_canonicalize_int_order_test (&inv_code, &cmp1, mode)) + { + cmp1 = force_reg (mode, cmp1); + loongarch_emit_int_order_test (code, invert_ptr, target, cmp0, cmp1); + } + else if (invert_ptr == 0) + { + rtx inv_target; + + inv_target = loongarch_force_binary (GET_MODE (target), + inv_code, cmp0, cmp1); + loongarch_emit_binary (XOR, target, inv_target, const1_rtx); + } + else + { + *invert_ptr = !*invert_ptr; + loongarch_emit_binary (inv_code, target, cmp0, cmp1); + } + } +} + +/* Return a register that is zero iff CMP0 and CMP1 are equal. + The register will have the same mode as CMP0. */ + +static rtx +loongarch_zero_if_equal (rtx cmp0, rtx cmp1) +{ + if (cmp1 == const0_rtx) + return cmp0; + + if (uns_arith_operand (cmp1, VOIDmode)) + return expand_binop (GET_MODE (cmp0), xor_optab, + cmp0, cmp1, 0, 0, OPTAB_DIRECT); + + return expand_binop (GET_MODE (cmp0), sub_optab, + cmp0, cmp1, 0, 0, OPTAB_DIRECT); +} + +/* Convert *CODE into a code that can be used in a floating-point + scc instruction (C.cond.fmt). Return true if the values of + the condition code registers will be inverted, with 0 indicating + that the condition holds. */ + +static bool +loongarch_reversed_fp_cond (enum rtx_code *code) +{ + switch (*code) + { + case NE: + case LTGT: + case ORDERED: + *code = reverse_condition_maybe_unordered (*code); + return true; + + default: + return false; + } +} + +/* Allocate a floating-point condition-code register of mode MODE. + + These condition code registers are used for certain kinds + of compound operation, such as compare and branches, vconds, + and built-in functions. At expand time, their use is entirely + controlled by LARCH-specific code and is entirely internal + to these compound operations. + + We could (and did in the past) expose condition-code values + as pseudo registers and leave the register allocator to pick + appropriate registers. The problem is that it is not practically + possible for the rtl optimizers to guarantee that no spills will + be needed, even when AVOID_CCMODE_COPIES is defined. We would + therefore need spill and reload sequences to handle the worst case. + + Although such sequences do exist, they are very expensive and are + not something we'd want to use. + + The main benefit of having more than one condition-code register + is to allow the pipelining of operations, especially those involving + comparisons and conditional moves. We don't really expect the + registers to be live for long periods, and certainly never want + them to be live across calls. + + Also, there should be no penalty attached to using all the available + registers. They are simply bits in the same underlying FPU control + register. + + We therefore expose the hardware registers from the outset and use + a simple round-robin allocation scheme. 
*/ + +static rtx +loongarch_allocate_fcc (machine_mode mode) +{ + unsigned int regno, count; + + gcc_assert (TARGET_HARD_FLOAT); + + if (mode == CCmode) + count = 1; + else + gcc_unreachable (); + + cfun->machine->next_fcc += -cfun->machine->next_fcc & (count - 1); + if (cfun->machine->next_fcc > ST_REG_LAST - ST_REG_FIRST) + cfun->machine->next_fcc = 0; + + regno = ST_REG_FIRST + cfun->machine->next_fcc; + cfun->machine->next_fcc += count; + return gen_rtx_REG (mode, regno); +} + +/* Convert a comparison into something that can be used in a branch or + conditional move. On entry, *OP0 and *OP1 are the values being + compared and *CODE is the code used to compare them. + + Update *CODE, *OP0 and *OP1 so that they describe the final comparison. + If NEED_EQ_NE_P, then only EQ or NE comparisons against zero are possible, + otherwise any standard branch condition can be used. The standard branch + conditions are: + + - EQ or NE between two registers. + - any comparison between a register and zero. + - if compact branches are available then any condition is valid. */ + +static void +loongarch_emit_compare (enum rtx_code *code, rtx *op0, rtx *op1, bool need_eq_ne_p) +{ + rtx cmp_op0 = *op0; + rtx cmp_op1 = *op1; + + if (GET_MODE_CLASS (GET_MODE (*op0)) == MODE_INT) + { + if (!need_eq_ne_p && *op1 == const0_rtx) + ; + else if (*code == EQ || *code == NE) + { + if (need_eq_ne_p) + { + *op0 = loongarch_zero_if_equal (cmp_op0, cmp_op1); + *op1 = const0_rtx; + } + else + *op1 = force_reg (GET_MODE (cmp_op0), cmp_op1); + } + else if (!need_eq_ne_p) + { + bool swap = false; + switch (*code) + { + case LE: + swap = true; + *code = GE; + break; + case GT: + swap = true; + *code = LT; + break; + case LEU: + swap = true; + *code = GEU; + break; + case GTU: + swap = true; + *code = LTU; + break; + case GE: + case LT: + case GEU: + case LTU: + /* Do nothing. */ + break; + default: + gcc_unreachable (); + } + *op1 = force_reg (GET_MODE (cmp_op0), cmp_op1); + if (swap) + { + rtx tmp = *op1; + *op1 = *op0; + *op0 = tmp; + } + } + else + { + /* The comparison needs a separate scc instruction. Store the + result of the scc in *OP0 and compare it against zero. */ + bool invert = false; + *op0 = gen_reg_rtx (GET_MODE (cmp_op0)); + loongarch_emit_int_order_test (*code, &invert, *op0, cmp_op0, cmp_op1); + *code = (invert ? EQ : NE); + *op1 = const0_rtx; + } + } + else + { + enum rtx_code cmp_code; + + /* Floating-point tests use a separate FCMP.cond.fmt + comparison to set a register. The branch or conditional move will + then compare that register against zero. + + Set CMP_CODE to the code of the comparison instruction and + *CODE to the code that the branch or move should use. */ + cmp_code = *code; + /* Three FP conditions cannot be implemented by reversing the + operands for FCMP.cond.fmt, instead a reversed condition code is + required and a test for false. */ + *code = loongarch_reversed_fp_cond (&cmp_code) ? EQ : NE; + *op0 = loongarch_allocate_fcc (CCmode); + + *op1 = const0_rtx; + loongarch_emit_binary (cmp_code, *op0, cmp_op0, cmp_op1); + } +} + +/* Try performing the comparison in OPERANDS[1], whose arms are OPERANDS[2] + and OPERAND[3]. Store the result in OPERANDS[0]. + + On 64-bit targets, the mode of the comparison and target will always be + SImode, thus possibly narrower than that of the comparison's operands. 
*/ + +void +loongarch_expand_scc (rtx operands[]) +{ + rtx target = operands[0]; + enum rtx_code code = GET_CODE (operands[1]); + rtx op0 = operands[2]; + rtx op1 = operands[3]; + + gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT); + + if (code == EQ || code == NE) + { + { + rtx zie = loongarch_zero_if_equal (op0, op1); + loongarch_emit_binary (code, target, zie, const0_rtx); + } + } + else + loongarch_emit_int_order_test (code, 0, target, op0, op1); +} + +/* Compare OPERANDS[1] with OPERANDS[2] using comparison code + CODE and jump to OPERANDS[3] if the condition holds. */ + +void +loongarch_expand_conditional_branch (rtx *operands) +{ + enum rtx_code code = GET_CODE (operands[0]); + rtx op0 = operands[1]; + rtx op1 = operands[2]; + rtx condition; + + loongarch_emit_compare (&code, &op0, &op1, 0); + condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); + emit_jump_insn (gen_condjump (condition, operands[3])); +} + +/* Perform the comparison in OPERANDS[1]. Move OPERANDS[2] into OPERANDS[0] + if the condition holds, otherwise move OPERANDS[3] into OPERANDS[0]. */ + +bool +loongarch_expand_conditional_move (rtx *operands) +{ + rtx cond; + enum rtx_code code = GET_CODE (operands[1]); + rtx op0 = XEXP (operands[1], 0); + rtx op1 = XEXP (operands[1], 1); + + loongarch_emit_compare (&code, &op0, &op1, true); + cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); + + /* There is no direct support for general conditional GP move involving + two registers using SEL. */ + if (INTEGRAL_MODE_P (GET_MODE (operands[2])) + && register_operand (operands[2], VOIDmode) + && register_operand (operands[3], VOIDmode)) + { + machine_mode mode = GET_MODE (operands[0]); + rtx temp = gen_reg_rtx (mode); + rtx temp2 = gen_reg_rtx (mode); + + emit_insn (gen_rtx_SET (temp, + gen_rtx_IF_THEN_ELSE (mode, cond, + operands[2], const0_rtx))); + + /* Flip the test for the second operand. */ + cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE (op0), op0, op1); + + emit_insn (gen_rtx_SET (temp2, + gen_rtx_IF_THEN_ELSE (mode, cond, + operands[3], const0_rtx))); + + /* Merge the two results, at least one is guaranteed to be zero. */ + emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2))); + + return true; + } + else + return false; +} + + +/* Initialize *CUM for a call to a function of type FNTYPE. */ + +void +loongarch_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype) +{ + memset (cum, 0, sizeof (*cum)); + cum->prototype = (fntype && prototype_p (fntype)); + cum->gp_reg_found = (cum->prototype && stdarg_p (fntype)); +} + +/* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the first + byte of the stack slot has useful data, PAD_DOWNWARD if the last byte + does. */ + +static pad_direction +loongarch_function_arg_padding (machine_mode mode, const_tree type) +{ + /* On little-endian targets, the first byte of every stack argument + is passed in the first byte of the stack slot. */ + if (!BYTES_BIG_ENDIAN) + return PAD_UPWARD; + + /* Otherwise, integral types are padded downward: the last byte of a + stack argument is passed in the last byte of the stack slot. */ + if (type != 0 + ? (INTEGRAL_TYPE_P (type) + || POINTER_TYPE_P (type) + || FIXED_POINT_TYPE_P (type)) + : (SCALAR_INT_MODE_P (mode) + || ALL_SCALAR_FIXED_POINT_MODE_P (mode))) + return PAD_DOWNWARD; + + return PAD_UPWARD; +} + +/* Likewise BLOCK_REG_PADDING (MODE, TYPE, ...). Return !BYTES_BIG_ENDIAN + if the least significant byte of the register has useful data. 
Return the opposite if the most significant byte does.  */
+
+bool
+loongarch_pad_reg_upward (machine_mode mode, tree type)
+{
+  /* No shifting is required for floating-point arguments.  */
+  if (type != 0 ? FLOAT_TYPE_P (type) : GET_MODE_CLASS (mode) == MODE_FLOAT)
+    return !BYTES_BIG_ENDIAN;
+
+  /* Otherwise, apply the same padding to register arguments as we do
+     to stack arguments.  */
+  return loongarch_function_arg_padding (mode, type) == PAD_UPWARD;
+}
+
+/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
+
+static void
+loongarch_va_start (tree valist, rtx nextarg)
+{
+  nextarg = plus_constant (Pmode, nextarg, -cfun->machine->varargs_size);
+  std_expand_builtin_va_start (valist, nextarg);
+}
+
+
+/* Start a definition of function NAME.  */
+
+static void
+loongarch_start_function_definition (const char *name)
+{
+  ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, name, "function");
+
+  /* Start the definition proper.  */
+  assemble_name (asm_out_file, name);
+  fputs (":\n", asm_out_file);
+}
+
+/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  */
+
+static bool
+loongarch_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
+{
+  if (!TARGET_SIBCALLS)
+    return false;
+
+  /* Interrupt handlers need special epilogue code and therefore can't
+     use sibcalls.  */
+  if (loongarch_interrupt_type_p (TREE_TYPE (current_function_decl)))
+    return false;
+
+  /* Otherwise OK.  */
+  return true;
+}
+
+/* Implement TARGET_USE_MOVE_BY_PIECES_INFRASTRUCTURE_P.  */
+
+bool
+loongarch_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
+                                          unsigned int align,
+                                          enum by_pieces_operation op,
+                                          bool speed_p)
+{
+  if (op == STORE_BY_PIECES)
+    return loongarch_store_by_pieces_p (size, align);
+  if (op == MOVE_BY_PIECES && HAVE_movmemsi)
+    {
+      /* movmemsi is meant to generate code that is at least as good as
+         move_by_pieces.  However, movmemsi effectively uses a by-pieces
+         implementation both for moves smaller than a word and for
+         word-aligned moves of no more than LARCH_MAX_MOVE_BYTES_STRAIGHT
+         bytes.  We should allow the tree-level optimisers to do such
+         moves by pieces, as it often exposes other optimization
+         opportunities.  We might as well continue to use movmemsi at
+         the rtl level though, as it produces better code when
+         scheduling is disabled (such as at -O).  */
+      if (currently_expanding_to_rtl)
+        return false;
+      if (align < BITS_PER_WORD)
+        return size < UNITS_PER_WORD;
+      return size <= LARCH_MAX_MOVE_BYTES_STRAIGHT;
+    }
+
+  return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
+}
+
+/* Implement a handler for STORE_BY_PIECES operations
+   for TARGET_USE_MOVE_BY_PIECES_INFRASTRUCTURE_P.  */
+
+bool
+loongarch_store_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align)
+{
+  /* Storing by pieces involves moving constants into registers
+     of size MIN (ALIGN, BITS_PER_WORD), then storing them.
+     We need to decide whether it is cheaper to load the address of
+     constant data into a register and use a block move instead.  */
+
+  /* If the data is only byte aligned, then:
+
+     (a1) A block move of less than 4 bytes would involve 3 LD.Bs and
+          3 ST.Bs.  We might as well use 3 single-instruction LIs and
+          3 ST.Bs instead.
+
+     (a2) A block move of 4 bytes from aligned source data can use an
+          LD.W/ST.W sequence.  This is often better than the 4 LIs and
+          4 ST.Bs that we would generate when storing by pieces.
*/ + if (align <= BITS_PER_UNIT) + return size < 4; + + /* If the data is 2-byte aligned, then: + + (b1) A block move of less than 4 bytes would use a combination of LD.Bs, + LD.Hs, SD.Bs and SD.Hs. We get better code by using single-instruction + LIs, SD.Bs and SD.Hs instead. + + (b2) A block move of 4 bytes from aligned source data would again use + an LD.W/ST.W sequence. In most cases, loading the address of + the source data would require at least one extra instruction. + It is often more efficient to use 2 single-instruction LIs and + 2 SHs instead. + + (b3) A block move of up to 3 additional bytes would be like (b1). + + (b4) A block move of 8 bytes from aligned source data can use two + LD.W/ST.W sequences. Both sequences are better than the 4 LIs + and 4 ST.Hs that we'd generate when storing by pieces. + + The reasoning for higher alignments is similar: + + (c1) A block move of less than 4 bytes would be the same as (b1). + + (c2) A block move of 4 bytes would use an LD.W/ST.W sequence. Again, + loading the address of the source data would typically require + at least one extra instruction. It is generally better to use + LUI/ORI/SW instead. + + (c3) A block move of up to 3 additional bytes would be like (b1). + + (c4) A block move of 8 bytes can use two LD.W/ST.W sequences or a single + LD.D/ST.D sequence, and in these cases we've traditionally preferred + the memory copy over the more bulky constant moves. */ + return size < 8; +} + +/* Emit straight-line code to move LENGTH bytes from SRC to DEST. + Assume that the areas do not overlap. */ + +static void +loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) +{ + HOST_WIDE_INT offset, delta; + unsigned HOST_WIDE_INT bits; + int i; + machine_mode mode; + rtx *regs; + + /* Work out how many bits to move at a time. If both operands have + half-word alignment, it is usually better to move in half words. + For instance, lh/lh/sh/sh is usually better than lwl/lwr/swl/swr + and lw/lw/sw/sw is usually better than ldl/ldr/sdl/sdr. + Otherwise move word-sized chunks. + + For ISA_HAS_LWL_LWR we rely on the lwl/lwr & swl/swr load. Otherwise + picking the minimum of alignment or BITS_PER_WORD gets us the + desired size for bits. */ + + bits = MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest))); + + mode = int_mode_for_size (bits, 0).require (); + delta = bits / BITS_PER_UNIT; + + /* Allocate a buffer for the temporary registers. */ + regs = XALLOCAVEC (rtx, length / delta); + + /* Load as many BITS-sized chunks as possible. Use a normal load if + the source has enough alignment, otherwise use left/right pairs. */ + for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++) + { + regs[i] = gen_reg_rtx (mode); + loongarch_emit_move (regs[i], adjust_address (src, mode, offset)); + } + + + for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++) + loongarch_emit_move (adjust_address (dest, mode, offset), regs[i]); + + /* Mop up any left-over bytes. */ + if (offset < length) + { + src = adjust_address (src, BLKmode, offset); + dest = adjust_address (dest, BLKmode, offset); + move_by_pieces (dest, src, length - offset, + MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), RETURN_BEGIN); + } +} + +/* Helper function for doing a loop-based block operation on memory + reference MEM. Each iteration of the loop will operate on LENGTH + bytes of MEM. + + Create a new base register for use within the loop and point it to + the start of MEM. Create a new memory reference that uses this + register. 
Store them in *LOOP_REG and *LOOP_MEM respectively. */ + +static void +loongarch_adjust_block_mem (rtx mem, HOST_WIDE_INT length, + rtx *loop_reg, rtx *loop_mem) +{ + *loop_reg = copy_addr_to_reg (XEXP (mem, 0)); + + /* Although the new mem does not refer to a known location, + it does keep up to LENGTH bytes of alignment. */ + *loop_mem = change_address (mem, BLKmode, *loop_reg); + set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT)); +} + +/* Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER + bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that + the memory regions do not overlap. */ + +static void +loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, + HOST_WIDE_INT bytes_per_iter) +{ + rtx_code_label *label; + rtx src_reg, dest_reg, final_src, test; + HOST_WIDE_INT leftover; + + leftover = length % bytes_per_iter; + length -= leftover; + + /* Create registers and memory references for use within the loop. */ + loongarch_adjust_block_mem (src, bytes_per_iter, &src_reg, &src); + loongarch_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest); + + /* Calculate the value that SRC_REG should have after the last iteration + of the loop. */ + final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), + 0, 0, OPTAB_WIDEN); + + /* Emit the start of the loop. */ + label = gen_label_rtx (); + emit_label (label); + + /* Emit the loop body. */ + loongarch_block_move_straight (dest, src, bytes_per_iter); + + /* Move on to the next block. */ + loongarch_emit_move (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter)); + loongarch_emit_move (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter)); + + /* Emit the loop condition. */ + test = gen_rtx_NE (VOIDmode, src_reg, final_src); + if (Pmode == DImode) + emit_jump_insn (gen_cbranchdi4 (test, src_reg, final_src, label)); + else + emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label)); + + /* Mop up any left-over bytes. */ + if (leftover) + loongarch_block_move_straight (dest, src, leftover); + else + /* Temporary fix for PR79150. */ + emit_insn (gen_nop ()); +} + +/* Expand a movmemsi instruction, which copies LENGTH bytes from + memory reference SRC to memory reference DEST. */ + +bool +loongarch_expand_block_move (rtx dest, rtx src, rtx length) +{ + + int max_move_bytes = LARCH_MAX_MOVE_BYTES_STRAIGHT; + + if (CONST_INT_P (length) && INTVAL (length) <= loongarch_max_inline_memcpy_size) + { + if (INTVAL (length) <= max_move_bytes) + { + loongarch_block_move_straight (dest, src, INTVAL (length)); + return true; + } + else if (optimize) + { + loongarch_block_move_loop (dest, src, INTVAL (length), + LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER); + return true; + } + } + return false; +} + + +/* Expand a QI or HI mode atomic memory operation. + + GENERATOR contains a pointer to the gen_* function that generates + the SI mode underlying atomic operation using masks that we + calculate. + + RESULT is the return register for the operation. Its value is NULL + if unused. + + MEM is the location of the atomic access. + + OLDVAL is the first operand for the operation. + + NEWVAL is the optional second operand for the operation. Its value + is NULL if unused. 
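+
+   The expansion widens the access to the aligned SImode word containing
+   the QImode or HImode value: the word address is the address of MEM
+   with its low two bits cleared, and the byte offset given by those two
+   bits, multiplied by eight, is the shift needed to line up OLDVAL,
+   NEWVAL and the result.  The SImode atomic operation then updates the
+   word under the inclusive and exclusive masks computed below, and the
+   result, if requested, is masked and shifted back down into RESULT's
+   mode.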
*/ + +void +loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs generator, + rtx result, rtx mem, rtx oldval, + rtx newval, rtx model) +{ + rtx orig_addr, memsi_addr, memsi, shift, shiftsi, unshifted_mask; + rtx unshifted_mask_reg, mask, inverted_mask, si_op; + rtx res = NULL; + rtx tmp = NULL; + machine_mode mode; + + mode = GET_MODE (mem); + + /* Compute the address of the containing SImode value. */ + orig_addr = force_reg (Pmode, XEXP (mem, 0)); + memsi_addr = loongarch_force_binary (Pmode, AND, orig_addr, + force_reg (Pmode, GEN_INT (-4))); + + /* Create a memory reference for it. */ + memsi = gen_rtx_MEM (SImode, memsi_addr); + set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER); + MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem); + + /* Work out the byte offset of the QImode or HImode value, + counting from the least significant byte. */ + shift = loongarch_force_binary (Pmode, AND, orig_addr, GEN_INT (3)); + /* if (TARGET_BIG_ENDIAN) */ + /* loongarch_emit_binary (XOR, shift, shift, GEN_INT (mode == QImode ? 3 : 2)); */ + + /* Multiply by eight to convert the shift value from bytes to bits. */ + loongarch_emit_binary (ASHIFT, shift, shift, GEN_INT (3)); + + /* Make the final shift an SImode value, so that it can be used in + SImode operations. */ + shiftsi = force_reg (SImode, gen_lowpart (SImode, shift)); + + /* Set MASK to an inclusive mask of the QImode or HImode value. */ + unshifted_mask = GEN_INT (GET_MODE_MASK (mode)); + unshifted_mask_reg = force_reg (SImode, unshifted_mask); + mask = loongarch_force_binary (SImode, ASHIFT, unshifted_mask_reg, shiftsi); + + /* Compute the equivalent exclusive mask. */ + inverted_mask = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (inverted_mask, gen_rtx_NOT (SImode, mask))); + + /* Shift the old value into place. */ + if (oldval != const0_rtx) + { + oldval = convert_modes (SImode, mode, oldval, true); + oldval = force_reg (SImode, oldval); + oldval = loongarch_force_binary (SImode, ASHIFT, oldval, shiftsi); + } + + /* Do the same for the new value. */ + if (newval && newval != const0_rtx) + { + newval = convert_modes (SImode, mode, newval, true); + newval = force_reg (SImode, newval); + newval = loongarch_force_binary (SImode, ASHIFT, newval, shiftsi); + } + + /* Do the SImode atomic access. */ + if (result) + res = gen_reg_rtx (SImode); + + if (newval) + si_op = generator.fn_7 (res, memsi, mask, inverted_mask, oldval, newval, model); + else if (result) + si_op = generator.fn_6 (res, memsi, mask, inverted_mask, oldval, model); + else + si_op = generator.fn_5 (memsi, mask, inverted_mask, oldval, model); + + //si_op = generator.fn_7 (res, memsi, mask, inverted_mask, oldval, newval, model); + + emit_insn (si_op); + + if (result) + { + /* Shift and convert the result. */ + loongarch_emit_binary (AND, res, res, mask); + loongarch_emit_binary (LSHIFTRT, res, res, shiftsi); + loongarch_emit_move (result, gen_lowpart (GET_MODE (result), res)); + } +} + +/* Return true if X is a MEM with the same size as MODE. */ + +bool +loongarch_mem_fits_mode_p (machine_mode mode, rtx x) +{ + return (MEM_P (x) + && MEM_SIZE_KNOWN_P (x) + && MEM_SIZE (x) == GET_MODE_SIZE (mode)); +} + +/* Return true if (zero_extract OP WIDTH BITPOS) can be used as the + source of an "ext" instruction or the destination of an "ins" + instruction. 
OP must be a register operand and the following + conditions must hold: + + 0 <= BITPOS < GET_MODE_BITSIZE (GET_MODE (op)) + 0 < WIDTH <= GET_MODE_BITSIZE (GET_MODE (op)) + 0 < BITPOS + WIDTH <= GET_MODE_BITSIZE (GET_MODE (op)) + + Also reject lengths equal to a word as they are better handled + by the move patterns. */ + +bool +loongarch_use_ins_ext_p (rtx op, HOST_WIDE_INT width, HOST_WIDE_INT bitpos) +{ + if (!register_operand (op, VOIDmode) + || GET_MODE_BITSIZE (GET_MODE (op)) > BITS_PER_WORD) + return false; + + if (!IN_RANGE (width, 1, GET_MODE_BITSIZE (GET_MODE (op)) - 1)) + return false; + + if (bitpos < 0 || bitpos + width > GET_MODE_BITSIZE (GET_MODE (op))) + return false; + + return true; +} + + +/* Return true iff OP1 and OP2 are valid operands together for the + *and3 patterns. For the cases to consider, + see the table in the comment before the pattern. */ + +bool +and_operands_ok (machine_mode mode, rtx op1, rtx op2) +{ + + if (memory_operand (op1, mode)) + { + return and_load_operand (op2, mode); + } + else + return and_reg_operand (op2, mode); +} + + +/* Wrappers around loongarch_push_asm_switch_1 and loongarch_pop_asm_switch_1 + that either print a complete line or print nothing. */ + +void +loongarch_push_asm_switch (struct loongarch_asm_switch *asm_switch) +{ +// loongarch_push_asm_switch_1 (asm_switch, "\t", "\n"); +} + +void +loongarch_pop_asm_switch (struct loongarch_asm_switch *asm_switch) +{ +// loongarch_pop_asm_switch_1 (asm_switch, "\t", "\n"); +} + +/* Print the text for PRINT_OPERAND punctation character CH to FILE. + The punctuation characters are: + + '.' Print the name of the register with a hard-wired zero (zero or $r0). + '$' Print the name of the stack pointer register (sp or $r3). + ':' Print "c" to use the compact version if the delay slot is a nop. + '!' Print "s" to use the short version if the delay slot contains a + 16-bit instruction. + + See also loongarch_init_print_operand_punct. */ + +static void +loongarch_print_operand_punctuation (FILE *file, int ch) +{ + switch (ch) + { + case '.': + fputs (reg_names[GP_REG_FIRST + 0], file); + break; + + case '$': + fputs (reg_names[STACK_POINTER_REGNUM], file); + break; + + case ':': + /* When final_sequence is 0, the delay slot will be a nop. We can + use the compact version where available. The %: formatter will + only be present if a compact form of the branch is available. */ + if (final_sequence == 0) + putc ('c', file); + break; + + default: + gcc_unreachable (); + break; + } +} + +/* Initialize loongarch_print_operand_punct. */ + +static void +loongarch_init_print_operand_punct (void) +{ + const char *p; + + for (p = ".$:"; *p; p++) + loongarch_print_operand_punct[(unsigned char) *p] = true; +} + +/* PRINT_OPERAND prefix LETTER refers to the integer branch instruction + associated with condition CODE. Print the condition part of the + opcode to FILE. */ + +static void +loongarch_print_int_branch_condition (FILE *file, enum rtx_code code, int letter) +{ + switch (code) + { + case EQ: + case NE: + case GT: + case GE: + case LT: + case LE: + case GTU: + case GEU: + case LTU: + case LEU: + /* Conveniently, the LARCH names for these conditions are the same + as their RTL equivalents. */ + fputs (GET_RTX_NAME (code), file); + break; + + default: + output_operand_lossage ("'%%%c' is not a valid operand prefix", letter); + break; + } +} + +/* Likewise floating-point branches. 
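+   Only EQ and NE are expected here: a floating-point comparison leaves
+   its result in a condition-flag register, and the branch then simply
+   tests whether that flag is zero or nonzero.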
*/ + +static void +loongarch_print_float_branch_condition (FILE *file, enum rtx_code code, int letter) +{ + switch (code) + { + case EQ: + fputs ("ceqz", file); + break; + + case NE: + fputs ("cnez", file); + break; + + default: + output_operand_lossage ("'%%%c' is not a valid operand prefix", letter); + break; + } +} + +/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */ + +static bool +loongarch_print_operand_punct_valid_p (unsigned char code) +{ + return loongarch_print_operand_punct[code]; +} + +/* Return true if a FENCE should be emitted to before a memory access to + implement the release portion of memory model MODEL. */ + +static bool +loongarch_memmodel_needs_release_fence (enum memmodel model) +{ + switch (model) + { + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: + case MEMMODEL_SYNC_SEQ_CST: + case MEMMODEL_RELEASE: + case MEMMODEL_SYNC_RELEASE: + return true; + + case MEMMODEL_ACQUIRE: + case MEMMODEL_CONSUME: + case MEMMODEL_SYNC_ACQUIRE: + case MEMMODEL_RELAXED: + return false; + + default: + gcc_unreachable (); + } +} + + +/* Implement TARGET_PRINT_OPERAND. The LARCH-specific operand codes are: + + 'X' Print CONST_INT OP in hexadecimal format. + 'x' Print the low 16 bits of CONST_INT OP in hexadecimal format. + 'd' Print CONST_INT OP in decimal. + 'm' Print one less than CONST_INT OP in decimal. + 'y' Print exact log2 of CONST_INT OP in decimal. + 'h' Print the high-part relocation associated with OP, after stripping + any outermost HIGH. + 'R' Print the low-part relocation associated with OP. + 'C' Print the integer branch condition for comparison OP. + 'N' Print the inverse of the integer branch condition for comparison OP. + 'F' Print the FPU branch condition for comparison OP. + 'W' Print the inverse of the FPU branch condition for comparison OP. + 'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...), + 'z' for (eq:?I ...), 'n' for (ne:?I ...). + 't' Like 'T', but with the EQ/NE cases reversed + 'Y' Print loongarch_fp_conditions[INTVAL (OP)] + 'Z' Print OP and a comma for 8CC, otherwise print nothing. + 'D' Print the second part of a double-word register or memory operand. + 'L' Print the low-order register in a double-word register operand. + 'M' Print high-order register in a double-word register operand. + 'z' Print $0 if OP is zero, otherwise print OP normally. + 'b' Print the address of a memory operand, without offset. + 'V' Print exact log2 of CONST_INT OP element 0 of a replicated + CONST_VECTOR in decimal. + 'A' Print a _DB suffix if the memory model requires a release. + 'G' Print a DBAR insn if the memory model requires a release. + 'i' Print i if the operand is not a register. 
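+
+   For example, an insn template can write "%z2" so that a constant-zero
+   operand is emitted as the hard-wired zero register rather than as an
+   immediate.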
*/ + +static void +loongarch_print_operand (FILE *file, rtx op, int letter) +{ + enum rtx_code code; + + if (loongarch_print_operand_punct_valid_p (letter)) + { + loongarch_print_operand_punctuation (file, letter); + return; + } + + gcc_assert (op); + code = GET_CODE (op); + + switch (letter) + { + case 'X': + if (CONST_INT_P (op)) + fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (op)); + else + output_operand_lossage ("invalid use of '%%%c'", letter); + break; + + case 'x': + if (CONST_INT_P (op)) + fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (op) & 0xffff); + else + output_operand_lossage ("invalid use of '%%%c'", letter); + break; + + case 'd': + if (CONST_INT_P (op)) + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op)); + else + output_operand_lossage ("invalid use of '%%%c'", letter); + break; + + case 'm': + if (CONST_INT_P (op)) + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op) - 1); + else + output_operand_lossage ("invalid use of '%%%c'", letter); + break; + + case 'y': + if (CONST_INT_P (op)) + { + int val = exact_log2 (INTVAL (op)); + if (val != -1) + fprintf (file, "%d", val); + else + output_operand_lossage ("invalid use of '%%%c'", letter); + } + else + output_operand_lossage ("invalid use of '%%%c'", letter); + break; + + case 'V': + if (GET_CODE (op) == CONST_VECTOR) + { + machine_mode mode = GET_MODE_INNER (GET_MODE (op)); + unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (op, 0)); + int vlog2 = exact_log2 (val & GET_MODE_MASK (mode)); + if (vlog2 != -1) + fprintf (file, "%d", vlog2); + else + output_operand_lossage ("invalid use of '%%%c'", letter); + } + else + output_operand_lossage ("invalid use of '%%%c'", letter); + break; + + case 'C': + loongarch_print_int_branch_condition (file, code, letter); + break; + + case 'N': + loongarch_print_int_branch_condition (file, reverse_condition (code), letter); + break; + + case 'F': + loongarch_print_float_branch_condition (file, code, letter); + break; + + case 'W': + loongarch_print_float_branch_condition (file, reverse_condition (code), + letter); + break; + + case 'T': + case 't': + { + int truth = (code == NE) == (letter == 'T'); + fputc ("zfnt"[truth * 2 + ST_REG_P (REGNO (XEXP (op, 0)))], file); + } + break; + + case 'Y': + if (code == CONST_INT && UINTVAL (op) < ARRAY_SIZE (loongarch_fp_conditions)) + fputs (loongarch_fp_conditions[UINTVAL (op)], file); + else + output_operand_lossage ("'%%%c' is not a valid operand prefix", + letter); + break; + + case 'Z': + loongarch_print_operand (file, op, 0); + fputc (',', file); + break; + + case 'A': + if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op))) + fputs ("_db", file); + break; + + case 'G': + if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op))) + fputs ("dbar\t0", file); + break; + + case 'i': + if (code != REG) + fputs ("i", file); + break; + + default: + switch (code) + { + case REG: + { + unsigned int regno = REGNO (op); + if ((letter == 'M') + || (letter == 'L' ) + || letter == 'D') + regno++; + else if (letter && letter != 'z' && letter != 'M' && letter != 'L') + output_operand_lossage ("invalid use of '%%%c'", letter); + fprintf (file, "%s", reg_names[regno]); + } + break; + + case MEM: + if (letter == 'D') + output_address (GET_MODE (op), plus_constant (Pmode, + XEXP (op, 0), 4)); + else if (letter == 'b') + { + gcc_assert (REG_P (XEXP (op, 0))); + loongarch_print_operand (file, XEXP (op, 0), 0); + } + else if (letter && letter != 'z') + output_operand_lossage ("invalid use of '%%%c'", letter); + else + 
output_address (GET_MODE (op), XEXP (op, 0)); + break; + + default: + if (letter == 'z' && op == CONST0_RTX (GET_MODE (op))) + fputs (reg_names[GP_REG_FIRST], file); + else if (letter && letter != 'z') + output_operand_lossage ("invalid use of '%%%c'", letter); + else + output_addr_const (file, loongarch_strip_unspec_address (op)); + break; + } + } +} + +/* Implement TARGET_PRINT_OPERAND_ADDRESS. */ + +static void +loongarch_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x) +{ + struct loongarch_address_info addr; + + if (loongarch_classify_address (&addr, x, word_mode, true)) + switch (addr.type) + { + case ADDRESS_REG: + fprintf (file, "%s,", reg_names[REGNO (addr.reg)]); + loongarch_print_operand (file, addr.offset, 0); + return; + + case ADDRESS_CONST_INT: + fprintf (file, "%s,", reg_names[GP_REG_FIRST]); + output_addr_const (file, x); + return; + + case ADDRESS_SYMBOLIC: + output_addr_const (file, loongarch_strip_unspec_address (x)); + return; + } + if (GET_CODE (x) == CONST_INT) + output_addr_const (file, x); + else + gcc_unreachable (); +} + + +/* Implement TARGET_ENCODE_SECTION_INFO. */ + +static void +loongarch_encode_section_info (tree decl, rtx rtl, int first) +{ + default_encode_section_info (decl, rtl, first); + + if (TREE_CODE (decl) == FUNCTION_DECL) + { + rtx symbol = XEXP (rtl, 0); + tree type = TREE_TYPE (decl); + + /* Encode whether the symbol is short or long. */ + if ((TARGET_LONG_CALLS && !loongarch_near_type_p (type)) + || loongarch_far_type_p (type)) + SYMBOL_REF_FLAGS (symbol) |= SYMBOL_FLAG_LONG_CALL; + } +} + +/* Implement TARGET_SELECT_RTX_SECTION. */ + +static section * +loongarch_select_rtx_section (machine_mode mode, rtx x, + unsigned HOST_WIDE_INT align) +{ + /* ??? Consider using mergeable small data sections. */ + if (loongarch_rtx_constant_in_small_data_p (mode)) + return get_named_section (NULL, ".sdata", 0); + + return default_elf_select_rtx_section (mode, x, align); +} + +/* Implement TARGET_ASM_FUNCTION_RODATA_SECTION. + + The complication here is that, with the combination + !TARGET_ABSOLUTE_ABICALLS , jump tables will use + absolute addresses, and should therefore not be included in the + read-only part of a DSO. Handle such cases by selecting a normal + data section instead of a read-only one. The logic apes that in + default_function_rodata_section. */ + +static section * +loongarch_function_rodata_section (tree decl) +{ + return default_function_rodata_section (decl); +} + +/* Implement TARGET_IN_SMALL_DATA_P. */ + +static bool +loongarch_in_small_data_p (const_tree decl) +{ + unsigned HOST_WIDE_INT size; + + if (TREE_CODE (decl) == STRING_CST || TREE_CODE (decl) == FUNCTION_DECL) + return false; + + /* We don't yet generate small-data references for + VxWorks RTP code. See the related -G handling in + loongarch_option_override. */ + if (TARGET_VXWORKS_RTP) + return false; + + if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl) != 0) + { + const char *name; + + /* Reject anything that isn't in a known small-data section. */ + name = DECL_SECTION_NAME (decl); + if (strcmp (name, ".sdata") != 0 && strcmp (name, ".sbss") != 0) + return false; + + /* If a symbol is defined externally, the assembler will use the + usual -G rules when deciding how to implement macros. */ + if (!DECL_EXTERNAL (decl)) + return true; + } + else if (TARGET_EMBEDDED_DATA) + { + /* Don't put constants into the small data section: we want them + to be in ROM rather than RAM. 
*/ + if (TREE_CODE (decl) != VAR_DECL) + return false; + + if (TREE_READONLY (decl) + && !TREE_SIDE_EFFECTS (decl) + && (!DECL_INITIAL (decl) || TREE_CONSTANT (DECL_INITIAL (decl)))) + return false; + } + + /* Enforce -mlocal-sdata. */ + if (!TARGET_LOCAL_SDATA && !TREE_PUBLIC (decl)) + return false; + + /* Enforce -mextern-sdata. */ + if (!TARGET_EXTERN_SDATA && DECL_P (decl)) + { + if (DECL_EXTERNAL (decl)) + return false; + if (DECL_COMMON (decl) && DECL_INITIAL (decl) == NULL) + return false; + } + + /* We have traditionally not treated zero-sized objects as small data, + so this is now effectively part of the ABI. */ + size = int_size_in_bytes (TREE_TYPE (decl)); + return size > 0 && size <= loongarch_small_data_threshold; +} + +/* Implement TARGET_USE_ANCHORS_FOR_SYMBOL_P. We don't want to use + anchors for small data: the GP register acts as an anchor in that + case. We also don't want to use them for PC-relative accesses, + where the PC acts as an anchor. */ + +static bool +loongarch_use_anchors_for_symbol_p (const_rtx symbol) +{ + return default_use_anchors_for_symbol_p (symbol); +} + +/* The LARCH debug format wants all automatic variables and arguments + to be in terms of the virtual frame pointer (stack pointer before + any adjustment in the function), while the LARCH 3.0 linker wants + the frame pointer to be the stack pointer after the initial + adjustment. So, we do the adjustment here. The arg pointer (which + is eliminated) points to the virtual frame pointer, while the frame + pointer (which may be eliminated) points to the stack pointer after + the initial adjustments. */ + +HOST_WIDE_INT +loongarch_debugger_offset (rtx addr, HOST_WIDE_INT offset) +{ + rtx offset2 = const0_rtx; + rtx reg = eliminate_constant_term (addr, &offset2); + + if (offset == 0) + offset = INTVAL (offset2); + + if (reg == stack_pointer_rtx + || reg == frame_pointer_rtx + || reg == hard_frame_pointer_rtx) + { + offset -= cfun->machine->frame.total_size; + if (reg == hard_frame_pointer_rtx) + offset += cfun->machine->frame.hard_frame_pointer_offset; + } + + return offset; +} + +/* Implement ASM_OUTPUT_EXTERNAL. */ + +void +loongarch_output_external (FILE *file, tree decl, const char *name) +{ + default_elf_asm_output_external (file, decl, name); + + /* We output the name if and only if TREE_SYMBOL_REFERENCED is + set in order to avoid putting out names that are never really + used. */ + if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl))) + { + if (loongarch_in_small_data_p (decl)) + { + /* When using assembler macros, emit .extern directives for + all small-data externs so that the assembler knows how + big they are. + + In most cases it would be safe (though pointless) to emit + .externs for other symbols too. One exception is when an + object is within the -G limit but declared by the user to + be in a section other than .sbss or .sdata. */ + fputs ("\t.extern\t", file); + assemble_name (file, name); + fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC "\n", + int_size_in_bytes (TREE_TYPE (decl))); + } + } +} + +/* Implement TARGET_ASM_OUTPUT_SOURCE_FILENAME. */ + +static void +loongarch_output_filename (FILE *stream, const char *name) +{ + /* If we are emitting DWARF-2, let dwarf2out handle the ".file" + directives. 
*/ + if (write_symbols == DWARF2_DEBUG) + return; + else if (loongarch_output_filename_first_time) + { + loongarch_output_filename_first_time = 0; + num_source_filenames += 1; + current_function_file = name; + fprintf (stream, "\t.file\t%d ", num_source_filenames); + output_quoted_string (stream, name); + putc ('\n', stream); + } + /* If we are emitting stabs, let dbxout.c handle this (except for + the loongarch_output_filename_first_time case). */ + else if (write_symbols == DBX_DEBUG) + return; + else if (name != current_function_file + && strcmp (name, current_function_file) != 0) + { + num_source_filenames += 1; + current_function_file = name; + fprintf (stream, "\t.file\t%d ", num_source_filenames); + output_quoted_string (stream, name); + putc ('\n', stream); + } +} + +/* Implement TARGET_ASM_OUTPUT_DWARF_DTPREL. */ + +static void ATTRIBUTE_UNUSED +loongarch_output_dwarf_dtprel (FILE *file, int size, rtx x) +{ + switch (size) + { + case 4: + fputs ("\t.dtprelword\t", file); + break; + + case 8: + fputs ("\t.dtpreldword\t", file); + break; + + default: + gcc_unreachable (); + } + output_addr_const (file, x); + fputs ("+0x8000", file); +} + +/* Implement TARGET_DWARF_REGISTER_SPAN. */ + +static rtx +loongarch_dwarf_register_span (rtx reg) +{ + rtx high, low; + machine_mode mode; + + mode = GET_MODE (reg); + /* By default, GCC maps increasing register numbers to increasing + memory locations, but paired FPRs are always little-endian, + regardless of the prevailing endianness. */ + if (FP_REG_P (REGNO (reg)) + && MAX_FPRS_PER_FMT > 1 + && GET_MODE_SIZE (mode) > UNITS_PER_FPREG) + { + gcc_assert (GET_MODE_SIZE (mode) == UNITS_PER_HWFPVALUE); + high = loongarch_subword (reg, true); + low = loongarch_subword (reg, false); + return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, high, low)); + } + + return NULL_RTX; +} + +/* Implement TARGET_DWARF_FRAME_REG_MODE. */ + +static machine_mode +loongarch_dwarf_frame_reg_mode (int regno) +{ + machine_mode mode = default_dwarf_frame_reg_mode (regno); + + if (FP_REG_P (regno) && loongarch_abi == ABILP32 && TARGET_FLOAT64) + mode = SImode; + + return mode; +} + + +/* Implement ASM_OUTPUT_ASCII. */ + +void +loongarch_output_ascii (FILE *stream, const char *string, size_t len) +{ + size_t i; + int cur_pos; + + cur_pos = 17; + fprintf (stream, "\t.ascii\t\""); + for (i = 0; i < len; i++) + { + int c; + + c = (unsigned char) string[i]; + if (ISPRINT (c)) + { + if (c == '\\' || c == '\"') + { + putc ('\\', stream); + cur_pos++; + } + putc (c, stream); + cur_pos++; + } + else + { + fprintf (stream, "\\%03o", c); + cur_pos += 4; + } + + if (cur_pos > 72 && i+1 < len) + { + cur_pos = 17; + fprintf (stream, "\"\n\t.ascii\t\""); + } + } + fprintf (stream, "\"\n"); +} + +/* Emit either a label, .comm, or .lcomm directive. When using assembler + macros, mark the symbol as written so that loongarch_asm_output_external + won't emit an .extern for it. STREAM is the output file, NAME is the + name of the symbol, INIT_STRING is the string that should be written + before the symbol and FINAL_STRING is the string that should be + written after it. FINAL_STRING is a printf format that consumes the + remaining arguments. */ + +void +loongarch_declare_object (FILE *stream, const char *name, const char *init_string, + const char *final_string, ...) 
+{ + va_list ap; + + fputs (init_string, stream); + assemble_name (stream, name); + va_start (ap, final_string); + vfprintf (stream, final_string, ap); + va_end (ap); + + tree name_tree = get_identifier (name); + TREE_ASM_WRITTEN (name_tree) = 1; +} + +/* Declare a common object of SIZE bytes using asm directive INIT_STRING. + NAME is the name of the object and ALIGN is the required alignment + in bytes. TAKES_ALIGNMENT_P is true if the directive takes a third + alignment argument. */ + +void +loongarch_declare_common_object (FILE *stream, const char *name, + const char *init_string, + unsigned HOST_WIDE_INT size, + unsigned int align, bool takes_alignment_p) +{ + if (!takes_alignment_p) + { + size += (align / BITS_PER_UNIT) - 1; + size -= size % (align / BITS_PER_UNIT); + loongarch_declare_object (stream, name, init_string, + "," HOST_WIDE_INT_PRINT_UNSIGNED "\n", size); + } + else + loongarch_declare_object (stream, name, init_string, + "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n", + size, align / BITS_PER_UNIT); +} + +/* Implement ASM_OUTPUT_ALIGNED_DECL_COMMON. This is usually the same as the + elfos.h version, but we also need to handle -muninit-const-in-rodata. */ + +void +loongarch_output_aligned_decl_common (FILE *stream, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align) +{ + /* If the target wants uninitialized const declarations in + .rdata then don't put them in .comm. */ + if (TARGET_EMBEDDED_DATA + && TARGET_UNINIT_CONST_IN_RODATA + && TREE_CODE (decl) == VAR_DECL + && TREE_READONLY (decl) + && (DECL_INITIAL (decl) == 0 || DECL_INITIAL (decl) == error_mark_node)) + { + if (TREE_PUBLIC (decl) && DECL_NAME (decl)) + targetm.asm_out.globalize_label (stream, name); + + switch_to_section (readonly_data_section); + ASM_OUTPUT_ALIGN (stream, floor_log2 (align / BITS_PER_UNIT)); + loongarch_declare_object (stream, name, "", + ":\n\t.space\t" HOST_WIDE_INT_PRINT_UNSIGNED "\n", + size); + } + else + loongarch_declare_common_object (stream, name, "\n\t.comm\t", + size, align, true); +} + +#ifdef ASM_OUTPUT_SIZE_DIRECTIVE +extern int size_directive_output; + +/* Implement ASM_DECLARE_OBJECT_NAME. This is like most of the standard ELF + definitions except that it uses loongarch_declare_object to emit the label. */ + +void +loongarch_declare_object_name (FILE *stream, const char *name, + tree decl ATTRIBUTE_UNUSED) +{ +#ifdef ASM_OUTPUT_TYPE_DIRECTIVE +#ifdef USE_GNU_UNIQUE_OBJECT + /* As in elfos.h. */ + if (USE_GNU_UNIQUE_OBJECT && DECL_ONE_ONLY (decl) + && (!DECL_ARTIFICIAL (decl) || !TREE_READONLY (decl))) + ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "gnu_unique_object"); + else +#endif + ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object"); +#endif + + size_directive_output = 0; + if (!flag_inhibit_size_directive && DECL_SIZE (decl)) + { + HOST_WIDE_INT size; + + size_directive_output = 1; + size = int_size_in_bytes (TREE_TYPE (decl)); + ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size); + } + + loongarch_declare_object (stream, name, "", ":\n"); +} + +/* Implement ASM_FINISH_DECLARE_OBJECT. This is generic ELF stuff. 
*/ + +void +loongarch_finish_declare_object (FILE *stream, tree decl, int top_level, int at_end) +{ + const char *name; + + name = XSTR (XEXP (DECL_RTL (decl), 0), 0); + if (!flag_inhibit_size_directive + && DECL_SIZE (decl) != 0 + && !at_end + && top_level + && DECL_INITIAL (decl) == error_mark_node + && !size_directive_output) + { + HOST_WIDE_INT size; + + size_directive_output = 1; + size = int_size_in_bytes (TREE_TYPE (decl)); + ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size); + } +} +#endif + +/* Mark text contents as code or data, mainly for the purpose of correct + disassembly. Emit a local symbol and set its type appropriately for + that purpose. Also emit `.insn' if marking contents as code so that + the ISA mode is recorded and any padding that follows is disassembled + as correct instructions. */ + +void +loongarch_set_text_contents_type (FILE *file ATTRIBUTE_UNUSED, + const char *prefix ATTRIBUTE_UNUSED, + unsigned long num ATTRIBUTE_UNUSED, + bool function_p ATTRIBUTE_UNUSED) +{ +#ifdef ASM_OUTPUT_TYPE_DIRECTIVE + char buf[(sizeof (num) * 10) / 4 + 2]; + const char *fnname; + char *sname; + rtx symbol; + + sprintf (buf, "%lu", num); + symbol = XEXP (DECL_RTL (current_function_decl), 0); + fnname = targetm.strip_name_encoding (XSTR (symbol, 0)); + sname = ACONCAT ((prefix, fnname, "_", buf, NULL)); + + ASM_OUTPUT_TYPE_DIRECTIVE (file, sname, function_p ? "function" : "object"); + assemble_name (file, sname); + fputs (":\n", file); +// if (function_p) +// fputs ("\t.insn\n", file); +#endif +} + +/* Implement TARGET_ASM_FILE_START. */ + +static void +loongarch_file_start (void) +{ + default_file_start (); +} + +/* Implement TARGET_ASM_CODE_END. */ + +static void +loongarch_code_end (void) +{ + if (NEED_INDICATE_EXEC_STACK) + /* Add .note.GNU-stack. */ + file_end_indicate_exec_stack (); +} + +/* Make the last instruction frame-related and note that it performs + the operation described by FRAME_PATTERN. */ + +static void +loongarch_set_frame_expr (rtx frame_pattern) +{ + rtx_insn *insn; + + insn = get_last_insn (); + RTX_FRAME_RELATED_P (insn) = 1; + REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR, + frame_pattern, + REG_NOTES (insn)); +} + +/* Return a frame-related rtx that stores REG at MEM. + REG must be a single register. */ + +static rtx +loongarch_frame_set (rtx mem, rtx reg) +{ + rtx set; + + set = gen_rtx_SET (mem, reg); + RTX_FRAME_RELATED_P (set) = 1; + + return set; +} + +/* Record that the epilogue has restored call-saved register REG. */ + +static void +loongarch_add_cfa_restore (rtx reg) +{ + loongarch_epilogue.cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, + loongarch_epilogue.cfa_restores); +} + + +/* Return true if REGNO is a register that is ordinarily call-clobbered + but must nevertheless be preserved by an interrupt handler. */ + +static bool +loongarch_interrupt_extra_call_saved_reg_p (unsigned int regno) +{ + if (GP_REG_P (regno) + && cfun->machine->use_shadow_register_set == SHADOW_SET_NO) + { + /* $0 is hard-wired. */ + if (regno == GP_REG_FIRST) + return false; + + /* The function will return the stack pointer to its original value + anyway. */ + if (regno == STACK_POINTER_REGNUM) + return false; + + /* Otherwise, return true for registers that aren't ordinarily + call-clobbered. */ + return call_used_regs[regno]; + } + + return false; +} + +/* Return true if the current function should treat register REGNO + as call-saved. 
*/ + +static bool +loongarch_cfun_call_saved_reg_p (unsigned int regno) +{ + /* If the user makes an ordinarily-call-saved register global, + that register is no longer call-saved. */ + if (global_regs[regno]) + return false; + + /* Interrupt handlers need to save extra registers. */ + if (cfun->machine->interrupt_handler_p + && loongarch_interrupt_extra_call_saved_reg_p (regno)) + return true; + + return !call_used_regs[regno]; +} + +/* Return true if the function body might clobber register REGNO. + We know that REGNO is call-saved. */ + +static bool +loongarch_cfun_might_clobber_call_saved_reg_p (unsigned int regno) +{ + /* Some functions should be treated as clobbering all call-saved + registers. */ + if (crtl->saves_all_registers) + return true; + + /* DF handles cases where a register is explicitly referenced in + the rtl. Incoming values are passed in call-clobbered registers, + so we can assume that any live call-saved register is set within + the function. */ + if (df_regs_ever_live_p (regno)) + return true; + + /* Check for registers that are clobbered by FUNCTION_PROFILER. + These clobbers are not explicit in the rtl. */ + if (crtl->profile && LARCH_SAVE_REG_FOR_PROFILING_P (regno)) + return true; + + /* The function's prologue will need to set the frame pointer if + frame_pointer_needed. */ + if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed) + return true; + + + /* If REGNO is ordinarily call-clobbered, we must assume that any + called function could modify it. */ + if (cfun->machine->interrupt_handler_p + && !crtl->is_leaf + && loongarch_interrupt_extra_call_saved_reg_p (regno)) + return true; + + return false; +} + +/* Return true if the current function must save register REGNO. */ + +static bool +loongarch_save_reg_p (unsigned int regno) +{ + if (loongarch_cfun_call_saved_reg_p (regno)) + { + if (loongarch_cfun_might_clobber_call_saved_reg_p (regno)) + return true; + + /* Save both registers in an FPR pair if either one is used. This is + needed for the case when MIN_FPRS_PER_FMT == 1, which allows the odd + register to be used without the even register. */ + if (FP_REG_P (regno) + && MAX_FPRS_PER_FMT == 2 + && loongarch_cfun_might_clobber_call_saved_reg_p (regno + 1)) + return true; + } + + /* We need to save the incoming return address if __builtin_eh_return + is being used to set a different return address. */ + if (regno == RETURN_ADDR_REGNUM && crtl->calls_eh_return) + return true; + + return false; +} + +/* Populate the current function's loongarch_frame_info structure. 
+ + LARCH stack frames look like: + + +-------------------------------+ + | | + | incoming stack arguments | + | | + +-------------------------------+ + | | + | caller-allocated save area | + A | for register arguments | + | | + +-------------------------------+ <-- incoming stack pointer + | | + | callee-allocated save area | + B | for arguments that are | + | split between registers and | + | the stack | + | | + +-------------------------------+ <-- arg_pointer_rtx + | | + C | callee-allocated save area | + | for register varargs | + | | + +-------------------------------+ <-- frame_pointer_rtx + | | + cop0_sp_offset + | COP0 reg save area | + UNITS_PER_WORD + | | + +-------------------------------+ <-- frame_pointer_rtx + acc_sp_offset + | | + UNITS_PER_WORD + | accumulator save area | + | | + +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset + | | + UNITS_PER_HWFPVALUE + | FPR save area | + | | + +-------------------------------+ <-- stack_pointer_rtx + gp_sp_offset + | | + UNITS_PER_WORD + | GPR save area | + | | + +-------------------------------+ <-- frame_pointer_rtx with + | | \ -fstack-protector + | local variables | | var_size + | | / + +-------------------------------+ + | | \ + | $gp save area | | cprestore_size + | | / + +-------------------------------+ | args_size + | | | + | caller-allocated save area | | + | for register arguments | | + | | / + +-------------------------------+ <-- stack_pointer_rtx + frame_pointer_rtx without + -fstack-protector + hard_frame_pointer_rtx for + code. + + At least two of A, B and C will be empty. + + Dynamic stack allocations such as alloca insert data at point P. + They decrease stack_pointer_rtx but leave frame_pointer_rtx and + hard_frame_pointer_rtx unchanged. */ + +static void +loongarch_compute_frame_info (void) +{ + struct loongarch_frame_info *frame; + HOST_WIDE_INT offset, size; + unsigned int regno, i; + + /* Skip re-computing the frame info after reload completed. */ + if (reload_completed) + return; + + /* Set this function's interrupt properties. */ + if (loongarch_interrupt_type_p (TREE_TYPE (current_function_decl))) + { + error ("the % attribute is not supported."); + // need to be improved !! + } + + frame = &cfun->machine->frame; + memset (frame, 0, sizeof (*frame)); + size = get_frame_size (); + + /* The first two blocks contain the outgoing argument area and the $gp save + slot. This area isn't needed in leaf functions. We can also skip it + if we know that none of the called functions will use this space. + + But if the target-independent frame size is nonzero, we have already + committed to allocating these in TARGET_STARTING_FRAME_OFFSET for + !FRAME_GROWS_DOWNWARD. */ + + if ((size == 0 || FRAME_GROWS_DOWNWARD) + && (crtl->is_leaf || (cfun->machine->optimize_call_stack && !flag_pic))) + { + /* The LARCH 3.0 linker does not like functions that dynamically + allocate the stack and have 0 for STACK_DYNAMIC_OFFSET, since it + looks like we are trying to create a second frame pointer to the + function, so allocate some stack space to make it happy. */ + if (cfun->calls_alloca) + frame->args_size = REG_PARM_STACK_SPACE (cfun->decl); + else + frame->args_size = 0; + frame->cprestore_size = 0; + } + else + { + frame->args_size = crtl->outgoing_args_size; + frame->cprestore_size = 0; + } + + + offset = frame->args_size + frame->cprestore_size; + + /* Move above the local variables. */ + frame->var_size = LARCH_STACK_ALIGN (size); + offset += frame->var_size; + + /* Find out which GPRs we need to save. 
*/ + for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) + if (loongarch_save_reg_p (regno)) + { + frame->num_gp++; + frame->mask |= 1 << (regno - GP_REG_FIRST); + } + + /* If this function calls eh_return, we must also save and restore the + EH data registers. */ + if (crtl->calls_eh_return) + for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; i++) + { + frame->num_gp++; + frame->mask |= 1 << (EH_RETURN_DATA_REGNO (i) - GP_REG_FIRST); + } + + + /* Move above the GPR save area. */ + if (frame->num_gp > 0) + { + offset += LARCH_STACK_ALIGN (frame->num_gp * UNITS_PER_WORD); + frame->gp_sp_offset = offset - UNITS_PER_WORD; + } + + /* Find out which FPRs we need to save. This loop must iterate over + the same space as its companion in loongarch_for_each_saved_gpr_and_fpr. */ + if (TARGET_HARD_FLOAT) + for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno += MAX_FPRS_PER_FMT) + if (loongarch_save_reg_p (regno)) + { + frame->num_fp += MAX_FPRS_PER_FMT; + frame->fmask |= ~(~0U << MAX_FPRS_PER_FMT) << (regno - FP_REG_FIRST); + } + + /* Move above the FPR save area. */ + if (frame->num_fp > 0) + { + offset += LARCH_STACK_ALIGN (frame->num_fp * UNITS_PER_FPREG); + frame->fp_sp_offset = offset - UNITS_PER_HWFPVALUE; + } + + /* Add in space for the interrupt context information. */ + if (cfun->machine->interrupt_handler_p) + { + // need to be improved !! + } + + /* Move above the accumulator save area. */ + if (frame->num_acc > 0) + { + /* Each accumulator needs 2 words. */ + offset += frame->num_acc * 2 * UNITS_PER_WORD; + frame->acc_sp_offset = offset - UNITS_PER_WORD; + } + + /* Move above the COP0 register save area. */ + if (frame->num_cop0_regs > 0) + { + offset += frame->num_cop0_regs * UNITS_PER_WORD; + frame->cop0_sp_offset = offset - UNITS_PER_WORD; + } + + /* Determine if we can save the callee-saved registers in the frame + header. Restrict this to functions where there is no other reason + to allocate stack space so that we can eliminate the instructions + that modify the stack pointer. */ + + if (TARGET_OLDABI + && optimize > 0 + && flag_frame_header_optimization + && !MAIN_NAME_P (DECL_NAME (current_function_decl)) + && cfun->machine->varargs_size == 0 + && crtl->args.pretend_args_size == 0 + && frame->var_size == 0 + && frame->num_acc == 0 + && frame->num_cop0_regs == 0 + && frame->num_fp == 0 + && frame->num_gp > 0 + && frame->num_gp <= MAX_ARGS_IN_REGISTERS + && !cfun->machine->interrupt_handler_p + && cfun->machine->does_not_use_frame_header + && cfun->machine->optimize_call_stack + && !cfun->machine->callers_may_not_allocate_frame) + { + offset = 0; + frame->gp_sp_offset = REG_PARM_STACK_SPACE(cfun) - UNITS_PER_WORD; + cfun->machine->use_frame_header_for_callee_saved_regs = true; + } + + /* Move above the callee-allocated varargs save area. */ + offset += LARCH_STACK_ALIGN (cfun->machine->varargs_size); + frame->arg_pointer_offset = offset; + + /* Move above the callee-allocated area for pretend stack arguments. */ + offset += crtl->args.pretend_args_size; + frame->total_size = offset; + + /* Work out the offsets of the save areas from the top of the frame. */ + if (frame->gp_sp_offset > 0) + frame->gp_save_offset = frame->gp_sp_offset - offset; + if (frame->fp_sp_offset > 0) + frame->fp_save_offset = frame->fp_sp_offset - offset; + if (frame->acc_sp_offset > 0) + frame->acc_save_offset = frame->acc_sp_offset - offset; + if (frame->num_cop0_regs > 0) + frame->cop0_save_offset = frame->cop0_sp_offset - offset; +} + +/* Implement TARGET_FRAME_POINTER_REQUIRED. 
*/ + +static bool +loongarch_frame_pointer_required (void) +{ + /* If the function contains dynamic stack allocations, we need to + use the frame pointer to access the static parts of the frame. */ + if (cfun->calls_alloca) + return true; + + return false; +} + +/* Make sure that we're not trying to eliminate to the wrong hard frame + pointer. */ + +static bool +loongarch_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + return (to == HARD_FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM); +} + +/* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer + or argument pointer. TO is either the stack pointer or hard frame + pointer. */ + +HOST_WIDE_INT +loongarch_initial_elimination_offset (int from, int to) +{ + HOST_WIDE_INT offset; + + loongarch_compute_frame_info (); + + /* Set OFFSET to the offset from the end-of-prologue stack pointer. */ + switch (from) + { + case FRAME_POINTER_REGNUM: + if (FRAME_GROWS_DOWNWARD) + offset = (cfun->machine->frame.args_size + + cfun->machine->frame.cprestore_size + + cfun->machine->frame.var_size); + else + offset = 0; + break; + + case ARG_POINTER_REGNUM: + offset = cfun->machine->frame.arg_pointer_offset; + break; + + default: + gcc_unreachable (); + } + + if (to == HARD_FRAME_POINTER_REGNUM) + offset -= cfun->machine->frame.hard_frame_pointer_offset; + + return offset; +} + + +/* Implement RETURN_ADDR_RTX. We do not support moving back to a + previous frame. */ + +rtx +loongarch_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) +{ + if (count != 0) + return const0_rtx; + + return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM); +} + +/* Emit code to change the current function's return address to + ADDRESS. SCRATCH is available as a scratch register, if needed. + ADDRESS and SCRATCH are both word-mode GPRs. */ + +void +loongarch_set_return_address (rtx address, rtx scratch) +{ + rtx slot_address; + + gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM)); + slot_address = loongarch_add_offset (scratch, stack_pointer_rtx, + cfun->machine->frame.gp_sp_offset); + loongarch_emit_move (gen_frame_mem (GET_MODE (address), slot_address), address); +} + + +/* Fill *BASE and *OFFSET such that *BASE + *OFFSET refers to the + cprestore slot. LOAD_P is true if the caller wants to load from + the cprestore slot; it is false if the caller wants to store to + the slot. */ + +static void +loongarch_get_cprestore_base_and_offset (rtx *base, HOST_WIDE_INT *offset, + bool load_p) +{ + const struct loongarch_frame_info *frame; + + frame = &cfun->machine->frame; + /* .cprestore always uses the stack pointer instead of the frame pointer. + We have a free choice for direct stores, + Using the stack pointer would sometimes give more + (early) scheduling freedom, but using the frame pointer would + sometimes give more (late) scheduling freedom. It's hard to + predict which applies to a given function, so let's keep things + simple. + + Loads must always use the frame pointer in functions that call + alloca, and there's little benefit to using the stack pointer + otherwise. */ + if (frame_pointer_needed) + { + *base = hard_frame_pointer_rtx; + *offset = frame->args_size - frame->hard_frame_pointer_offset; + } + else + { + *base = stack_pointer_rtx; + *offset = frame->args_size; + } +} + +/* Return true if X is the load or store address of the cprestore slot; + LOAD_P says which. 
*/ + +bool +loongarch_cprestore_address_p (rtx x, bool load_p) +{ + rtx given_base, required_base; + HOST_WIDE_INT given_offset, required_offset; + + loongarch_split_plus (x, &given_base, &given_offset); + loongarch_get_cprestore_base_and_offset (&required_base, &required_offset, load_p); + return given_base == required_base && given_offset == required_offset; +} + + +/* A function to save or store a register. The first argument is the + register and the second is the stack slot. */ +typedef void (*loongarch_save_restore_fn) (rtx, rtx); + +/* Use FN to save or restore register REGNO. MODE is the register's + mode and OFFSET is the offset of its save slot from the current + stack pointer. */ + +static void +loongarch_save_restore_reg (machine_mode mode, int regno, + HOST_WIDE_INT offset, loongarch_save_restore_fn fn) +{ + rtx mem; + + mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, + offset)); + fn (gen_rtx_REG (mode, regno), mem); +} + +/* Save register REG to MEM. Make the instruction frame-related. */ + +static void +loongarch_save_reg (rtx reg, rtx mem) +{ + if (GET_MODE (reg) == DFmode + && (!TARGET_FLOAT64 + || loongarch_abi == ABILP32)) + { + rtx x1, x2; + + loongarch_emit_move_or_split (mem, reg, SPLIT_IF_NECESSARY); + + x1 = loongarch_frame_set (loongarch_subword (mem, false), + loongarch_subword (reg, false)); + x2 = loongarch_frame_set (loongarch_subword (mem, true), + loongarch_subword (reg, true)); + loongarch_set_frame_expr (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x1, x2))); + } + else + loongarch_emit_save_slot_move (mem, reg, LARCH_PROLOGUE_TEMP (GET_MODE (reg))); +} + +/* Call FN for each register that is saved by the current function. + SP_OFFSET is the offset of the current stack pointer from the start + of the frame. */ + +static void +loongarch_for_each_saved_gpr_and_fpr (HOST_WIDE_INT sp_offset, + loongarch_save_restore_fn fn) +{ + machine_mode fpr_mode; + int regno; + const struct loongarch_frame_info *frame = &cfun->machine->frame; + HOST_WIDE_INT offset; + unsigned int mask; + + /* Save registers starting from high to low. The debuggers prefer at least + the return register be stored at func+4, and also it allows us not to + need a nop in the epilogue if at least one register is reloaded in + addition to return address. */ + offset = frame->gp_sp_offset - sp_offset; + mask = frame->mask; + + for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) + if (BITSET_P (mask, regno - GP_REG_FIRST)) + { + /* Record the ra offset for use by loongarch_function_profiler. */ + if (regno == RETURN_ADDR_REGNUM) + cfun->machine->frame.ra_fp_offset = offset + sp_offset; + loongarch_save_restore_reg (word_mode, regno, offset, fn); + offset -= UNITS_PER_WORD; + } + + /* This loop must iterate over the same space as its companion in + loongarch_compute_frame_info. */ + offset = cfun->machine->frame.fp_sp_offset - sp_offset; + fpr_mode = (TARGET_SINGLE_FLOAT ? 
SFmode : DFmode); + for (regno = FP_REG_LAST - MAX_FPRS_PER_FMT + 1; + regno >= FP_REG_FIRST; + regno -= MAX_FPRS_PER_FMT) + if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST)) + { + if (!TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT + && (fixed_regs[regno] || fixed_regs[regno + 1])) + { + if (fixed_regs[regno]) + loongarch_save_restore_reg (SFmode, regno + 1, offset, fn); + else + loongarch_save_restore_reg (SFmode, regno, offset, fn); + } + else + loongarch_save_restore_reg (fpr_mode, regno, offset, fn); + offset -= GET_MODE_SIZE (fpr_mode); + } +} + + +/* Return true if a move between register REGNO and its save slot (MEM) + can be done in a single move. LOAD_P is true if we are loading + from the slot, false if we are storing to it. */ + +static bool +loongarch_direct_save_slot_move_p (unsigned int regno, rtx mem, bool load_p) +{ + + return loongarch_secondary_reload_class (REGNO_REG_CLASS (regno), + GET_MODE (mem), mem, load_p) == NO_REGS; +} + +/* Emit a move from SRC to DEST, given that one of them is a register + save slot and that the other is a register. TEMP is a temporary + GPR of the same mode that is available if need be. */ + +void +loongarch_emit_save_slot_move (rtx dest, rtx src, rtx temp) +{ + unsigned int regno; + rtx mem; + + if (REG_P (src)) + { + regno = REGNO (src); + mem = dest; + } + else + { + regno = REGNO (dest); + mem = src; + } + + if (loongarch_direct_save_slot_move_p (regno, mem, mem == src)) + loongarch_emit_move (dest, src); + else + { + gcc_assert (!reg_overlap_mentioned_p (dest, temp)); + loongarch_emit_move (temp, src); + loongarch_emit_move (dest, temp); + } + if (MEM_P (dest)) + loongarch_set_frame_expr (loongarch_frame_set (dest, src)); +} + + +/* Implement ASM_DECLARE_FUNCTION_NAME. */ + +void loongarch_declare_function_name(FILE *stream ATTRIBUTE_UNUSED, + const char *name, tree fndecl ATTRIBUTE_UNUSED) +{ + loongarch_start_function_definition (name); +} + +/* Implement TARGET_OUTPUT_FUNCTION_PROLOGUE. */ + +static void +loongarch_output_function_prologue (FILE *file) +{ +// const char *fnname; + + + /* Get the function name the same way that toplev.c does before calling + assemble_start_function. This is needed so that the name used here + exactly matches the name used in ASM_DECLARE_FUNCTION_NAME. */ +// fnname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0); +// loongarch_start_function_definition (fnname); +} + +/* Implement TARGET_OUTPUT_FUNCTION_EPILOGUE. */ + +static void +loongarch_output_function_epilogue (FILE *) +{ + const char *fnname; + + /* Get the function name the same way that toplev.c does before calling + assemble_start_function. This is needed so that the name used here + exactly matches the name used in ASM_DECLARE_FUNCTION_NAME. */ + fnname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0); +} + +/* Emit an optimisation barrier for accesses to the current frame. */ + +static void +loongarch_frame_barrier (void) +{ + emit_clobber (gen_frame_mem (BLKmode, stack_pointer_rtx)); +} + + +#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) + +#if PROBE_INTERVAL > 16384 +#error Cannot use indexed addressing mode for stack probing +#endif + +/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, + inclusive. These are offsets from the current stack pointer. */ + +static void +loongarch_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) +{ + + /* See if we have a constant small number of probes to generate. If so, + that's the easy case. 
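+     When FIRST + SIZE fits in 2048 bytes and SIZE is no larger than
+     PROBE_INTERVAL, the loop below emits no intermediate probes and
+     only the final probe at SP - (FIRST + SIZE) is generated.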
*/ + if (first + size <= 2048) + { + HOST_WIDE_INT i; + + /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until + it exceeds SIZE. If only one probe is needed, this will not + generate any code. Then probe at FIRST + SIZE. */ + for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) + emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, + -(first + i))); + + emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, + -(first + size))); + } + + /* Otherwise, do the same as above, but in a loop. Note that we must be + extra careful with variables wrapping around because we might be at + the very top (or the very bottom) of the address space and we have + to be able to handle this case properly; in particular, we use an + equality test for the loop condition. */ + else + { + HOST_WIDE_INT rounded_size; + rtx r13 = LARCH_PROLOGUE_TEMP (Pmode); + rtx r12 = LARCH_PROLOGUE_TEMP2 (Pmode); + + /* Sanity check for the addressing mode we're going to use. */ + gcc_assert (first <= 16384); + + + /* Step 1: round SIZE to the previous multiple of the interval. */ + + rounded_size = ROUND_DOWN (size, PROBE_INTERVAL); + + + /* Step 2: compute initial and final value of the loop counter. */ + + /* TEST_ADDR = SP + FIRST. */ + if (first > 2048) + { + emit_move_insn (r12, GEN_INT (first)); + emit_insn (gen_rtx_SET (r13, gen_rtx_MINUS (Pmode, stack_pointer_rtx, + r12))); + } + else + emit_insn (gen_rtx_SET (r13, plus_constant (Pmode, stack_pointer_rtx, + -first))); + + /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */ + if (rounded_size > 2048) + { + emit_move_insn (r12, GEN_INT (rounded_size)); + emit_insn (gen_rtx_SET (r12, gen_rtx_MINUS (Pmode, r13, r12))); + } + else + emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, r13, + -rounded_size))); + + + /* Step 3: the loop + + do + { + TEST_ADDR = TEST_ADDR + PROBE_INTERVAL + probe at TEST_ADDR + } + while (TEST_ADDR != LAST_ADDR) + + probes at FIRST + N * PROBE_INTERVAL for values of N from 1 + until it is equal to ROUNDED_SIZE. */ + + emit_insn (PMODE_INSN (gen_probe_stack_range, (r13, r13, r12))); + + + /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time + that SIZE is equal to ROUNDED_SIZE. */ + + if (size != rounded_size) + emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size)); + } + + /* Make sure nothing is scheduled before we are done. */ + emit_insn (gen_blockage ()); +} + +/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are + absolute addresses. */ + +const char * +loongarch_output_probe_stack_range (rtx reg1, rtx reg2) +{ + static int labelno = 0; + char loop_lab[32], tmp[64]; + rtx xops[2]; + + ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); + + /* Loop. */ + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); + + /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ + xops[0] = reg1; + xops[1] = GEN_INT (-PROBE_INTERVAL/(PROBE_INTERVAL/2048)); + for (int i = 0; i < PROBE_INTERVAL/2048; i++) + if (TARGET_64BIT && TARGET_LONG64) + output_asm_insn ("addi.d\t%0,%0,%1", xops); + else + output_asm_insn ("addi.w\t%0,%0,%1", xops); + + /* Probe at TEST_ADDR, test if TEST_ADDR == LAST_ADDR and branch. */ + xops[1] = reg2; + strcpy (tmp, "bne\t%0,%1,"); + if (TARGET_64BIT) + output_asm_insn ("st.d\t$zero,%0,0", xops); + else + output_asm_insn ("st.w\t$zero,%0,0", xops); + output_asm_insn (strcat (tmp, &loop_lab[1]), xops); + + return ""; +} + +/* Expand the "prologue" pattern. 
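+
+   The expansion emits stack probes when stack checking or stack clash
+   protection asks for them, makes an initial stack adjustment of at most
+   LARCH_MAX_FIRST_STACK_STEP bytes and saves the GPRs and FPRs within
+   that range, allocates whatever remains of the frame, and finally sets
+   up the hard frame pointer if one is needed.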
*/ + +void +loongarch_expand_prologue (void) +{ + const struct loongarch_frame_info *frame; + HOST_WIDE_INT size; + unsigned int nargs; + + frame = &cfun->machine->frame; + size = frame->total_size; + + if (flag_stack_usage_info) + current_function_static_stack_size = size; + + if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK + || flag_stack_clash_protection) + { + if (crtl->is_leaf && !cfun->calls_alloca) + { + if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) + loongarch_emit_probe_stack_range (get_stack_check_protect (), + size - get_stack_check_protect ()); + } + else if (size > 0) + loongarch_emit_probe_stack_range (get_stack_check_protect (), size); + } + + /* Save the registers. Allocate up to LARCH_MAX_FIRST_STACK_STEP + bytes beforehand; this is enough to cover the register save area + without going out of range. */ + if (((frame->mask | frame->fmask | frame->acc_mask) != 0) + || frame->num_cop0_regs > 0) + { + HOST_WIDE_INT step1; + + step1 = MIN (size, LARCH_MAX_FIRST_STACK_STEP); + { + if (cfun->machine->interrupt_handler_p) + { +// need to be improved + } + else + { + if (step1 != 0) + { + rtx insn = gen_add3_insn (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (-step1)); + RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; + loongarch_frame_barrier (); + size -= step1; + } + } + loongarch_for_each_saved_gpr_and_fpr (size, loongarch_save_reg); + } + } + + /* Allocate the rest of the frame. */ + if (size > 0) + { + if (SMALL_OPERAND (-size)) + RTX_FRAME_RELATED_P (emit_insn (gen_add3_insn (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (-size)))) = 1; + else + { + loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), GEN_INT (size)); + emit_insn (gen_sub3_insn (stack_pointer_rtx, + stack_pointer_rtx, + LARCH_PROLOGUE_TEMP (Pmode))); + + /* Describe the combined effect of the previous instructions. */ + loongarch_set_frame_expr + (gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, -size))); + } + loongarch_frame_barrier (); + } + + /* Set up the frame pointer, if we're using one. */ + if (frame_pointer_needed) + { + HOST_WIDE_INT offset; + + offset = frame->hard_frame_pointer_offset; + if (offset == 0) + { + rtx insn = loongarch_emit_move (hard_frame_pointer_rtx, stack_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } + else if (SMALL_OPERAND (offset)) + { + rtx insn = gen_add3_insn (hard_frame_pointer_rtx, + stack_pointer_rtx, GEN_INT (offset)); + RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; + } + else + { + loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), GEN_INT (offset)); + loongarch_emit_move (hard_frame_pointer_rtx, stack_pointer_rtx); + emit_insn (gen_add3_insn (hard_frame_pointer_rtx, + hard_frame_pointer_rtx, + LARCH_PROLOGUE_TEMP (Pmode))); + loongarch_set_frame_expr + (gen_rtx_SET (hard_frame_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, offset))); + } + } + + /* If we are profiling, make sure no instructions are scheduled before + the call to mcount. */ + if (crtl->profile) + emit_insn (gen_blockage ()); +} + +/* Attach all pending register saves to the previous instruction. + Return that instruction. 
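In other words, the CFA-restore notes queued in loongarch_epilogue.cfa_restores are parked on whatever insn was emitted last, and that insn is marked frame-related, so the unwinder sees all of the pending restores take effect at that point.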
*/ + +static rtx_insn * +loongarch_epilogue_emit_cfa_restores (void) +{ + rtx_insn *insn; + + insn = get_last_insn (); + if (loongarch_epilogue.cfa_restores) + { + gcc_assert (insn && !REG_NOTES (insn)); + RTX_FRAME_RELATED_P (insn) = 1; + REG_NOTES (insn) = loongarch_epilogue.cfa_restores; + loongarch_epilogue.cfa_restores = 0; + } + return insn; +} + +/* Like loongarch_epilogue_emit_cfa_restores, but also record that the CFA is + now at REG + OFFSET. */ + +static void +loongarch_epilogue_set_cfa (rtx reg, HOST_WIDE_INT offset) +{ + rtx_insn *insn; + + insn = loongarch_epilogue_emit_cfa_restores (); + if (reg != loongarch_epilogue.cfa_reg || offset != loongarch_epilogue.cfa_offset) + { + RTX_FRAME_RELATED_P (insn) = 1; + REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, + plus_constant (Pmode, reg, offset), + REG_NOTES (insn)); + loongarch_epilogue.cfa_reg = reg; + loongarch_epilogue.cfa_offset = offset; + } +} + +/* Emit instructions to restore register REG from slot MEM. Also update + the cfa_restores list. */ + +static void +loongarch_restore_reg (rtx reg, rtx mem) +{ + /* There's instruction to load $31 directly. Load into + $7 instead and adjust the return insn appropriately. */ + if (GET_MODE (reg) == DFmode + && (!TARGET_FLOAT64 + || loongarch_abi == ABILP32)) + { + loongarch_add_cfa_restore (loongarch_subword (reg, true)); + loongarch_add_cfa_restore (loongarch_subword (reg, false)); + } + else + loongarch_add_cfa_restore (reg); + + loongarch_emit_save_slot_move (reg, mem, LARCH_EPILOGUE_TEMP (GET_MODE (reg))); + if (REGNO (reg) == REGNO (loongarch_epilogue.cfa_reg)) + /* The CFA is currently defined in terms of the register whose + value we have just restored. Redefine the CFA in terms of + the stack pointer. */ + loongarch_epilogue_set_cfa (stack_pointer_rtx, + loongarch_epilogue.cfa_restore_sp_offset); +} + +/* Emit code to set the stack pointer to BASE + OFFSET, given that + BASE + OFFSET is NEW_FRAME_SIZE bytes below the top of the frame. + BASE, if not the stack pointer, is available as a temporary. */ + +static void +loongarch_deallocate_stack (rtx base, rtx offset, HOST_WIDE_INT new_frame_size) +{ + if (base == stack_pointer_rtx && offset == const0_rtx) + return; + + loongarch_frame_barrier (); + if (offset == const0_rtx) + { + emit_move_insn (stack_pointer_rtx, base); + loongarch_epilogue_set_cfa (stack_pointer_rtx, new_frame_size); + } + else + { + emit_insn (gen_add3_insn (stack_pointer_rtx, base, offset)); + loongarch_epilogue_set_cfa (stack_pointer_rtx, new_frame_size); + } +} + +/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P + says which. */ + +void +loongarch_expand_epilogue (bool sibcall_p) +{ + const struct loongarch_frame_info *frame; + HOST_WIDE_INT step1, step2; + rtx base, adjust; + rtx_insn *insn; + + if (!sibcall_p && loongarch_can_use_return_insn ()) + { + emit_jump_insn (gen_return ()); + return; + } + + + /* Split the frame into two. STEP1 is the amount of stack we should + deallocate before restoring the registers. STEP2 is the amount we + should deallocate afterwards. + + Start off by assuming that no registers need to be restored. */ + frame = &cfun->machine->frame; + step1 = frame->total_size; + step2 = 0; + + /* Work out which register holds the frame address. 
*/ + if (!frame_pointer_needed) + base = stack_pointer_rtx; + else + { + base = hard_frame_pointer_rtx; + step1 -= frame->hard_frame_pointer_offset; + } + loongarch_epilogue.cfa_reg = base; + loongarch_epilogue.cfa_offset = step1; + loongarch_epilogue.cfa_restores = NULL_RTX; + + /* If we need to restore registers, deallocate as much stack as + possible in the second step without going out of range. */ + if ((frame->mask | frame->fmask | frame->acc_mask) != 0 + || frame->num_cop0_regs > 0) + { + step2 = MIN (step1, LARCH_MAX_FIRST_STACK_STEP); + step1 -= step2; + } + + /* Get an rtx for STEP1 that we can add to BASE. */ + adjust = GEN_INT (step1); + if (!SMALL_OPERAND (step1)) + { + loongarch_emit_move (LARCH_EPILOGUE_TEMP (Pmode), adjust); + adjust = LARCH_EPILOGUE_TEMP (Pmode); + } + loongarch_deallocate_stack (base, adjust, step2); + + loongarch_epilogue.cfa_restore_sp_offset = step2; + { + /* Restore the registers. */ + loongarch_for_each_saved_gpr_and_fpr (frame->total_size - step2, + loongarch_restore_reg); + + if (cfun->machine->interrupt_handler_p) + { +// need to be improved !! + } + else + /* Deallocate the final bit of the frame. */ + loongarch_deallocate_stack (stack_pointer_rtx, GEN_INT (step2), 0); + } + + if (cfun->machine->use_frame_header_for_callee_saved_regs) + loongarch_epilogue_emit_cfa_restores (); + else + gcc_assert (!loongarch_epilogue.cfa_restores); + + /* Add in the __builtin_eh_return stack adjustment. */ + if (crtl->calls_eh_return) + { + emit_insn (gen_add3_insn (stack_pointer_rtx, + stack_pointer_rtx, + EH_RETURN_STACKADJ_RTX)); + } + + if (!sibcall_p) + { + if (cfun->machine->interrupt_handler_p) + { + // need to be improved !! + } + else + { + rtx pat; + + rtx reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); + pat = gen_simple_return_internal (reg); + emit_jump_insn (pat); + } + } + +} + +/* Return nonzero if this function is known to have a null epilogue. + This allows the optimizer to omit jumps to jumps if no stack + was created. */ + +bool +loongarch_can_use_return_insn (void) +{ + /* Interrupt handlers need to go through the epilogue. */ + if (cfun->machine->interrupt_handler_p) + return false; + + if (!reload_completed) + return false; + + if (crtl->profile) + return false; + + + return (cfun->machine->frame.total_size == 0 + && !cfun->machine->use_frame_header_for_callee_saved_regs); +} + +/* Return true if register REGNO can store a value of mode MODE. + The result of this function is cached in loongarch_hard_regno_mode_ok. */ + +static bool +loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) +{ + unsigned int size; + enum mode_class mclass; + + if (mode == CCmode) + return ST_REG_P (regno); + + size = GET_MODE_SIZE (mode); + mclass = GET_MODE_CLASS (mode); + + if (GP_REG_P (regno)) + return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD; + + if (FP_REG_P (regno) + && (((regno - FP_REG_FIRST) % MAX_FPRS_PER_FMT) == 0 + || (MIN_FPRS_PER_FMT == 1 && size <= UNITS_PER_FPREG))) + { + if (mclass == MODE_FLOAT + || mclass == MODE_COMPLEX_FLOAT + || mclass == MODE_VECTOR_FLOAT) + return size <= UNITS_PER_FPVALUE; + + /* Allow integer modes that fit into a single register. We need + to put integers into FPRs when using instructions like CVT + and TRUNC. There's no point allowing sizes smaller than a word, + because the FPU has no appropriate load/store instructions. 
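(For instance, with the customary values MIN_UNITS_PER_WORD == 4 and UNITS_PER_FPREG == 8 on a 64-bit FPU, the size test below lets SImode and DImode integers live in an FPR while rejecting QImode and HImode.)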
*/ + if (mclass == MODE_INT) + return size >= MIN_UNITS_PER_WORD && size <= UNITS_PER_FPREG; + } + + return false; +} + +/* Implement TARGET_HARD_REGNO_MODE_OK. */ + +static bool +loongarch_hard_regno_mode_ok (unsigned int regno, machine_mode mode) +{ + return loongarch_hard_regno_mode_ok_p[mode][regno]; +} + +/* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */ + +bool +loongarch_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED, + unsigned int new_reg) +{ + /* Interrupt functions can only use registers that have already been + saved by the prologue, even if they would normally be call-clobbered. */ + if (cfun->machine->interrupt_handler_p && !df_regs_ever_live_p (new_reg)) + return false; + + return true; +} + +/* Return nonzero if register REGNO can be used as a scratch register + in peephole2. */ + +bool +loongarch_hard_regno_scratch_ok (unsigned int regno) +{ + /* See loongarch_hard_regno_rename_ok. */ + if (cfun->machine->interrupt_handler_p && !df_regs_ever_live_p (regno)) + return false; + + return true; +} + +/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. Odd-numbered + single-precision registers are not considered callee-saved for o32 + FPXX as they will be clobbered when run on an FR=1 FPU.*/ + +static bool +loongarch_hard_regno_call_part_clobbered (unsigned int abi_id, + unsigned int regno, + machine_mode mode) +{ + return false; +} + +/* Implement TARGET_HARD_REGNO_NREGS. */ + +static unsigned int +loongarch_hard_regno_nregs (unsigned int regno, machine_mode mode) +{ + if (ST_REG_P (regno)) + /* The size of FP status registers is always 4, because they only hold + CCmode values, and CCmode is always considered to be 4 bytes wide. */ + return (GET_MODE_SIZE (mode) + 3) / 4; + + if (FP_REG_P (regno)) + return (GET_MODE_SIZE (mode) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG; + + /* All other registers are word-sized. */ + return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; +} + +/* Implement CLASS_MAX_NREGS, taking the maximum of the cases + in loongarch_hard_regno_nregs. */ + +int +loongarch_class_max_nregs (enum reg_class rclass, machine_mode mode) +{ + int size; + HARD_REG_SET left; + + size = 0x8000; + left = reg_class_contents[(int) rclass]; + if (hard_reg_set_intersect_p (left, reg_class_contents[(int) ST_REGS])) + { + if (loongarch_hard_regno_mode_ok (ST_REG_FIRST, mode)) + size = MIN (size, 4); + + left &= ~reg_class_contents[(int) ST_REGS]; + } + if (hard_reg_set_intersect_p (left, reg_class_contents[(int) FP_REGS])) + { + if (loongarch_hard_regno_mode_ok (FP_REG_FIRST, mode)) + size = MIN (size, UNITS_PER_FPREG); + + left &= ~reg_class_contents[(int) FP_REGS]; + } + if (!hard_reg_set_empty_p (left)) + size = MIN (size, UNITS_PER_WORD); + return (GET_MODE_SIZE (mode) + size - 1) / size; +} + +/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ + +static bool +loongarch_can_change_mode_class (machine_mode from, + machine_mode to, reg_class_t rclass) +{ + /* Allow conversions between different Loongson integer vectors, + and between those vectors and DImode. */ + if (GET_MODE_SIZE (from) == 8 && GET_MODE_SIZE (to) == 8 + && INTEGRAL_MODE_P (from) && INTEGRAL_MODE_P (to)) + return true; + + /* Otherwise, there are several problems with changing the modes of + values in floating-point registers: + + - When a multi-word value is stored in paired floating-point + registers, the first register always holds the low word. We + therefore can't allow FPRs to change between single-word and + multi-word modes on big-endian targets. 
+ + - GCC assumes that each word of a multiword register can be + accessed individually using SUBREGs. This is not true for + floating-point registers if they are bigger than a word. + + - Loading a 32-bit value into a 64-bit floating-point register + will not sign-extend the value, despite what LOAD_EXTEND_OP + says. We can't allow FPRs to change from SImode to a wider + mode on 64-bit targets. + + - If the FPU has already interpreted a value in one format, we + must not ask it to treat the value as having a different + format. + + We therefore disallow all mode changes involving FPRs. */ + + return !reg_classes_intersect_p (FP_REGS, rclass); +} + +/* Implement target hook small_register_classes_for_mode_p. */ + +static bool +loongarch_small_register_classes_for_mode_p (machine_mode mode + ATTRIBUTE_UNUSED) +{ + return 0; +} + +/* Return true if moves in mode MODE can use the FPU's mov.fmt instruction, + */ + +static bool +loongarch_mode_ok_for_mov_fmt_p (machine_mode mode) +{ + switch (mode) + { + case E_SFmode: + return TARGET_HARD_FLOAT; + + case E_DFmode: + return TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT; + + case E_V2SFmode: + return 0; + + default: + return 0; + } +} + +/* Implement TARGET_MODES_TIEABLE_P. */ + +static bool +loongarch_modes_tieable_p (machine_mode mode1, machine_mode mode2) +{ + /* FPRs allow no mode punning, so it's not worth tying modes if we'd + prefer to put one of them in FPRs. */ + return (mode1 == mode2 + || (!loongarch_mode_ok_for_mov_fmt_p (mode1) + && !loongarch_mode_ok_for_mov_fmt_p (mode2))); +} + +/* Implement TARGET_PREFERRED_RELOAD_CLASS. */ + +static reg_class_t +loongarch_preferred_reload_class (rtx x, reg_class_t rclass) +{ + if (reg_class_subset_p (FP_REGS, rclass) + && loongarch_mode_ok_for_mov_fmt_p (GET_MODE (x))) + return FP_REGS; + + if (reg_class_subset_p (GR_REGS, rclass)) + rclass = GR_REGS; + + return rclass; +} + +/* RCLASS is a class involved in a REGISTER_MOVE_COST calculation. + Return a "canonical" class to represent it in later calculations. */ + +static reg_class_t +loongarch_canonicalize_move_class (reg_class_t rclass) +{ + if (reg_class_subset_p (rclass, GENERAL_REGS)) + rclass = GENERAL_REGS; + + return rclass; +} + +/* Return the cost of moving a value from a register of class FROM to a GPR. + Return 0 for classes that are unions of other classes handled by this + function. */ + +static int +loongarch_move_to_gpr_cost (reg_class_t from) +{ + switch (from) + { + case GENERAL_REGS: + /* MOVE macro. */ + return 2; + + case FP_REGS: + /* MFC1, etc. */ + return 4; + + default: + return 0; + } +} + +/* Return the cost of moving a value from a GPR to a register of class TO. + Return 0 for classes that are unions of other classes handled by this + function. */ + +static int +loongarch_move_from_gpr_cost (reg_class_t to) +{ + switch (to) + { + case GENERAL_REGS: + /*MOVE macro. */ + return 2; + + case FP_REGS: + /* MTC1, etc. */ + return 4; + + default: + return 0; + } +} + +/* Implement TARGET_REGISTER_MOVE_COST. Return 0 for classes that are the + maximum of the move costs for subclasses; regclass will work out + the maximum for us. */ + +static int +loongarch_register_move_cost (machine_mode mode, + reg_class_t from, reg_class_t to) +{ + reg_class_t dregs; + int cost1, cost2; + + from = loongarch_canonicalize_move_class (from); + to = loongarch_canonicalize_move_class (to); + + /* Handle moves that can be done without using general-purpose registers. 
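(Working through the cases handled below with the cost helpers above: an FPR-to-FPR move in a mov.fmt-capable mode costs 4; a move between a GPR and an FPR costs 4 in either direction; and an FPR-to-FPR move in any other mode is priced as a round trip through a GPR temporary, 4 + 4 = 8.)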
*/ + if (from == FP_REGS) + { + if (to == FP_REGS && loongarch_mode_ok_for_mov_fmt_p (mode)) + /* MOV.FMT. */ + return 4; + } + + /* Handle cases in which only one class deviates from the ideal. */ + dregs = GENERAL_REGS; + if (from == dregs) + return loongarch_move_from_gpr_cost (to); + if (to == dregs) + return loongarch_move_to_gpr_cost (from); + + /* Handles cases that require a GPR temporary. */ + cost1 = loongarch_move_to_gpr_cost (from); + if (cost1 != 0) + { + cost2 = loongarch_move_from_gpr_cost (to); + if (cost2 != 0) + return cost1 + cost2; + } + + return 0; +} + +/* Implement TARGET_MEMORY_MOVE_COST. */ + +static int +loongarch_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in) +{ + return (loongarch_cost->memory_latency + + memory_move_secondary_cost (mode, rclass, in)); +} + +/* Implement TARGET_SECONDARY_MEMORY_NEEDED. + + When targeting the o32 FPXX ABI, all moves with a length of doubleword + or greater must be performed by FR-mode-aware instructions. + This can be achieved using MOVFRH2GR.S/MOVGR2FRH.W when these instructions are + available but otherwise moves must go via memory. + Using MOVGR2FR/MOVFR2GR to access the lower-half of these registers would require + a forbidden single-precision access. We require all double-word moves to use + memory because adding even and odd floating-point registers classes + would have a significant impact on the backend. */ + +static bool +loongarch_secondary_memory_needed (machine_mode mode, reg_class_t class1, + reg_class_t class2) +{ + /* Ignore spilled pseudos. */ + if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS)) + return false; + + return false; +} + +/* Return the register class required for a secondary register when + copying between one of the registers in RCLASS and value X, which + has mode MODE. X is the source of the move if IN_P, otherwise it + is the destination. Return NO_REGS if no secondary register is + needed. */ + +enum reg_class +loongarch_secondary_reload_class (enum reg_class rclass, + machine_mode mode, rtx x, bool) +{ + int regno; + + regno = true_regnum (x); + + /* Copying from accumulator registers to anywhere other than a general + register requires a temporary general register. */ +// if (reg_class_subset_p (rclass, ACC_REGS)) ?????? +// return GP_REG_P (regno) ? NO_REGS : GR_REGS; + if (reg_class_subset_p (rclass, FP_REGS)) + { + if (regno < 0 + || (MEM_P (x) + && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8))) + /* In this case we can use lwc1, swc1, ldc1 or sdc1. We'll use + pairs of lwc1s and swc1s if ldc1 and sdc1 are not supported. */ + return NO_REGS; + + if (GP_REG_P (regno) || x == CONST0_RTX (mode)) + /* In this case we can use movgr2fr.s, movfr2gr.s, movgr2fr.d or movfr2gr.d. */ + return NO_REGS; + + if (CONSTANT_P (x) && !targetm.cannot_force_const_mem (mode, x)) + /* We can force the constant to memory and use lwc1 + and ldc1. As above, we will use pairs of lwc1s if + ldc1 is not supported. */ + return NO_REGS; + + if (FP_REG_P (regno) && loongarch_mode_ok_for_mov_fmt_p (mode)) + /* In this case we can use mov.fmt. */ + return NO_REGS; + + /* Otherwise, we need to reload through an integer register. */ + return GR_REGS; + } + if (FP_REG_P (regno)) + return reg_class_subset_p (rclass, GR_REGS) ? NO_REGS : GR_REGS; + + return NO_REGS; +} + + +/* Implement TARGET_VALID_POINTER_MODE. 
*/ + +static bool +loongarch_valid_pointer_mode (scalar_int_mode mode) +{ + return mode == SImode || (TARGET_64BIT && mode == DImode); +} + +/* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */ + +static bool +loongarch_scalar_mode_supported_p (scalar_mode mode) +{ + if (ALL_FIXED_POINT_MODE_P (mode) + && GET_MODE_PRECISION (mode) <= 2 * BITS_PER_WORD) + return true; + + return default_scalar_mode_supported_p (mode); +} + +/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */ + +static machine_mode +loongarch_preferred_simd_mode (scalar_mode mode) +{ + return word_mode; +} + +/* Return the length of INSN. LENGTH is the initial length computed by + attributes in the machine-description file. */ + +int +loongarch_adjust_insn_length (rtx_insn *insn, int length) +{ + /* loongarch.md uses MAX_PIC_BRANCH_LENGTH as a placeholder for the length + of a PIC long-branch sequence. Substitute the correct value. */ + if (length == MAX_PIC_BRANCH_LENGTH + && JUMP_P (insn) + && INSN_CODE (insn) >= 0 + && get_attr_type (insn) == TYPE_BRANCH) + { + /* Add the branch-over instruction and its delay slot, if this + is a conditional branch. */ + length = simplejump_p (insn) ? 0 : 8; + + /* Add the length of an indirect jump, ignoring the delay slot. */ + length += 4; + } + + /* A unconditional jump has an unfilled delay slot if it is not part + of a sequence. A conditional jump normally has a delay slot. */ + if (CALL_P (insn) || (JUMP_P (insn))) + length += 4; + + /* See how many nops might be needed to avoid hardware hazards. */ + if (!cfun->machine->ignore_hazard_length_p + && INSN_P (insn) + && INSN_CODE (insn) >= 0) + switch (get_attr_hazard (insn)) + { + case HAZARD_NONE: + break; + + case HAZARD_DELAY: + case HAZARD_FORBIDDEN_SLOT: + length += NOP_INSN_LENGTH; + break; + } + + return length; +} + +/* Return the assembly code for INSN, which has the operands given by + OPERANDS, and which branches to OPERANDS[0] if some condition is true. + BRANCH_IF_TRUE is the asm template that should be used if OPERANDS[0] + is in range of a direct branch. BRANCH_IF_FALSE is an inverted + version of BRANCH_IF_TRUE. */ + +const char * +loongarch_output_conditional_branch (rtx_insn *insn, rtx *operands, + const char *branch_if_true, + const char *branch_if_false) +{ + unsigned int length; + rtx taken; + + gcc_assert (LABEL_P (operands[0])); + + length = get_attr_length (insn); + if (length <= 12) + { + return branch_if_true; + } + + /* Generate a reversed branch around a direct jump. This fallback does + not use branch-likely instructions. */ + rtx_code_label *not_taken = gen_label_rtx (); + taken = operands[0]; + + /* Generate the reversed branch to NOT_TAKEN. */ + operands[0] = not_taken; + output_asm_insn (branch_if_false, operands); + + /* If INSN has a delay slot, we must provide delay slots for both the + branch to NOT_TAKEN and the conditional jump. We must also ensure + that INSN's delay slot is executed in the appropriate cases. */ + if (final_sequence) + { + /* This first delay slot will always be executed, so use INSN's + delay slot if is not annulled. */ + if (!INSN_ANNULLED_BRANCH_P (insn)) + { + final_scan_insn (final_sequence->insn (1), + asm_out_file, optimize, 1, NULL); + final_sequence->insn (1)->set_deleted (); + } + fprintf (asm_out_file, "\n"); + } + + output_asm_insn (LARCH_ABSOLUTE_JUMP ("b\t%0"), &taken); + + /* Now deal with its delay slot; see above. */ + if (final_sequence) + { + /* This delay slot will only be executed if the branch is taken. + Use INSN's delay slot if is annulled. 
*/ + if (INSN_ANNULLED_BRANCH_P (insn)) + { + final_scan_insn (final_sequence->insn (1), + asm_out_file, optimize, 1, NULL); + final_sequence->insn (1)->set_deleted (); + } + fprintf (asm_out_file, "\n"); + } + + /* Output NOT_TAKEN. */ + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (not_taken)); + return ""; +} + +/* Return the assembly code for INSN, which branches to OPERANDS[0] + if some equality condition is true. The condition is given by + OPERANDS[1] if !INVERTED_P, otherwise it is the inverse of + OPERANDS[1]. OPERANDS[2] is the comparison's first operand; + OPERANDS[3] is the second operand and may be zero or a register. */ + +const char * +loongarch_output_equal_conditional_branch (rtx_insn* insn, rtx *operands, + bool inverted_p) +{ + const char *branch[2]; + if (operands[3] == const0_rtx) + { + branch[!inverted_p] = LARCH_BRANCH ("b%C1z", "%2,%0"); + branch[inverted_p] = LARCH_BRANCH ("b%N1z", "%2,%0"); + } else + { + branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,%z3,%0"); + branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,%z3,%0"); + } + + return loongarch_output_conditional_branch (insn, operands, branch[1], branch[0]); +} + +/* Return the assembly code for INSN, which branches to OPERANDS[0] + if some ordering condition is true. The condition is given by + OPERANDS[1] if !INVERTED_P, otherwise it is the inverse of + OPERANDS[1]. OPERANDS[2] is the comparison's first operand; + OPERANDS[3] is the second operand and may be zero or a register. */ + +const char * +loongarch_output_order_conditional_branch (rtx_insn *insn, rtx *operands, + bool inverted_p) +{ + const char *branch[2]; + + /* Make BRANCH[1] branch to OPERANDS[0] when the condition is true. + Make BRANCH[0] branch on the inverse condition. */ + if (operands[3] != const0_rtx) + { + /* Handle degenerate cases that should not, but do, occur. */ + if (REGNO (operands[2]) == REGNO (operands[3])) + { + switch (GET_CODE (operands[1])) + { + case LT: + case LTU: + inverted_p = !inverted_p; + /* Fall through. */ + case GE: + case GEU: + branch[!inverted_p] = LARCH_BRANCH ("b", "%0"); + branch[inverted_p] = "\t# branch never"; + break; + default: + gcc_unreachable (); + } + } + else + { + switch (GET_CODE (operands[1])) + { + case LE: + branch[!inverted_p] = LARCH_BRANCH ("bge", "%3,%2,%0"); + branch[inverted_p] = LARCH_BRANCH ("blt", "%3,%2,%0"); + break; + case LEU: + branch[!inverted_p] = LARCH_BRANCH ("bgeu", "%3,%2,%0"); + branch[inverted_p] = LARCH_BRANCH ("bltu", "%3,%2,%0"); + break; + case GT: + branch[!inverted_p] = LARCH_BRANCH ("blt", "%3,%2,%0"); + branch[inverted_p] = LARCH_BRANCH ("bge", "%3,%2,%0"); + break; + case GTU: + branch[!inverted_p] = LARCH_BRANCH ("bltu", "%3,%2,%0"); + branch[inverted_p] = LARCH_BRANCH ("bgeu", "%3,%2,%0"); + break; + case LT: + case LTU: + case GE: + case GEU: + branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,%3,%0"); + branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,%3,%0"); + break; + default: + gcc_unreachable (); + } + } + } + else + { + switch (GET_CODE (operands[1])) + { + /* These cases are equivalent to comparisons against zero. */ + case LEU: + inverted_p = !inverted_p; + /* Fall through. */ + case GTU: + branch[!inverted_p] = LARCH_BRANCH ("bne", "%2,%.,%0"); + branch[inverted_p] = LARCH_BRANCH ("beq", "%2,%.,%0"); + break; + + /* These cases are always true or always false. */ + case LTU: + inverted_p = !inverted_p; + /* Fall through. 
*/ + case GEU: + branch[!inverted_p] = LARCH_BRANCH ("beq", "%.,%.,%0"); + branch[inverted_p] = LARCH_BRANCH ("bne", "%.,%.,%0"); + break; + + case LE: + branch[!inverted_p] = LARCH_BRANCH ("bge", "$zero,%2,%0"); + branch[inverted_p] = LARCH_BRANCH ("blt", "$zero,%2,%0"); + break; + case GT: + branch[!inverted_p] = LARCH_BRANCH ("blt", "$zero,%2,%0"); + branch[inverted_p] = LARCH_BRANCH ("bge", "$zero,%2,%0"); + break; + case LT: + case GE: + branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,$zero,%0"); + branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,$zero,%0"); + break; + default: + gcc_unreachable (); + } + } + return loongarch_output_conditional_branch (insn, operands, branch[1], branch[0]); +} + + + +/* Return the assembly code for DIV or DDIV instruction DIVISION, which has + the operands given by OPERANDS. Add in a divide-by-zero check if needed. + + When working around R4000 and R4400 errata, we need to make sure that + the division is not immediately followed by a shift[1][2]. We also + need to stop the division from being put into a branch delay slot[3]. + The easiest way to avoid both problems is to add a nop after the + division. When a divide-by-zero check is needed, this nop can be + used to fill the branch delay slot. + + [1] If a double-word or a variable shift executes immediately + after starting an integer division, the shift may give an + incorrect result. See quotations of errata #16 and #28 from + "LARCH R4000PC/SC Errata, Processor Revision 2.2 and 3.0" + in loongarch.md for details. + + [2] A similar bug to [1] exists for all revisions of the + R4000 and the R4400 when run in an MC configuration. + From "LARCH R4000MC Errata, Processor Revision 2.2 and 3.0": + + "19. In this following sequence: + + ddiv (or ddivu or div or divu) + dsll32 (or dsrl32, dsra32) + + if an MPT stall occurs, while the divide is slipping the cpu + pipeline, then the following double shift would end up with an + incorrect result. + + Workaround: The compiler needs to avoid generating any + sequence with divide followed by extended double shift." + + This erratum is also present in "LARCH R4400MC Errata, Processor + Revision 1.0" and "LARCH R4400MC Errata, Processor Revision 2.0 + & 3.0" as errata #10 and #4, respectively. + + [3] From "LARCH R4000PC/SC Errata, Processor Revision 2.2 and 3.0" + (also valid for LARCH R4000MC processors): + + "52. R4000SC: This bug does not apply for the R4000PC. + + There are two flavors of this bug: + + 1) If the instruction just after divide takes an RF exception + (tlb-refill, tlb-invalid) and gets an instruction cache + miss (both primary and secondary) and the line which is + currently in secondary cache at this index had the first + data word, where the bits 5..2 are set, then R4000 would + get a wrong result for the div. + + ##1 + nop + div r8, r9 + ------------------- # end-of page. -tlb-refill + nop + ##2 + nop + div r8, r9 + ------------------- # end-of page. -tlb-invalid + nop + + 2) If the divide is in the taken branch delay slot, where the + target takes RF exception and gets an I-cache miss for the + exception vector or where I-cache miss occurs for the + target address, under the above mentioned scenarios, the + div would get wrong results. + + ##1 + j r2 # to next page mapped or unmapped + div r8,r9 # this bug would be there as long + # as there is an ICache miss and + nop # the "data pattern" is present + + ##2 + beq r0, r0, NextPage # to Next page + div r8,r9 + nop + + This bug is present for div, divu, ddiv, and ddivu + instructions. 
+ + Workaround: For item 1), OS could make sure that the next page + after the divide instruction is also mapped. For item 2), the + compiler could make sure that the divide instruction is not in + the branch delay slot." + + These processors have PRId values of 0x00004220 and 0x00004300 for + the R4000 and 0x00004400, 0x00004500 and 0x00004600 for the R4400. */ + +const char * +loongarch_output_division (const char *division, rtx *operands) +{ + const char *s; + + s = division; + if (TARGET_CHECK_ZERO_DIV) + { + output_asm_insn (s, operands); + s = "bne\t%2,%.,1f\n\tbreak\t7\n1:"; + } + return s; +} + + +/* Return true if destination of IN_INSN is used as add source in + OUT_INSN. Both IN_INSN and OUT_INSN are of type fmadd. Example: + madd.s dst, x, y, z + madd.s a, dst, b, c */ + +bool +loongarch_fmadd_bypass (rtx_insn *out_insn, rtx_insn *in_insn) +{ + int dst_reg, src_reg; + + gcc_assert (get_attr_type (in_insn) == TYPE_FMADD); + gcc_assert (get_attr_type (out_insn) == TYPE_FMADD); + + extract_insn (in_insn); + dst_reg = REG_P (recog_data.operand[0]); + + extract_insn (out_insn); + src_reg = REG_P (recog_data.operand[1]); + + if (dst_reg == src_reg) + return true; + + return false; +} + +/* Return true if IN_INSN is a multiply-add or multiply-subtract + instruction and if OUT_INSN assigns to the accumulator operand. */ + +bool +loongarch_linked_madd_p (rtx_insn *out_insn, rtx_insn *in_insn) +{ + enum attr_accum_in accum_in; + int accum_in_opnum; + rtx accum_in_op; + + if (recog_memoized (in_insn) < 0) + return false; + + accum_in = get_attr_accum_in (in_insn); + if (accum_in == ACCUM_IN_NONE) + return false; + + accum_in_opnum = accum_in - ACCUM_IN_0; + + extract_insn (in_insn); + gcc_assert (accum_in_opnum < recog_data.n_operands); + accum_in_op = recog_data.operand[accum_in_opnum]; + + return reg_set_p (accum_in_op, out_insn); +} + +/* True if the dependency between OUT_INSN and IN_INSN is on the store + data rather than the address. We need this because the cprestore + pattern is type "store", but is defined using an UNSPEC_VOLATILE, + which causes the default routine to abort. We just return false + for that case. */ + +bool +loongarch_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn) +{ + if (GET_CODE (PATTERN (in_insn)) == UNSPEC_VOLATILE) + return false; + + return store_data_bypass_p (out_insn, in_insn); +} + + +/* Implement TARGET_SCHED_ADJUST_COST. We assume that anti and output + dependencies have no cost, except on the 20Kc where output-dependence + is treated like input-dependence. */ + +static int +loongarch_adjust_cost (rtx_insn *, int dep_type, rtx_insn *, int cost, unsigned int) +{ + if (dep_type != 0 && (dep_type != REG_DEP_OUTPUT)) + return 0; + return cost; +} + +/* Return the number of instructions that can be issued per cycle. */ + +static int +loongarch_issue_rate (void) +{ + switch (loongarch_tune) + { + case PROCESSOR_LOONGARCH64: + case PROCESSOR_GS464V: + return 4; + + default: + return 1; + } +} + +/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD. This should + be as wide as the scheduling freedom in the DFA. */ + +static int +loongarch_multipass_dfa_lookahead (void) +{ + if (TUNE_LOONGARCH64 || TUNE_GS464V) + return 4; + + return 0; +} + + +static void +loongarch_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, + int max_ready ATTRIBUTE_UNUSED) +{ +} + +/* Implement TARGET_SCHED_REORDER. 
*/ + +static int +loongarch_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, + rtx_insn **ready, int *nreadyp, int cycle ATTRIBUTE_UNUSED) +{ + return loongarch_issue_rate (); +} + +/* Implement TARGET_SCHED_REORDER2. */ + +static int +loongarch_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, + rtx_insn **ready, int *nreadyp, int cycle ATTRIBUTE_UNUSED) +{ + return cached_can_issue_more; +} + +/* Implement TARGET_SCHED_VARIABLE_ISSUE. */ + +static int +loongarch_variable_issue (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, + rtx_insn *insn, int more) +{ + /* Ignore USEs and CLOBBERs; don't count them against the issue rate. */ + if (USEFUL_INSN_P (insn)) + { + if (get_attr_type (insn) != TYPE_GHOST) + more--; + } + + /* Instructions of type 'multi' should all be split before + the second scheduling pass. */ + gcc_assert (!reload_completed + || recog_memoized (insn) < 0 + || get_attr_type (insn) != TYPE_MULTI); + + cached_can_issue_more = more; + return more; +} + +/* Given that we have an rtx of the form (prefetch ... WRITE LOCALITY), + return the first operand of the associated PREF or PREFX insn. */ + +rtx +loongarch_prefetch_cookie (rtx write, rtx locality) +{ + /* store_streamed / load_streamed. */ + if (INTVAL (locality) <= 0) + return GEN_INT (INTVAL (write) + 4); + + /* store / load. */ + if (INTVAL (locality) <= 2) + return write; + + /* store_retained / load_retained. */ + return GEN_INT (INTVAL (write) + 6); +} + + + +/* Return whether CFG is used in loongarch_reorg. */ + +static bool +loongarch_cfg_in_reorg (void) +{ + return (TARGET_RELAX_PIC_CALLS); +} + +/* If INSN is a call, return the underlying CALL expr. Return NULL_RTX + otherwise. If INSN has two call rtx, then store the second one in + SECOND_CALL. */ + +static rtx +loongarch_call_expr_from_insn (rtx_insn *insn, rtx *second_call) +{ + rtx x; + rtx x2; + + if (!CALL_P (insn)) + return NULL_RTX; + + x = PATTERN (insn); + if (GET_CODE (x) == PARALLEL) + { + /* Calls returning complex values have two CALL rtx. Look for the second + one here, and return it via the SECOND_CALL arg. */ + x2 = XVECEXP (x, 0, 1); + if (GET_CODE (x2) == SET) + x2 = XEXP (x2, 1); + if (GET_CODE (x2) == CALL) + *second_call = x2; + + x = XVECEXP (x, 0, 0); + } + if (GET_CODE (x) == SET) + x = XEXP (x, 1); + gcc_assert (GET_CODE (x) == CALL); + + return x; +} + +/* REG is set in DEF. See if the definition is one of the ways we load a + register with a symbol address for a loongarch_use_pic_fn_addr_reg_p call. + If it is, return the symbol reference of the function, otherwise return + NULL_RTX. + + If RECURSE_P is true, use loongarch_find_pic_call_symbol to interpret + the values of source registers, otherwise treat such registers as + having an unknown value. */ + +static rtx +loongarch_pic_call_symbol_from_set (df_ref def, rtx reg, bool recurse_p) +{ + rtx_insn *def_insn; + rtx set; + + if (DF_REF_IS_ARTIFICIAL (def)) + return NULL_RTX; + + def_insn = DF_REF_INSN (def); + set = single_set (def_insn); + if (set && rtx_equal_p (SET_DEST (set), reg)) + { + rtx note, src, symbol; + + /* First see whether the source is a plain symbol. This is used + when calling symbols that are not lazily bound. */ + src = SET_SRC (set); + if (GET_CODE (src) == SYMBOL_REF) + return src; + + /* Handle %call16 references. 
*/ + symbol = loongarch_strip_unspec_call (src); + if (symbol) + { + gcc_assert (GET_CODE (symbol) == SYMBOL_REF); + return symbol; + } + + /* If we have something more complicated, look for a + REG_EQUAL or REG_EQUIV note. */ + note = find_reg_equal_equiv_note (def_insn); + if (note && GET_CODE (XEXP (note, 0)) == SYMBOL_REF) + return XEXP (note, 0); + + /* Follow at most one simple register copy. Such copies are + interesting in cases like: + + for (...) + { + locally_binding_fn (...); + } + + and: + + locally_binding_fn (...); + ... + locally_binding_fn (...); + + where the load of locally_binding_fn can legitimately be + hoisted or shared. However, we do not expect to see complex + chains of copies, so a full worklist solution to the problem + would probably be overkill. */ + if (recurse_p && REG_P (src)) + return loongarch_find_pic_call_symbol (def_insn, src, false); + } + + return NULL_RTX; +} + +/* Find the definition of the use of REG in INSN. See if the definition + is one of the ways we load a register with a symbol address for a + loongarch_use_pic_fn_addr_reg_p call. If it is return the symbol reference + of the function, otherwise return NULL_RTX. RECURSE_P is as for + loongarch_pic_call_symbol_from_set. */ + +static rtx +loongarch_find_pic_call_symbol (rtx_insn *insn, rtx reg, bool recurse_p) +{ + df_ref use; + struct df_link *defs; + rtx symbol; + + use = df_find_use (insn, regno_reg_rtx[REGNO (reg)]); + if (!use) + return NULL_RTX; + defs = DF_REF_CHAIN (use); + if (!defs) + return NULL_RTX; + symbol = loongarch_pic_call_symbol_from_set (defs->ref, reg, recurse_p); + if (!symbol) + return NULL_RTX; + + /* If we have more than one definition, they need to be identical. */ + for (defs = defs->next; defs; defs = defs->next) + { + rtx other; + + other = loongarch_pic_call_symbol_from_set (defs->ref, reg, recurse_p); + if (!rtx_equal_p (symbol, other)) + return NULL_RTX; + } + + return symbol; +} + +/* Replace the args_size operand of the call expression CALL with the + call-attribute UNSPEC and fill in SYMBOL as the function symbol. */ + +static void +loongarch_annotate_pic_call_expr (rtx call, rtx symbol) +{ + rtx args_size; + + args_size = XEXP (call, 1); + XEXP (call, 1) = gen_rtx_UNSPEC (GET_MODE (args_size), + gen_rtvec (2, args_size, symbol), + UNSPEC_CALL_ATTR); +} + +/* OPERANDS[ARGS_SIZE_OPNO] is the arg_size operand of a CALL expression. See + if instead of the arg_size argument it contains the call attributes. If + yes return true along with setting OPERANDS[ARGS_SIZE_OPNO] to the function + symbol from the call attributes. Also return false if ARGS_SIZE_OPNO is + -1. */ + +bool +loongarch_get_pic_call_symbol (rtx *operands, int args_size_opno) +{ + rtx args_size, symbol; + + if (!TARGET_RELAX_PIC_CALLS || args_size_opno == -1) + return false; + + args_size = operands[args_size_opno]; + if (GET_CODE (args_size) != UNSPEC) + return false; + gcc_assert (XINT (args_size, 1) == UNSPEC_CALL_ATTR); + + symbol = XVECEXP (args_size, 0, 1); + gcc_assert (GET_CODE (symbol) == SYMBOL_REF); + + operands[args_size_opno] = symbol; + return true; +} + +/* Use DF to annotate PIC indirect calls with the function symbol they + dispatch to. 
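Concretely, the pass below walks every call insn in every basic block, pulls out the register that holds the call address, chases that register's definitions through the DF use-def chains (loongarch_find_pic_call_symbol), and, when all definitions agree on a single symbol, rewrites the call's args_size operand into an UNSPEC_CALL_ATTR that carries both the original args_size and that symbol.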
*/ + +static void +loongarch_annotate_pic_calls (void) +{ + basic_block bb; + rtx_insn *insn; + + FOR_EACH_BB_FN (bb, cfun) + FOR_BB_INSNS (bb, insn) + { + rtx call, reg, symbol, second_call; + + second_call = 0; + call = loongarch_call_expr_from_insn (insn, &second_call); + if (!call) + continue; + gcc_assert (MEM_P (XEXP (call, 0))); + reg = XEXP (XEXP (call, 0), 0); + if (!REG_P (reg)) + continue; + + symbol = loongarch_find_pic_call_symbol (insn, reg, true); + if (symbol) + { + loongarch_annotate_pic_call_expr (call, symbol); + if (second_call) + loongarch_annotate_pic_call_expr (second_call, symbol); + } + } +} + + +/* A structure representing the state of the processor pipeline. + Used by the loongarch_sim_* family of functions. */ +struct loongarch_sim { + /* The maximum number of instructions that can be issued in a cycle. + (Caches loongarch_issue_rate.) */ + unsigned int issue_rate; + + /* The current simulation time. */ + unsigned int time; + + /* How many more instructions can be issued in the current cycle. */ + unsigned int insns_left; + + /* LAST_SET[X].INSN is the last instruction to set register X. + LAST_SET[X].TIME is the time at which that instruction was issued. + INSN is null if no instruction has yet set register X. */ + struct { + rtx_insn *insn; + unsigned int time; + } last_set[FIRST_PSEUDO_REGISTER]; + + /* The pipeline's current DFA state. */ + state_t dfa_state; +}; + +/* Reset STATE to the initial simulation state. */ + +static void +loongarch_sim_reset (struct loongarch_sim *state) +{ + curr_state = state->dfa_state; + + state->time = 0; + state->insns_left = state->issue_rate; + memset (&state->last_set, 0, sizeof (state->last_set)); + state_reset (curr_state); + + targetm.sched.init (0, false, 0); + advance_state (curr_state); +} + +/* Initialize STATE before its first use. DFA_STATE points to an + allocated but uninitialized DFA state. */ + +static void +loongarch_sim_init (struct loongarch_sim *state, state_t dfa_state) +{ + if (targetm.sched.init_dfa_pre_cycle_insn) + targetm.sched.init_dfa_pre_cycle_insn (); + + if (targetm.sched.init_dfa_post_cycle_insn) + targetm.sched.init_dfa_post_cycle_insn (); + + state->issue_rate = loongarch_issue_rate (); + state->dfa_state = dfa_state; + loongarch_sim_reset (state); +} + + + +/* Set up costs based on the current architecture and tuning settings. */ + +static void +loongarch_set_tuning_info (void) +{ + + loongarch_tuning_info.arch = loongarch_arch; + loongarch_tuning_info.tune = loongarch_tune; + loongarch_tuning_info.initialized_p = true; + + dfa_start (); + + struct loongarch_sim state; + loongarch_sim_init (&state, alloca (state_size ())); + + dfa_finish (); +} + +/* Implement TARGET_EXPAND_TO_RTL_HOOK. */ + +static void +loongarch_expand_to_rtl_hook (void) +{ + /* We need to call this at a point where we can safely create sequences + of instructions, so TARGET_OVERRIDE_OPTIONS is too early. We also + need to call it at a point where the DFA infrastructure is not + already in use, so we can't just call it lazily on demand. + + At present, loongarch_tuning_info is only needed during post-expand + RTL passes such as split_insns, so this hook should be early enough. + We may need to move the call elsewhere if loongarch_tuning_info starts + to be used for other things (such as rtx_costs, or expanders that + could be called during gimple optimization). 
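(As currently written the hook only records the arch/tune pair in loongarch_tuning_info and briefly spins up the scheduler DFA through loongarch_sim_init; the simulated pipeline state is discarded again by dfa_finish.)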
*/ + loongarch_set_tuning_info (); +} + +/* This structure records that the current function has a LO_SUM + involving SYMBOL_REF or LABEL_REF BASE and that MAX_OFFSET is + the largest offset applied to BASE by all such LO_SUMs. */ +struct loongarch_lo_sum_offset { + rtx base; + HOST_WIDE_INT offset; +}; + +/* Return a hash value for SYMBOL_REF or LABEL_REF BASE. */ + +static hashval_t +loongarch_hash_base (rtx base) +{ + int do_not_record_p; + + return hash_rtx (base, GET_MODE (base), &do_not_record_p, NULL, false); +} + +/* Hashtable helpers. */ + +struct loongarch_lo_sum_offset_hasher : free_ptr_hash +{ + typedef rtx_def *compare_type; + static inline hashval_t hash (const loongarch_lo_sum_offset *); + static inline bool equal (const loongarch_lo_sum_offset *, const rtx_def *); +}; + +/* Hash-table callbacks for loongarch_lo_sum_offsets. */ + +inline hashval_t +loongarch_lo_sum_offset_hasher::hash (const loongarch_lo_sum_offset *entry) +{ + return loongarch_hash_base (entry->base); +} + +inline bool +loongarch_lo_sum_offset_hasher::equal (const loongarch_lo_sum_offset *entry, + const rtx_def *value) +{ + return rtx_equal_p (entry->base, value); +} + +typedef hash_table loongarch_offset_table; + + +/* Subroutine of loongarch_reorg to manage passes that require DF. */ + +static void +loongarch_df_reorg (void) +{ + /* Create def-use chains. */ + df_set_flags (DF_EQ_NOTES); + df_chain_add_problem (DF_UD_CHAIN); + df_analyze (); + + if (TARGET_RELAX_PIC_CALLS) + loongarch_annotate_pic_calls (); + + df_finish_pass (false); +} + +/* Implement TARGET_MACHINE_DEPENDENT_REORG. */ + +static void +loongarch_reorg (void) +{ + /* Restore the BLOCK_FOR_INSN pointers, which are needed by DF.DF insn info is only kept up + to date if the CFG is available. */ + if (loongarch_cfg_in_reorg ()) + compute_bb_for_insn (); + if (loongarch_cfg_in_reorg ()) + { + loongarch_df_reorg (); + free_bb_for_insn (); + } +} + +/* We use a machine specific pass to do a second machine dependent reorg + pass after delay branch scheduling. */ + +static unsigned int +loongarch_machine_reorg2 (void) +{ +// loongarch_insert_insn_pseudos (); + return 0; +} + +namespace { + +const pass_data pass_data_loongarch_machine_reorg2 = +{ + RTL_PASS, /* type */ + "mach2", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_loongarch_machine_reorg2 : public rtl_opt_pass +{ +public: + pass_loongarch_machine_reorg2(gcc::context *ctxt) + : rtl_opt_pass(pass_data_loongarch_machine_reorg2, ctxt) + {} + + /* opt_pass methods: */ + virtual unsigned int execute (function *) { return loongarch_machine_reorg2 (); } + +}; // class pass_loongarch_machine_reorg2 + +} // anon namespace + +rtl_opt_pass * +make_pass_loongarch_machine_reorg2 (gcc::context *ctxt) +{ + return new pass_loongarch_machine_reorg2 (ctxt); +} + + +/* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text + in order to avoid duplicating too much logic from elsewhere. */ + +static void +loongarch_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, + tree function) +{ + const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl)); + rtx this_rtx, temp1, temp2, fnaddr; + rtx_insn *insn; + bool use_sibcall_p; + + /* Pretend to be a post-reload pass while generating rtl. 
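Everything below therefore works purely with hard registers (regnos 12 and 13 serve as the scratch temporaries) and finishes by running just enough of rest_of_compilation to emit the thunk body.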
*/ + reload_completed = 1; + + /* Mark the end of the (empty) prologue. */ + emit_note (NOTE_INSN_PROLOGUE_END); + + /* Determine if we can use a sibcall to call FUNCTION directly. */ + fnaddr = XEXP (DECL_RTL (function), 0); + use_sibcall_p = (loongarch_function_ok_for_sibcall (function, NULL) + && const_call_insn_operand (fnaddr, Pmode)); + +// /* Determine if we need to load FNADDR from the GOT. */ +// if (!use_sibcall_p +// && (loongarch_got_symbol_type_p +// (loongarch_classify_symbol (fnaddr, SYMBOL_CONTEXT_LEA)))) +// { +// /* Pick a global pointer. Use a call-clobbered register if +// TARGET_CALL_SAVED_GP. */ +// cfun->machine->global_pointer +// = GLOBAL_POINTER_REGNUM; +// cfun->machine->must_initialize_gp_p = true; +// SET_REGNO (pic_offset_table_rtx, cfun->machine->global_pointer); +// +// /* Set up the global pointer for n32 or n64 abicalls. */ +// loongarch_emit_loadgp (); +// } + + /* We need two temporary registers in some cases. */ + temp1 = gen_rtx_REG (Pmode, 12); + temp2 = gen_rtx_REG (Pmode, 13); + + /* Find out which register contains the "this" pointer. */ + if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) + this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1); + else + this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST); + + /* Add DELTA to THIS_RTX. */ + if (delta != 0) + { + rtx offset = GEN_INT (delta); + if (!SMALL_OPERAND (delta)) + { + loongarch_emit_move (temp1, offset); + offset = temp1; + } + emit_insn (gen_add3_insn (this_rtx, this_rtx, offset)); + } + + /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */ + if (vcall_offset != 0) + { + rtx addr; + + /* Set TEMP1 to *THIS_RTX. */ + loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx)); + + /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET. */ + addr = loongarch_add_offset (temp2, temp1, vcall_offset); + + /* Load the offset and add it to THIS_RTX. */ + loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, addr)); + emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1)); + } + + /* Jump to the target function. Use a sibcall if direct jumps are + allowed, otherwise load the address into a register first. */ + if (use_sibcall_p) + { + insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx)); + SIBLING_CALL_P (insn) = 1; + } + else + { + loongarch_emit_move (temp1, fnaddr); + emit_jump_insn (gen_indirect_jump (temp1)); + } + + /* Run just enough of rest_of_compilation. This sequence was + "borrowed" from alpha.c. */ + insn = get_insns (); + split_all_insns_noflow (); + shorten_branches (insn); + assemble_start_function (thunk_fndecl, fnname); + final_start_function (insn, file, 1); + final (insn, file, 1); + final_end_function (); + assemble_end_function (thunk_fndecl, fnname); + + /* Clean up the vars set above. Note that final_end_function resets + the global pointer for us. */ + reload_completed = 0; +} + + +/* The last argument passed to loongarch_set_compression_mode, + or negative if the function hasn't been called yet. */ +static unsigned int old_compression_mode = -1; + +/* Set up the target-dependent global state for ISA mode COMPRESSION_MODE. */ + +static void +loongarch_set_compression_mode (unsigned int compression_mode) +{ + + if (compression_mode == old_compression_mode) + return; + + /* Restore base settings of various flags. 
*/ + target_flags = loongarch_base_target_flags; + flag_schedule_insns = loongarch_base_schedule_insns; + flag_reorder_blocks_and_partition = loongarch_base_reorder_blocks_and_partition; + flag_move_loop_invariants = loongarch_base_move_loop_invariants; + str_align_loops = loongarch_base_align_loops; + str_align_jumps = loongarch_base_align_jumps; + str_align_functions = loongarch_base_align_functions; + target_flags |= compression_mode; + + /* Provide default values for align_* for 64-bit targets. */ + if (TARGET_64BIT) + { + if (flag_align_loops && !str_align_loops) + str_align_loops = "8"; + if (flag_align_jumps && !str_align_jumps) + str_align_jumps = "8"; + if (flag_align_functions && !str_align_functions) + str_align_functions = "8"; + } + + targetm.min_anchor_offset = -32768; + targetm.max_anchor_offset = 32767; + targetm.const_anchor = 0x8000; + restore_target_globals (&default_target_globals); + old_compression_mode = compression_mode; +} + +/* Implement TARGET_SET_CURRENT_FUNCTION. Decide whether the current + function should use switch modes accordingly. */ + +static void +loongarch_set_current_function (tree fndecl) +{ + loongarch_set_compression_mode (loongarch_get_compress_mode (fndecl)); +} + +/* Allocate a chunk of memory for per-function machine-dependent data. */ + +static struct machine_function * +loongarch_init_machine_status (void) +{ + return ggc_cleared_alloc (); +} + +/* Return the processor associated with the given ISA level, or null + if the ISA isn't valid. */ + +static const struct loongarch_cpu_info * +loongarch_cpu_info_from_isa (int isa) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE (loongarch_cpu_info_table); i++) + if (loongarch_cpu_info_table[i].isa == isa) + return loongarch_cpu_info_table + i; + + return NULL; +} + +/* Return a loongarch_cpu_info entry determined by an option valued + OPT. */ + +static const struct loongarch_cpu_info * +loongarch_cpu_info_from_opt (int opt) +{ + switch (opt) + { + case LARCH_ARCH_OPTION_FROM_ABI: + /* 'from-abi' selects the most compatible architecture for the + given ABI */ + return loongarch_cpu_info_from_isa (ABI_NEEDS_32BIT_REGS ? 1 + : (TARGET_64BIT ? 3 : 1)); + + case LARCH_ARCH_OPTION_NATIVE: + gcc_unreachable (); + + default: + return &loongarch_cpu_info_table[opt]; + } +} + +/* Return a default loongarch_cpu_info entry, given that no -march= option + was explicitly specified. */ + +static const struct loongarch_cpu_info * +loongarch_default_arch (void) +{ +#if defined (LARCH_CPU_STRING_DEFAULT) + unsigned int i; + for (i = 0; i < ARRAY_SIZE (loongarch_cpu_info_table); i++) + if (strcmp (loongarch_cpu_info_table[i].name, LARCH_CPU_STRING_DEFAULT) == 0) + return loongarch_cpu_info_table + i; + gcc_unreachable (); +#elif defined (LARCH_ISA_DEFAULT) + return loongarch_cpu_info_from_isa (LARCH_ISA_DEFAULT); +#else + /* 'from-abi' makes a good default: you get whatever the ABI + requires. */ + return loongarch_cpu_info_from_opt (LARCH_ARCH_OPTION_FROM_ABI); +#endif +} + +/* Set up globals to generate code for the ISA or processor + described by INFO. */ + +static void +loongarch_set_architecture (const struct loongarch_cpu_info *info) +{ + if (info != 0) + { + loongarch_arch_info = info; + loongarch_arch = info->cpu; + loongarch_isa = info->isa; + if (loongarch_isa < 32) + loongarch_isa_rev = 0; + else + loongarch_isa_rev = (loongarch_isa & 31) + 1; + } +} + +/* Likewise for tuning. 
*/ + +static void +loongarch_set_tune (const struct loongarch_cpu_info *info) +{ + if (info != 0) + { + loongarch_tune_info = info; + loongarch_tune = info->cpu; + } +} + +/* Implement TARGET_OPTION_OVERRIDE. */ + +static void +loongarch_option_override (void) +{ + int i, start, regno, mode; + + if (global_options_set.x_loongarch_isa_option) + loongarch_isa_option_info = &loongarch_cpu_info_table[loongarch_isa_option]; + +#ifdef SUBTARGET_OVERRIDE_OPTIONS + SUBTARGET_OVERRIDE_OPTIONS; +#endif + + + /* Save the base compression state and process flags as though we + were generating uncompressed code. */ + loongarch_base_compression_flags = 0; + + /* -mno-float overrides -mhard-float and -msoft-float. */ + if (TARGET_NO_FLOAT) + { + target_flags |= MASK_SOFT_FLOAT_ABI; + target_flags_explicit |= MASK_SOFT_FLOAT_ABI; + } + + + /* Set the small data limit. */ + loongarch_small_data_threshold = (global_options_set.x_g_switch_value + ? g_switch_value + : LARCH_DEFAULT_GVALUE); + + /* The following code determines the architecture and register size. + Similar code was added to GAS 2.14 (see tc-loongarch.c:md_after_parse_args()). + The GAS and GCC code should be kept in sync as much as possible. */ + + if (global_options_set.x_loongarch_arch_option) + loongarch_set_architecture (loongarch_cpu_info_from_opt (loongarch_arch_option)); + + if (loongarch_isa_option_info != 0) + { + if (loongarch_arch_info == 0) + loongarch_set_architecture (loongarch_isa_option_info); + else if (loongarch_arch_info->isa != loongarch_isa_option_info->isa) + error ("%<-%s%> conflicts with the other architecture options, " + "which specify a %s processor", + loongarch_isa_option_info->name, + loongarch_cpu_info_from_isa (loongarch_arch_info->isa)->name); + } + + if (loongarch_arch_info == 0) + loongarch_set_architecture (loongarch_default_arch ()); + + /* Optimize for loongarch_arch, unless -mtune selects a different processor. */ + if (global_options_set.x_loongarch_tune_option) + loongarch_set_tune (loongarch_cpu_info_from_opt (loongarch_tune_option)); + + if (loongarch_tune_info == 0) + loongarch_set_tune (loongarch_arch_info); + + if ((target_flags_explicit & MASK_64BIT) != 0) + { + if (TARGET_64BIT && ABI_NEEDS_32BIT_REGS) + error ("%<-mgp64%> used with a 32-bit ABI"); + } + else + { + /* Infer the integer register size from the ABI and processor. + Restrict ourselves to 32-bit registers if that's all the + processor has, or if the ABI cannot handle 64-bit registers. */ + if (ABI_NEEDS_32BIT_REGS) + target_flags &= ~MASK_64BIT; + else + target_flags |= MASK_64BIT; + } + + if ((target_flags_explicit & MASK_FLOAT64) != 0) + { + if (loongarch_isa_rev >= 6 && !TARGET_FLOAT64) + error ("the %qs architecture does not support %<-mfp32%>", + loongarch_arch_info->name); + else if (TARGET_SINGLE_FLOAT && TARGET_FLOAT64) + error ("unsupported combination: %s", "-mfp64 -msingle-float"); + else if (TARGET_64BIT && TARGET_DOUBLE_FLOAT && !TARGET_FLOAT64) + error ("unsupported combination: %s", "-mgp64 -mfp32 -mdouble-float"); + else if (!TARGET_64BIT && TARGET_FLOAT64) + { + if (!ISA_HAS_MXFRH) + error ("%<-mgp32%> and %<-mfp64%> can only be combined if" + " the target supports the mfhc1 and mthc1 instructions"); + else if (loongarch_abi != ABILP32) + error ("%<-mgp32%> and %<-mfp64%> can only be combined when using" + " the o32 ABI"); + } + } + else + { + /* -msingle-float selects 32-bit float registers. On r6 and later, + -mdouble-float selects 64-bit float registers, since the old paired + register model is not supported. 
In other cases the float registers + should be the same size as the integer ones. */ + if (loongarch_isa_rev >= 6 && TARGET_DOUBLE_FLOAT) + target_flags |= MASK_FLOAT64; + else if (TARGET_64BIT && TARGET_DOUBLE_FLOAT) + target_flags |= MASK_FLOAT64; + else + target_flags &= ~MASK_FLOAT64; + } + + /* End of code shared with GAS. */ + + /* If a -mlong* option was given, check that it matches the ABI, + otherwise infer the -mlong* setting from the other options. */ + if ((target_flags_explicit & MASK_LONG64) != 0) + { + if (TARGET_LONG64) + { + if (loongarch_abi == ABILPX32) + error ("%qs is incompatible with %qs", "-mabi=lpx32", "-mlong64"); + else if (loongarch_abi == ABILP32) + error ("%qs is incompatible with %qs", "-mabi=lp32", "-mlong64"); + } + else + { + if (loongarch_abi == ABILP64) + error ("%qs is incompatible with %qs", "-mabi=lp64", "-mlong32"); + } + } + else + { + if (loongarch_abi == ABILP64) + target_flags |= MASK_LONG64; + else + target_flags &= ~MASK_LONG64; + } + + if (!TARGET_OLDABI) + flag_pcc_struct_return = 0; + + /* Decide which rtx_costs structure to use. */ + if (optimize_size) + loongarch_cost = &loongarch_rtx_cost_optimize_size; + else + loongarch_cost = &loongarch_rtx_cost_data[loongarch_tune]; + + /* If the user hasn't specified a branch cost, use the processor's + default. */ + if (loongarch_branch_cost == 0) + loongarch_branch_cost = loongarch_cost->branch_cost; + + /* Prefer a call to memcpy over inline code when optimizing for size, + though see MOVE_RATIO in loongarch.h. */ + if (optimize_size && (target_flags_explicit & MASK_MEMCPY) == 0) + target_flags |= MASK_MEMCPY; + + /* If we have a nonzero small-data limit, check that the -mgpopt + setting is consistent with the other target flags. */ + if (loongarch_small_data_threshold > 0) + { + if (!TARGET_GPOPT) + { + TARGET_LOCAL_SDATA = false; + TARGET_EXTERN_SDATA = false; + } + else + { + if (TARGET_VXWORKS_RTP) + warning (0, "cannot use small-data accesses for %qs", "-mrtp"); + } + } + + /* .cfi_* directives generate a read-only section, so fall back on + manual .eh_frame creation if we need the section to be writable. */ + if (TARGET_WRITABLE_EH_FRAME) + flag_dwarf2_cfi_asm = 0; + + loongarch_init_print_operand_punct (); + + /* Set up array to map GCC register number to debug register number. + Ignore the special purpose register numbers. */ + + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { + loongarch_dbx_regno[i] = IGNORED_DWARF_REGNUM; + if (GP_REG_P (i) || FP_REG_P (i)) + loongarch_dwarf_regno[i] = i; + else + loongarch_dwarf_regno[i] = INVALID_REGNUM; + } + + start = GP_DBX_FIRST - GP_REG_FIRST; + for (i = GP_REG_FIRST; i <= GP_REG_LAST; i++) + loongarch_dbx_regno[i] = i + start; + + start = FP_DBX_FIRST - FP_REG_FIRST; + for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++) + loongarch_dbx_regno[i] = i + start; + + /* Set up loongarch_hard_regno_mode_ok. */ + for (mode = 0; mode < MAX_MACHINE_MODE; mode++) + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + loongarch_hard_regno_mode_ok_p[mode][regno] + = loongarch_hard_regno_mode_ok_uncached (regno, (machine_mode) mode); + + /* Function to allocate machine-dependent function status. */ + init_machine_status = &loongarch_init_machine_status; + target_flags &= ~MASK_RELAX_PIC_CALLS; + + /* Save base state of options. 
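   The fields saved below are exactly what loongarch_set_compression_mode
   restores (see the restore sequence earlier in this file) each time the
   per-function mode changes.  As a minimal standalone sketch of that
   save/switch/restore pattern -- using stand-in flag variables rather than
   GCC's real globals -- consider:

#include <stdio.h>

/* Stand-ins for the command-line state that the port snapshots; the
   real port saves target_flags, scheduling and alignment options.  */
static int target_flags = 0x10;
static int flag_schedule_insns = 1;

/* "Base" copies taken once in the option-override hook.  */
static int base_target_flags;
static int base_schedule_insns;

static void
save_base_options (void)
{
  base_target_flags = target_flags;
  base_schedule_insns = flag_schedule_insns;
}

/* Re-derive the global state for one function: start from the base
   values, then apply that function's mode bits on top.  */
static void
set_function_mode (int mode_flags)
{
  target_flags = base_target_flags | mode_flags;
  flag_schedule_insns = base_schedule_insns;
}

int
main (void)
{
  save_base_options ();
  set_function_mode (0x01);   /* one function turns on an extra mode bit */
  printf ("flags=0x%x\n", target_flags);
  set_function_mode (0);      /* the next function reverts to the base state */
  printf ("flags=0x%x\n", target_flags);
  return 0;
}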
*/ + loongarch_base_target_flags = target_flags; + loongarch_base_schedule_insns = flag_schedule_insns; + loongarch_base_reorder_blocks_and_partition = flag_reorder_blocks_and_partition; + loongarch_base_move_loop_invariants = flag_move_loop_invariants; + loongarch_base_align_loops = str_align_loops; + loongarch_base_align_jumps = str_align_jumps; + loongarch_base_align_functions = str_align_functions; + + /* Now select the ISA mode. + + Do all CPP-sensitive stuff in uncompressed mode; we'll switch modes + later if required. */ + loongarch_set_compression_mode (0); + + /* We register a second machine specific reorg pass after delay slot + filling. Registering the pass must be done at start up. It's + convenient to do it here. */ + opt_pass *new_pass = make_pass_loongarch_machine_reorg2 (g); + struct register_pass_info insert_pass_loongarch_machine_reorg2 = + { + new_pass, /* pass */ + "dbr", /* reference_pass_name */ + 1, /* ref_pass_instance_number */ + PASS_POS_INSERT_AFTER /* po_op */ + }; + register_pass (&insert_pass_loongarch_machine_reorg2); + + loongarch_register_frame_header_opt (); +} + + +/* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */ + +static void +loongarch_conditional_register_usage (void) +{ + if (!TARGET_HARD_FLOAT) + { + accessible_reg_set &= ~(reg_class_contents[(int) FP_REGS] | reg_class_contents[(int) ST_REGS]); + } +} + +/* Implement EH_USES. */ + +bool +loongarch_eh_uses (unsigned int regno) +{ + return false; +} + +/* Implement EPILOGUE_USES. */ + +bool +loongarch_epilogue_uses (unsigned int regno) +{ + /* Say that the epilogue uses the return address register. Note that + in the case of sibcalls, the values "used by the epilogue" are + considered live at the start of the called function. */ + if (regno == RETURN_ADDR_REGNUM) + return true; + + /* An interrupt handler must preserve some registers that are + ordinarily call-clobbered. */ + if (cfun->machine->interrupt_handler_p + && loongarch_interrupt_extra_call_saved_reg_p (regno)) + return true; + + return false; +} + +/* Return true if MEM1 and MEM2 use the same base register, and the + offset of MEM2 equals the offset of MEM1 plus 4. FIRST_REG is the + register into (from) which the contents of MEM1 will be loaded + (stored), depending on the value of LOAD_P. + SWAP_P is true when the 1st and 2nd instructions are swapped. */ + +static bool +loongarch_load_store_pair_p_1 (bool load_p, bool swap_p, + rtx first_reg, rtx mem1, rtx mem2) +{ + rtx base1, base2; + HOST_WIDE_INT offset1, offset2; + + if (!MEM_P (mem1) || !MEM_P (mem2)) + return false; + + loongarch_split_plus (XEXP (mem1, 0), &base1, &offset1); + loongarch_split_plus (XEXP (mem2, 0), &base2, &offset2); + + if (!REG_P (base1) || !rtx_equal_p (base1, base2)) + return false; + + /* Avoid invalid load pair instructions. */ + if (load_p && REGNO (first_reg) == REGNO (base1)) + return false; + + /* We must avoid this case for anti-dependence. + Ex: lw $3, 4($3) + lw $2, 0($3) + first_reg is $2, but the base is $3. 
*/ + if (load_p + && swap_p + && REGNO (first_reg) + 1 == REGNO (base1)) + return false; + + if (offset2 != offset1 + 4) + return false; + + if (!ULARCH_12BIT_OFFSET_P (offset1)) + return false; + + return true; +} + +bool +loongarch_load_store_bonding_p (rtx *operands, machine_mode mode, bool load_p) +{ + rtx reg1, reg2, mem1, mem2, base1, base2; + enum reg_class rc1, rc2; + HOST_WIDE_INT offset1, offset2; + + if (load_p) + { + reg1 = operands[0]; + reg2 = operands[2]; + mem1 = operands[1]; + mem2 = operands[3]; + } + else + { + reg1 = operands[1]; + reg2 = operands[3]; + mem1 = operands[0]; + mem2 = operands[2]; + } + + if (loongarch_address_insns (XEXP (mem1, 0), mode, false) == 0 + || loongarch_address_insns (XEXP (mem2, 0), mode, false) == 0) + return false; + + loongarch_split_plus (XEXP (mem1, 0), &base1, &offset1); + loongarch_split_plus (XEXP (mem2, 0), &base2, &offset2); + + /* Base regs do not match. */ + if (!REG_P (base1) || !rtx_equal_p (base1, base2)) + return false; + + /* Either of the loads is clobbering base register. It is legitimate to bond + loads if second load clobbers base register. However, hardware does not + support such bonding. */ + if (load_p + && (REGNO (reg1) == REGNO (base1) + || (REGNO (reg2) == REGNO (base1)))) + return false; + + /* Loading in same registers. */ + if (load_p + && REGNO (reg1) == REGNO (reg2)) + return false; + + /* The loads/stores are not of same type. */ + rc1 = REGNO_REG_CLASS (REGNO (reg1)); + rc2 = REGNO_REG_CLASS (REGNO (reg2)); + if (rc1 != rc2 + && !reg_class_subset_p (rc1, rc2) + && !reg_class_subset_p (rc2, rc1)) + return false; + + if (abs (offset1 - offset2) != GET_MODE_SIZE (mode)) + return false; + + return true; +} + +/* OPERANDS describes the operands to a pair of SETs, in the order + dest1, src1, dest2, src2. Return true if the operands can be used + in an LWP or SWP instruction; LOAD_P says which. */ + +bool +loongarch_load_store_pair_p (bool load_p, rtx *operands) +{ + rtx reg1, reg2, mem1, mem2; + + if (load_p) + { + reg1 = operands[0]; + reg2 = operands[2]; + mem1 = operands[1]; + mem2 = operands[3]; + } + else + { + reg1 = operands[1]; + reg2 = operands[3]; + mem1 = operands[0]; + mem2 = operands[2]; + } + + if (REGNO (reg2) == REGNO (reg1) + 1) + return loongarch_load_store_pair_p_1 (load_p, false, reg1, mem1, mem2); + + if (REGNO (reg1) == REGNO (reg2) + 1) + return loongarch_load_store_pair_p_1 (load_p, true, reg2, mem2, mem1); + + return false; +} + +/* Return true if REG1 and REG2 match the criteria for a movep insn. */ + +bool +loongarch_movep_target_p (rtx reg1, rtx reg2) +{ + int regno1, regno2, pair; + unsigned int i; + static const int match[8] = { + 0x00000060, /* 5, 6 */ + 0x000000a0, /* 5, 7 */ + 0x000000c0, /* 6, 7 */ + 0x00200010, /* 4, 21 */ + 0x00400010, /* 4, 22 */ + 0x00000030, /* 4, 5 */ + 0x00000050, /* 4, 6 */ + 0x00000090 /* 4, 7 */ + }; + + if (!REG_P (reg1) || !REG_P (reg2)) + return false; + + regno1 = REGNO (reg1); + regno2 = REGNO (reg2); + + if (!GP_REG_P (regno1) || !GP_REG_P (regno2)) + return false; + + pair = (1 << regno1) | (1 << regno2); + + for (i = 0; i < ARRAY_SIZE (match); i++) + if (pair == match[i]) + return true; + + return false; +} + +/* Return the size in bytes of the trampoline code, padded to + TRAMPOLINE_ALIGNMENT bits. The static chain pointer and target + function address immediately follow. */ + +int +loongarch_trampoline_code_size (void) +{ + return 4 * 4; +} + +/* Implement TARGET_TRAMPOLINE_INIT. 
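   The trampoline is four instructions followed by two pointer-sized slots
   (static chain, then target function address); the function below builds
   each word by OR-ing register and offset fields into fixed opcodes.  As a
   standalone sketch of that field packing -- reusing the opcode constants
   from the code below, with illustrative register numbers and LP64 offsets:

#include <stdio.h>
#include <stdint.h>

/* Pack "ld.d rd, rj, si12" the way the trampoline code does: destination
   in bits 0-4, base register in bits 5-9, 12-bit offset at bit 10.  */
static uint32_t
pack_ld_d (unsigned rd, unsigned rj, int si12)
{
  return 0x28c00000u | rd | (rj << 5) | ((si12 & 0xfff) << 10);
}

int
main (void)
{
  unsigned chain = 20, tmp = 19;   /* static chain and scratch registers */
  int code_size = 4 * 4;           /* four 4-byte instructions */
  int chain_off = code_size;       /* static chain pointer follows the code */
  int target_off = chain_off + 8;  /* then the 64-bit target address */

  uint32_t insns[4] = {
    0x18000000u | chain,                  /* pcaddi $chain, 0          */
    pack_ld_d (tmp, chain, target_off),   /* ld.d   $tmp, $chain, 24   */
    pack_ld_d (chain, chain, chain_off),  /* ld.d   $chain, $chain, 16 */
    0x4c000000u | (tmp << 5),             /* jirl   $r0, $tmp, 0       */
  };

  for (int i = 0; i < 4; i++)
    printf ("0x%08x\n", (unsigned) insns[i]);
  return 0;
}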
*/ + +static void +loongarch_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx addr, end_addr, high, low, opcode, mem; + rtx trampoline[8]; + unsigned int i, j; + HOST_WIDE_INT end_addr_offset, static_chain_offset, target_function_offset; + + /* Work out the offsets of the pointers from the start of the + trampoline code. */ + end_addr_offset = loongarch_trampoline_code_size (); + static_chain_offset = end_addr_offset; + target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode); + + /* Get pointers to the beginning and end of the code block. */ + addr = force_reg (Pmode, XEXP (m_tramp, 0)); + end_addr = loongarch_force_binary (Pmode, PLUS, addr, GEN_INT (end_addr_offset)); + +#define OP(X) gen_int_mode (X, SImode) + + /* Build up the code in TRAMPOLINE. */ + i = 0; + /* + pcaddi $static_chain,0 + ld.[dw] $tmp,$static_chain,target_function_offset + ld.[dw] $static_chain,$static_chain,static_chain_offset + jirl $r0,$tmp,0 + */ + trampoline[i++] = OP (0x18000000 | (STATIC_CHAIN_REGNUM - GP_REG_FIRST)); + trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000) + | 19 /* $t7 */ + | ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5) + | ((target_function_offset & 0xfff) << 10)); + trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000) + | (STATIC_CHAIN_REGNUM - GP_REG_FIRST) + | ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5) + | ((static_chain_offset & 0xfff) << 10)); + trampoline[i++] = OP (0x4c000000 | (19 << 5)); +#undef OP + + for (j = 0; j < i; j++) + { + mem = adjust_address (m_tramp, SImode, j * GET_MODE_SIZE (SImode)); + loongarch_emit_move (mem, trampoline[j]); + } + + /* Set up the static chain pointer field. */ + mem = adjust_address (m_tramp, ptr_mode, static_chain_offset); + loongarch_emit_move (mem, chain_value); + + /* Set up the target function field. */ + mem = adjust_address (m_tramp, ptr_mode, target_function_offset); + loongarch_emit_move (mem, XEXP (DECL_RTL (fndecl), 0)); + + /* Flush the code part of the trampoline. */ + emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE))); + emit_insn (gen_clear_cache (addr, end_addr)); +} + + +/* Implement TARGET_SHIFT_TRUNCATION_MASK. We want to keep the default + behavior of TARGET_SHIFT_TRUNCATION_MASK for non-vector modes even + when TARGET_LOONGSON_MMI is true. */ + +static unsigned HOST_WIDE_INT +loongarch_shift_truncation_mask (machine_mode mode) +{ + return GET_MODE_BITSIZE (mode) - 1; +} + +/* Implement TARGET_PREPARE_PCH_SAVE. */ + +static void +loongarch_prepare_pch_save (void) +{ + loongarch_set_compression_mode (0); +} + +/* Generate or test for an insn that supports a constant permutation. */ + +#define MAX_VECT_LEN 32 + +struct expand_vec_perm_d +{ + rtx target, op0, op1; + unsigned char perm[MAX_VECT_LEN]; + machine_mode vmode; + unsigned char nelt; + bool one_vector_p; + bool testing_p; +}; + +/* Construct (set target (vec_select op0 (parallel perm))) and + return true if that's a valid instruction in the active ISA. 
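   The PERM array simply selects source lanes: element I of the target is
   element PERM[I] of the source, and after a vec_concat the indices
   NELT..2*NELT-1 name lanes of the second operand.  A minimal scalar model
   of that selection, using a hypothetical 4-lane vector and the
   interleave-style permutation tried by the expander below:

#include <stdio.h>

#define NELT 4

/* Model of (vec_select src (parallel perm)): lane i of the result is
   lane perm[i] of the source.  */
static void
vec_select (int *target, const int *src, const unsigned char *perm, int nelt)
{
  for (int i = 0; i < nelt; i++)
    target[i] = src[perm[i]];
}

int
main (void)
{
  int op0[NELT] = { 10, 11, 12, 13 };
  int op1[NELT] = { 20, 21, 22, 23 };
  int concat[2 * NELT], out[NELT];

  /* vec_concat (op0, op1): lanes 0..3 from op0, lanes 4..7 from op1.  */
  for (int i = 0; i < NELT; i++)
    {
      concat[i] = op0[i];
      concat[i + NELT] = op1[i];
    }

  /* Interleave the low halves of op0 and op1: indices { 0, 4, 1, 5 }.  */
  unsigned char perm[NELT] = { 0, NELT, 1, NELT + 1 };
  vec_select (out, concat, perm, NELT);

  for (int i = 0; i < NELT; i++)
    printf ("%d ", out[i]);     /* prints: 10 20 11 21 */
  printf ("\n");
  return 0;
}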
*/ + +static bool +loongarch_expand_vselect (rtx target, rtx op0, + const unsigned char *perm, unsigned nelt) +{ + rtx rperm[MAX_VECT_LEN], x; + rtx_insn *insn; + unsigned i; + + for (i = 0; i < nelt; ++i) + rperm[i] = GEN_INT (perm[i]); + + x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm)); + x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x); + x = gen_rtx_SET (target, x); + + insn = emit_insn (x); + if (recog_memoized (insn) < 0) + { + remove_insn (insn); + return false; + } + return true; +} + +/* Similar, but generate a vec_concat from op0 and op1 as well. */ + +static bool +loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1, + const unsigned char *perm, unsigned nelt) +{ + machine_mode v2mode; + rtx x; + + if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode)) + return false; + x = gen_rtx_VEC_CONCAT (v2mode, op0, op1); + return loongarch_expand_vselect (target, x, perm, nelt); +} + +static bool +loongarch_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) +{ + unsigned int i, nelt = d->nelt; + unsigned char perm2[MAX_VECT_LEN]; + + if (d->one_vector_p) + { + /* Try interleave with alternating operands. */ + memcpy (perm2, d->perm, sizeof(perm2)); + for (i = 1; i < nelt; i += 2) + perm2[i] += nelt; + if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, perm2, nelt)) + return true; + } + else + { + if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, + d->perm, nelt)) + return true; + + /* Try again with swapped operands. */ + for (i = 0; i < nelt; ++i) + perm2[i] = (d->perm[i] + nelt) & (2 * nelt - 1); + if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt)) + return true; + } + + return false; +} + +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ + +static bool +loongarch_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + rtx op1, const vec_perm_indices &sel) +{ + struct expand_vec_perm_d d; + int i, nelt, which; + unsigned char orig_perm[MAX_VECT_LEN]; + bool ok; + + d.target = target; + d.op0 = op0; + d.op1 = op1; + + d.vmode = vmode; + gcc_assert (VECTOR_MODE_P (vmode)); + d.nelt = nelt = GET_MODE_NUNITS (vmode); + d.testing_p = !target; + + /* This is overly conservative, but ensures we don't get an + uninitialized warning on ORIG_PERM. */ + memset (orig_perm, 0, MAX_VECT_LEN); + for (i = which = 0; i < nelt; ++i) + { + int ei = sel[i] & (2 * nelt - 1); + which |= (ei < nelt ? 1 : 2); + orig_perm[i] = ei; + } + memcpy (d.perm, orig_perm, MAX_VECT_LEN); + + switch (which) + { + default: + gcc_unreachable(); + + case 3: + d.one_vector_p = false; + if (d.testing_p || !rtx_equal_p (d.op0, d.op1)) + break; + /* FALLTHRU */ + + case 2: + for (i = 0; i < nelt; ++i) + d.perm[i] &= nelt - 1; + d.op0 = d.op1; + d.one_vector_p = true; + break; + + case 1: + d.op1 = d.op0; + d.one_vector_p = true; + break; + } + + if (d.testing_p) + { + d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); + d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); + if (!d.one_vector_p) + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); + + start_sequence (); + ok = loongarch_expand_vec_perm_const_1 (&d); + end_sequence (); + return ok; + } + + ok = loongarch_expand_vec_perm_const_1 (&d); + + /* If we were given a two-vector permutation which just happened to + have both input vectors equal, we folded this into a one-vector + permutation. 
There are several loongson patterns that are matched + via direct vec_select+vec_concat expansion, but we do not have + support in loongarch_expand_vec_perm_const_1 to guess the adjustment + that should be made for a single operand. Just try again with + the original permutation. */ + if (!ok && which == 3) + { + d.op0 = op0; + d.op1 = op1; + d.one_vector_p = false; + memcpy (d.perm, orig_perm, MAX_VECT_LEN); + ok = loongarch_expand_vec_perm_const_1 (&d); + } + + return ok; +} + +/* Implement TARGET_SCHED_REASSOCIATION_WIDTH. */ + +static int +loongarch_sched_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED, + machine_mode mode) +{ + return 1; +} + +/* A subroutine of loongarch_expand_vec_init, match constant vector elements. */ + +static inline bool +loongarch_constant_elt_p (rtx x) +{ + return CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE; +} + + +/* Implement HARD_REGNO_CALLER_SAVE_MODE. */ + +machine_mode +loongarch_hard_regno_caller_save_mode (unsigned int regno, + unsigned int nregs, + machine_mode mode) +{ + /* For performance, avoid saving/restoring upper parts of a register + by returning MODE as save mode when the mode is known. */ + if (mode == VOIDmode) + return choose_hard_reg_mode (regno, nregs, NULL); + else + return mode; +} + +/* Implement TARGET_CASE_VALUES_THRESHOLD. */ + +unsigned int +loongarch_case_values_threshold (void) +{ + return default_case_values_threshold (); +} + + +/* Implement TARGET_SPILL_CLASS. */ + +static reg_class_t +loongarch_spill_class (reg_class_t rclass ATTRIBUTE_UNUSED, + machine_mode mode ATTRIBUTE_UNUSED) +{ + return NO_REGS; +} + +/* Implement TARGET_LRA_P. */ + +static bool +loongarch_lra_p (void) +{ + return loongarch_lra_flag; +} + +/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS. */ + +static reg_class_t +loongarch_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class, + reg_class_t best_class ATTRIBUTE_UNUSED) +{ + /* LRA will allocate an FPR for an integer mode pseudo instead of spilling + to memory if an FPR is present in the allocno class. It is rare that + we actually need to place an integer mode value in an FPR so where + possible limit the allocation to GR_REGS. This will slightly pessimize + code that involves integer to/from float conversions as these will have + to reload into FPRs in LRA. Such reloads are sometimes eliminated and + sometimes only partially eliminated. We choose to take this penalty + in order to eliminate usage of FPRs in code that does not use floating + point data. + + This change has a similar effect to increasing the cost of FPR->GPR + register moves for integer modes so that they are higher than the cost + of memory but changing the allocno class is more reliable. + + This is also similar to forbidding integer mode values in FPRs entirely + but this would lead to an inconsistency in the integer to/from float + instructions that say integer mode values must be placed in FPRs. */ + if (INTEGRAL_MODE_P (PSEUDO_REGNO_MODE (regno)) && allocno_class == ALL_REGS) + return GR_REGS; + return allocno_class; +} + +/* Implement TARGET_PROMOTE_FUNCTION_MODE */ + +/* This function is equivalent to default_promote_function_mode_always_promote + except that it returns a promoted mode even if type is NULL_TREE. This is + needed by libcalls which have no type (only a mode) such as fixed conversion + routines that take a signed or unsigned char/short argument and convert it + to a fixed type. 
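   For such typeless libcall arguments the promotion widens a sub-word value
   to a full register, and the UNSIGNEDP flag decides whether the upper bits
   come from sign or zero extension.  A small standalone illustration of that
   difference for an 8-bit value widened to 64 bits (the concrete widths here
   are only an example):

#include <stdio.h>
#include <stdint.h>

/* Widen an 8-bit argument to a 64-bit register image, either sign- or
   zero-extending, mimicking the choice PROMOTE_MODE makes via UNSIGNEDP.  */
static uint64_t
promote_qi (uint8_t raw, int unsignedp)
{
  if (unsignedp)
    return (uint64_t) raw;                    /* zero extension */
  return (uint64_t) (int64_t) (int8_t) raw;   /* sign extension */
}

int
main (void)
{
  uint8_t v = 0xf0;                           /* -16 as a signed char */
  printf ("unsigned: 0x%016llx\n", (unsigned long long) promote_qi (v, 1));
  printf ("signed:   0x%016llx\n", (unsigned long long) promote_qi (v, 0));
  return 0;
}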
*/ + +static machine_mode +loongarch_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, + machine_mode mode, + int *punsignedp ATTRIBUTE_UNUSED, + const_tree fntype ATTRIBUTE_UNUSED, + int for_return ATTRIBUTE_UNUSED) +{ + int unsignedp; + + if (type != NULL_TREE) + return promote_mode (type, mode, punsignedp); + + unsignedp = *punsignedp; + PROMOTE_MODE (mode, unsignedp, type); + *punsignedp = unsignedp; + return mode; +} + +/* Implement TARGET_TRULY_NOOP_TRUNCATION. */ + +static bool +loongarch_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec) +{ + return !TARGET_64BIT || inprec <= 32 || outprec > 32; +} + +/* Implement TARGET_CONSTANT_ALIGNMENT. */ + +static HOST_WIDE_INT +loongarch_constant_alignment (const_tree exp, HOST_WIDE_INT align) +{ + if (TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR) + return MAX (align, BITS_PER_WORD); + return align; +} + +/* Implement TARGET_STARTING_FRAME_OFFSET. See loongarch_compute_frame_info + for details about the frame layout. */ + +static HOST_WIDE_INT +loongarch_starting_frame_offset (void) +{ + if (FRAME_GROWS_DOWNWARD) + return 0; + return crtl->outgoing_args_size; +} + + +/* Initialize the GCC target structure. */ +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t" + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE loongarch_option_override + +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS loongarch_legitimize_address + +#undef TARGET_ASM_FUNCTION_PROLOGUE +#define TARGET_ASM_FUNCTION_PROLOGUE loongarch_output_function_prologue +#undef TARGET_ASM_FUNCTION_EPILOGUE +#define TARGET_ASM_FUNCTION_EPILOGUE loongarch_output_function_epilogue +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION loongarch_select_rtx_section +#undef TARGET_ASM_FUNCTION_RODATA_SECTION +#define TARGET_ASM_FUNCTION_RODATA_SECTION loongarch_function_rodata_section + +#undef TARGET_SCHED_INIT +#define TARGET_SCHED_INIT loongarch_sched_init +#undef TARGET_SCHED_REORDER +#define TARGET_SCHED_REORDER loongarch_sched_reorder +#undef TARGET_SCHED_REORDER2 +#define TARGET_SCHED_REORDER2 loongarch_sched_reorder2 +#undef TARGET_SCHED_VARIABLE_ISSUE +#define TARGET_SCHED_VARIABLE_ISSUE loongarch_variable_issue +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST loongarch_adjust_cost +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE loongarch_issue_rate +#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ + loongarch_multipass_dfa_lookahead +#undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P +#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \ + loongarch_small_register_classes_for_mode_p + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL loongarch_function_ok_for_sibcall + +#undef TARGET_MERGE_DECL_ATTRIBUTES +#define TARGET_MERGE_DECL_ATTRIBUTES loongarch_merge_decl_attributes +#undef TARGET_CAN_INLINE_P +#define TARGET_CAN_INLINE_P loongarch_can_inline_p +#undef TARGET_SET_CURRENT_FUNCTION +#define TARGET_SET_CURRENT_FUNCTION loongarch_set_current_function + +#undef TARGET_VALID_POINTER_MODE +#define TARGET_VALID_POINTER_MODE loongarch_valid_pointer_mode +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST loongarch_register_move_cost +#undef TARGET_MEMORY_MOVE_COST 
+#define TARGET_MEMORY_MOVE_COST loongarch_memory_move_cost +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS loongarch_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST loongarch_address_cost + +#undef TARGET_IN_SMALL_DATA_P +#define TARGET_IN_SMALL_DATA_P loongarch_in_small_data_p + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG loongarch_reorg + +#undef TARGET_PREFERRED_RELOAD_CLASS +#define TARGET_PREFERRED_RELOAD_CLASS loongarch_preferred_reload_class + +#undef TARGET_EXPAND_TO_RTL_HOOK +#define TARGET_EXPAND_TO_RTL_HOOK loongarch_expand_to_rtl_hook +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START loongarch_file_start +#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true +#undef TARGET_ASM_CODE_END +#define TARGET_ASM_CODE_END loongarch_code_end + +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START loongarch_va_start + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE loongarch_promote_function_mode +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY n_loongarch_return_in_memory + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK loongarch_output_mi_thunk +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true + +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND loongarch_print_operand +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS loongarch_print_operand_address +#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +#define TARGET_PRINT_OPERAND_PUNCT_VALID_P loongarch_print_operand_punct_valid_p + +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS n_loongarch_setup_incoming_varargs +#undef TARGET_STRICT_ARGUMENT_NAMING +#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE n_loongarch_pass_by_reference +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES n_loongarch_arg_partial_bytes +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG n_loongarch_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE n_loongarch_function_arg_advance +#undef TARGET_FUNCTION_ARG_BOUNDARY +#define TARGET_FUNCTION_ARG_BOUNDARY n_loongarch_function_arg_boundary + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P loongarch_scalar_mode_supported_p + +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE loongarch_preferred_simd_mode + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS loongarch_init_builtins +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL loongarch_builtin_decl +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN loongarch_expand_builtin + +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS HAVE_AS_TLS + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM loongarch_cannot_force_const_mem + +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P loongarch_legitimate_constant_p + +#undef TARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO loongarch_encode_section_info + +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE loongarch_attribute_table +/* All our function attributes are related to how out-of-line copies 
should + be compiled or called. They don't in themselves prevent inlining. */ +#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P +#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true + +#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P +#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true +#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P +#define TARGET_USE_ANCHORS_FOR_SYMBOL_P loongarch_use_anchors_for_symbol_p + +#undef TARGET_COMP_TYPE_ATTRIBUTES +#define TARGET_COMP_TYPE_ATTRIBUTES loongarch_comp_type_attributes + +#ifdef HAVE_AS_DTPRELWORD +#undef TARGET_ASM_OUTPUT_DWARF_DTPREL +#define TARGET_ASM_OUTPUT_DWARF_DTPREL loongarch_output_dwarf_dtprel +#endif +#undef TARGET_DWARF_REGISTER_SPAN +#define TARGET_DWARF_REGISTER_SPAN loongarch_dwarf_register_span +#undef TARGET_DWARF_FRAME_REG_MODE +#define TARGET_DWARF_FRAME_REG_MODE loongarch_dwarf_frame_reg_mode + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P loongarch_legitimate_address_p + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED loongarch_frame_pointer_required + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE loongarch_can_eliminate + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE loongarch_conditional_register_usage + +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT loongarch_trampoline_init + +#undef TARGET_ASM_OUTPUT_SOURCE_FILENAME +#define TARGET_ASM_OUTPUT_SOURCE_FILENAME loongarch_output_filename + +#undef TARGET_SHIFT_TRUNCATION_MASK +#define TARGET_SHIFT_TRUNCATION_MASK loongarch_shift_truncation_mask + +#undef TARGET_PREPARE_PCH_SAVE +#define TARGET_PREPARE_PCH_SAVE loongarch_prepare_pch_save + +#undef TARGET_VECTORIZE_VEC_PERM_CONST +#define TARGET_VECTORIZE_VEC_PERM_CONST loongarch_vectorize_vec_perm_const + +#undef TARGET_SCHED_REASSOCIATION_WIDTH +#define TARGET_SCHED_REASSOCIATION_WIDTH loongarch_sched_reassociation_width + +#undef TARGET_CASE_VALUES_THRESHOLD +#define TARGET_CASE_VALUES_THRESHOLD loongarch_case_values_threshold + +#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV +#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV loongarch_atomic_assign_expand_fenv + +#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS +#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true + +#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P +#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ + loongarch_use_by_pieces_infrastructure_p + +#undef TARGET_SPILL_CLASS +#define TARGET_SPILL_CLASS loongarch_spill_class +#undef TARGET_LRA_P +#define TARGET_LRA_P loongarch_lra_p +#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS +#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS loongarch_ira_change_pseudo_allocno_class + +#undef TARGET_HARD_REGNO_SCRATCH_OK +#define TARGET_HARD_REGNO_SCRATCH_OK loongarch_hard_regno_scratch_ok + +#undef TARGET_HARD_REGNO_NREGS +#define TARGET_HARD_REGNO_NREGS loongarch_hard_regno_nregs +#undef TARGET_HARD_REGNO_MODE_OK +#define TARGET_HARD_REGNO_MODE_OK loongarch_hard_regno_mode_ok + +#undef TARGET_MODES_TIEABLE_P +#define TARGET_MODES_TIEABLE_P loongarch_modes_tieable_p + +#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED +#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \ + loongarch_hard_regno_call_part_clobbered + +#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS +#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2 + +#undef TARGET_SECONDARY_MEMORY_NEEDED +#define TARGET_SECONDARY_MEMORY_NEEDED loongarch_secondary_memory_needed + +#undef TARGET_CAN_CHANGE_MODE_CLASS +#define TARGET_CAN_CHANGE_MODE_CLASS 
loongarch_can_change_mode_class + +#undef TARGET_TRULY_NOOP_TRUNCATION +#define TARGET_TRULY_NOOP_TRUNCATION loongarch_truly_noop_truncation + +#undef TARGET_CONSTANT_ALIGNMENT +#define TARGET_CONSTANT_ALIGNMENT loongarch_constant_alignment + +#undef TARGET_STARTING_FRAME_OFFSET +#define TARGET_STARTING_FRAME_OFFSET loongarch_starting_frame_offset + +struct gcc_target targetm = TARGET_INITIALIZER; + +#include "gt-loongarch.h" diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h new file mode 100644 index 0000000000000000000000000000000000000000..d388f8bc31ee6d063be77b54dacc4b7a8ad8169c --- /dev/null +++ b/gcc/config/loongarch/loongarch.h @@ -0,0 +1,2076 @@ +/* Definitions of target machine for GNU compiler. LARCH version. + Copyright (C) 1989-2018 Free Software Foundation, Inc. + Contributed by A. Lichnewsky (lich@inria.inria.fr). + Changed by Michael Meissner (meissner@osf.org). + 64-bit r4000 support by Ian Lance Taylor (ian@cygnus.com) and + Brendan Eich (brendan@microunity.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + + +#include "config/vxworks-dummy.h" + +#ifdef GENERATOR_FILE +/* This is used in some insn conditions, so needs to be declared, but + does not need to be defined. */ +extern int target_flags_explicit; +#endif + +/* LARCH external variables defined in loongarch.c. */ + +/* Which ABI to use. ABILP32 (original 32, or o32), ABILPX32 (n32), + ABILP64 (n64) are all defined by SGI. */ + +#define ABILP32 0 +#define ABILPX32 1 +#define ABILP64 2 + +/* Information about one recognized processor. Defined here for the + benefit of TARGET_CPU_CPP_BUILTINS. */ +struct loongarch_cpu_info { + /* The 'canonical' name of the processor as far as GCC is concerned. + It's typically a manufacturer's prefix followed by a numerical + designation. It should be lowercase. */ + const char *name; + + /* The internal processor number that most closely matches this + entry. Several processors can have the same value, if there's no + difference between them from GCC's point of view. */ + enum processor cpu; + + /* The ISA level that the processor implements. */ + int isa; + + /* A mask of PTF_* values. */ + unsigned int tune_flags; +}; + +#include "config/loongarch/loongarch-opts.h" + +/* Macros to silence warnings about numbers being signed in traditional + C and unsigned in ISO C when compiled on 32-bit hosts. */ + +#define BITMASK_HIGH (((unsigned long)1) << 31) /* 0x80000000 */ +#define BITMASK_UPPER16 ((unsigned long)0xffff << 16) /* 0xffff0000 */ +#define BITMASK_LOWER16 ((unsigned long)0xffff) /* 0x0000ffff */ + + +/* Run-time compilation parameters selecting different hardware subsets. */ + +/* True if we are generating position-independent VxWorks RTP code. */ +#define TARGET_RTP_PIC (TARGET_VXWORKS_RTP && flag_pic) + +/* True if we can optimize sibling calls. For simplicity, we only + handle cases in which call_insn_operand will reject invalid + sibcall addresses. 
There are two cases in which this isn't true: + + - TARGET_USE_GOT && !TARGET_EXPLICIT_RELOCS. call_insn_operand + accepts global constants, but all sibcalls must be indirect. */ +#define TARGET_SIBCALLS (1) + +/* True if we can use the J and JAL instructions. */ +#define TARGET_ABSOLUTE_JUMPS (!flag_pic) + +/* True if the output must have a writable .eh_frame. + See ASM_PREFERRED_EH_DATA_FORMAT for details. */ +#ifdef HAVE_LD_PERSONALITY_RELAXATION +#define TARGET_WRITABLE_EH_FRAME 0 +#else +#define TARGET_WRITABLE_EH_FRAME (flag_pic && TARGET_SHARED) +#endif + + +/* ISA has LSA available. */ +#define ISA_HAS_LSA (1) + +/* ISA has DLSA available. */ +#define ISA_HAS_DLSA (TARGET_64BIT) + +/* Architecture target defines. */ +#define TARGET_LOONGARCH64 (loongarch_arch == PROCESSOR_LOONGARCH64) +#define TUNE_LOONGARCH64 (loongarch_tune == PROCESSOR_LOONGARCH64) +#define TARGET_GS464V (loongarch_arch == PROCESSOR_GS464V) +#define TUNE_GS464V (loongarch_tune == PROCESSOR_GS464V) +/* True if the pre-reload scheduler should try to create chains of + multiply-add or multiply-subtract instructions. For example, + suppose we have: + + t1 = a * b + t2 = t1 + c * d + t3 = e * f + t4 = t3 - g * h + + t1 will have a higher priority than t2 and t3 will have a higher + priority than t4. However, before reload, there is no dependence + between t1 and t3, and they can often have similar priorities. + The scheduler will then tend to prefer: + + t1 = a * b + t3 = e * f + t2 = t1 + c * d + t4 = t3 - g * h + + which stops us from making full use of macc/madd-style instructions. + This sort of situation occurs frequently in Fourier transforms and + in unrolled loops. + + To counter this, the TUNE_MACC_CHAINS code will reorder the ready + queue so that chained multiply-add and multiply-subtract instructions + appear ahead of any other instruction that is likely to clobber lo. + In the example above, if t2 and t3 become ready at the same time, + the code ensures that t2 is scheduled first. + + Multiply-accumulate instructions are a bigger win for some targets + than others, so this macro is defined on an opt-in basis. */ +#define TUNE_MACC_CHAINS 0 + +#define TARGET_OLDABI (loongarch_abi == ABILP32) +#define TARGET_NEWABI (loongarch_abi == ABILPX32 || loongarch_abi == ABILP64) + +/* TARGET_HARD_FLOAT and TARGET_SOFT_FLOAT reflect whether the FPU is + directly accessible, while the command-line options select + TARGET_HARD_FLOAT_ABI and TARGET_SOFT_FLOAT_ABI to reflect the ABI + in use. */ +#define TARGET_HARD_FLOAT (TARGET_HARD_FLOAT_ABI) +#define TARGET_SOFT_FLOAT (TARGET_SOFT_FLOAT_ABI) + +/* TARGET_FLOAT64 represents -mfp64 represents + -mfpxx, derive TARGET_FLOAT32 to represent -mfp32. */ +#define TARGET_FLOAT32 (!TARGET_FLOAT64) + +/* False if SC acts as a memory barrier with respect to itself, + otherwise a SYNC will be emitted after SC for atomic operations + that require ordering between the SC and following loads and + stores. It does not tell anything about ordering of loads and + stores prior to and following the SC, only about the SC itself and + those loads and stores follow it. */ +#define TARGET_SYNC_AFTER_SC (1) + +/* Define preprocessor macros for the -march and -mtune options. + PREFIX is either _LARCH_ARCH or _LARCH_TUNE, INFO is the selected + processor. If INFO's canonical name is "foo", define PREFIX to + be "foo", and define an additional macro PREFIX_FOO. 
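   The macro below builds that additional name by upper-casing the processor
   string and mapping '+' to 'P' before calling builtin_define.  A standalone
   sketch of the same mangling, using plain libc instead of GCC's concat and
   builtin_define helpers:

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Build the PREFIX_FOO macro name the way LARCH_CPP_SET_PROCESSOR does:
   PREFIX, '_', then the CPU name, with '+' mapped to 'P' and everything
   upper-cased.  The caller must free the result.  */
static char *
cpu_macro_name (const char *prefix, const char *cpu)
{
  size_t len = strlen (prefix) + 1 + strlen (cpu) + 1;
  char *macro = malloc (len);
  char *p;

  snprintf (macro, len, "%s_%s", prefix, cpu);
  for (p = macro; *p != 0; p++)
    if (*p == '+')
      *p = 'P';
    else
      *p = toupper ((unsigned char) *p);
  return macro;
}

int
main (void)
{
  char *name = cpu_macro_name ("_LARCH_ARCH", "gs464v");
  printf ("%s\n", name);        /* prints _LARCH_ARCH_GS464V */
  free (name);
  return 0;
}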
*/ +#define LARCH_CPP_SET_PROCESSOR(PREFIX, INFO) \ + do \ + { \ + char *macro, *p; \ + \ + macro = concat ((PREFIX), "_", (INFO)->name, NULL); \ + for (p = macro; *p != 0; p++) \ + if (*p == '+') \ + *p = 'P'; \ + else \ + *p = TOUPPER (*p); \ + \ + builtin_define (macro); \ + builtin_define_with_value ((PREFIX), (INFO)->name, 1); \ + free (macro); \ + } \ + while (0) + +/* Target CPU builtins. */ +#define TARGET_CPU_CPP_BUILTINS() loongarch_cpu_cpp_builtins (pfile) + +/* Target CPU versions for D. */ +#define TARGET_D_CPU_VERSIONS loongarch_d_target_versions + +/* Default target_flags if no switches are specified */ + +#ifndef TARGET_DEFAULT +#define TARGET_DEFAULT 0 +#endif + +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT 0 +#endif + +#ifndef LARCH_CPU_STRING_DEFAULT +#define LARCH_CPU_STRING_DEFAULT "gs464v" +#endif +#ifndef TARGET_ENDIAN_DEFAULT +#define TARGET_ENDIAN_DEFAULT MASK_BIG_ENDIAN +#endif + +#ifdef IN_LIBGCC2 +#undef TARGET_64BIT +/* Make this compile time constant for libgcc2 */ +#ifdef __loongarch64 +#define TARGET_64BIT 1 +#else +#define TARGET_64BIT 0 +#endif +#endif /* IN_LIBGCC2 */ + +#define TARGET_LIBGCC_SDATA_SECTION ".sdata" + +#ifndef MULTILIB_ENDIAN_DEFAULT +#if TARGET_ENDIAN_DEFAULT == 0 +#define MULTILIB_ENDIAN_DEFAULT "EL" +#else +#define MULTILIB_ENDIAN_DEFAULT "EB" +#endif +#endif + +#ifndef MULTILIB_ISA_DEFAULT +#define MULTILIB_ISA_DEFAULT "loongarch64" +#endif + +#ifndef LARCH_ABI_DEFAULT +#define LARCH_ABI_DEFAULT ABILP32 +#endif + +/* Use the most portable ABI flag for the ASM specs. */ + +#if LARCH_ABI_DEFAULT == ABILP32 +#define MULTILIB_ABI_DEFAULT "mabi=lp32" +#elif LARCH_ABI_DEFAULT == ABILP64 +#define MULTILIB_ABI_DEFAULT "mabi=lp64" +#endif + +#ifndef MULTILIB_DEFAULTS +#define MULTILIB_DEFAULTS \ + { MULTILIB_ENDIAN_DEFAULT, MULTILIB_ISA_DEFAULT, MULTILIB_ABI_DEFAULT } +#endif + +/* We must pass -EL to the linker by default for little endian embedded + targets using linker scripts with a OUTPUT_FORMAT line. Otherwise, the + linker will default to using big-endian output files. The OUTPUT_FORMAT + line must be in the linker script, otherwise -EB/-EL will not work. */ + +// #ifndef ENDIAN_SPEC +// #if TARGET_ENDIAN_DEFAULT == 0 +// #define ENDIAN_SPEC "%{!EB:%{!meb:-EL}} %{EB|meb:-EB}" +// #else +// #define ENDIAN_SPEC "%{!EL:%{!mel:-EB}} %{EL|mel:-EL}" +// #endif +// #endif + +/* A spec condition that matches all -loongarch arguments. */ + +#define LARCH_ISA_LEVEL_OPTION_SPEC \ + "loongarch" + +/* A spec condition that matches all architecture arguments. */ + +#define LARCH_ARCH_OPTION_SPEC \ + LARCH_ISA_LEVEL_OPTION_SPEC "|march=*" + +/* A spec that infers a -loongarch argument from an -march argument. */ + +#define LARCH_ISA_LEVEL_SPEC \ + "%{" LARCH_ISA_LEVEL_OPTION_SPEC ":;:}" + +/* A spec that injects the default multilib ISA if no architecture is + specified. */ + +#define LARCH_DEFAULT_ISA_LEVEL_SPEC \ + "%{" LARCH_ISA_LEVEL_OPTION_SPEC ":;: \ + %{!march=*: -" MULTILIB_ISA_DEFAULT "}}" + +/* A spec that infers a -mhard-float or -msoft-float setting from an + -march argument. Note that soft-float and hard-float code are not + link-compatible. */ + +#define LARCH_ARCH_FLOAT_SPEC \ + "%{mhard-float|msoft-float|mno-float|march=loongarch*:; \ + march=vr41*|march=m4k|march=4k*|march=24kc|march=24kec \ + |march=34kc|march=34kn|march=74kc|march=1004kc|march=5kc \ + |march=m14k*|march=m5101|march=octeon|march=xlr: -msoft-float; \ + march=*: -mhard-float}" + +/* A spec condition that matches 32-bit options. 
It only works if + LARCH_ISA_LEVEL_SPEC has been applied. */ + +#define LARCH_32BIT_OPTION_SPEC \ + "loongarch1|loongarch2|loongarch32*|mgp32" + +#if (LARCH_ABI_DEFAULT == ABILPX32 \ + || LARCH_ABI_DEFAULT == ABILP64) +#define OPT_ARCH64 "mabi=32|mgp32:;" +#define OPT_ARCH32 "mabi=32|mgp32" +#else +#define OPT_ARCH64 "mabi=o64|mabi=n32|mabi=64|mgp64" +#define OPT_ARCH32 "mabi=o64|mabi=n32|mabi=64|mgp64:;" +#endif + +/* Support for a compile-time default CPU, et cetera. The rules are: + --with-arch is ignored if -march is specified or a -loongarch is specified + ; likewise --with-arch-32 and --with-arch-64. + --with-tune is ignored if -mtune is specified; likewise + --with-tune-32 and --with-tune-64. + --with-abi is ignored if -mabi is specified. + --with-float is ignored if -mhard-float or -msoft-float are + specified. + --with-fpu is ignored if -msoft-float, -msingle-float or -mdouble-float are + specified. + --with-fp-32 is ignored if -msoft-float, -msingle-float or -mfp are + specified. + --with-divide is ignored if -mdivide-traps or -mdivide-breaks are + specified. */ +#define OPTION_DEFAULT_SPECS \ + {"arch", "%{" LARCH_ARCH_OPTION_SPEC ":;: -march=%(VALUE)}" }, \ + {"arch_32", "%{" OPT_ARCH32 ":%{" LARCH_ARCH_OPTION_SPEC ":;: -march=%(VALUE)}}" }, \ + {"arch_64", "%{" OPT_ARCH64 ":%{" LARCH_ARCH_OPTION_SPEC ":;: -march=%(VALUE)}}" }, \ + {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \ + {"tune_32", "%{" OPT_ARCH32 ":%{!mtune=*:-mtune=%(VALUE)}}" }, \ + {"tune_64", "%{" OPT_ARCH64 ":%{!mtune=*:-mtune=%(VALUE)}}" }, \ + {"abi", "%{!mabi=*:-mabi=%(VALUE)}" }, \ + {"float", "%{!msoft-float:%{!mhard-float:-m%(VALUE)-float}}" }, \ + {"fpu", "%{!msoft-float:%{!msingle-float:%{!mdouble-float:-m%(VALUE)-float}}}" }, \ + {"fp_32", "%{" OPT_ARCH32 \ + ":%{!msoft-float:%{!msingle-float:%{!mfp*:-mfp%(VALUE)}}}}}" }, \ + {"divide", "%{!mdivide-traps:%{!mdivide-breaks:-mdivide-%(VALUE)}}" }, \ + {"fix-loongson3-llsc", "%{!mfix-loongson3-llsc: \ + %{!mno-fix-loongson3-llsc:-m%(VALUE)}}" } + + +/* A spec that infers the: + -mlvz setting from a -march=gs464v argument. */ +#define BASE_DRIVER_SELF_SPECS \ + LARCH_ASE_LVZ_SPEC + +#define LARCH_ASE_LVZ_SPEC \ + "%{!mno-lvz: \ + %{march=gs464v: -mlvz}}" + +#define DRIVER_SELF_SPECS \ + BASE_DRIVER_SELF_SPECS + +/* from N_LARCH */ +#define ABI_SPEC \ + "%{mabi=lp32:32}" \ + "%{mabi=lp64:64}" \ + +#define STARTFILE_PREFIX_SPEC \ + "/lib" ABI_SPEC "/ " \ + "/usr/lib" ABI_SPEC "/ " \ + "/lib/ " \ + "/usr/lib/ " + +/* This definition replaces the formerly used 'm' constraint with a + different constraint letter in order to avoid changing semantics of + the 'm' constraint when accepting new address formats in + TARGET_LEGITIMATE_ADDRESS_P. The constraint letter defined here + must not be used in insn definitions or inline assemblies. */ +#define TARGET_MEM_CONSTRAINT 'w' + +/* Likewise for 32-bit regs. */ +#define ABI_NEEDS_32BIT_REGS (loongarch_abi == ABILP32) + +/* True if the file format uses 64-bit symbols. At present, this is + only true for n64, which uses 64-bit ELF. */ +#define FILE_HAS_64BIT_SYMBOLS (loongarch_abi == ABILP64) + +/* True if symbols are 64 bits wide. This is usually determined by + the ABI's file format, but it can be overridden by -msym32. Note that + overriding the size with -msym32 changes the ABI of relocatable objects, + although it doesn't change the ABI of a fully-linked object. 
*/ +#define ABI_HAS_64BIT_SYMBOLS (FILE_HAS_64BIT_SYMBOLS \ + && Pmode == DImode \ + && !TARGET_SYM32) + +/* ISA supports instructions DMUL, DMULU, DMUH, DMUHU. */ +#define ISA_HAS_DMUL (TARGET_64BIT) + +/* ISA has floating-point RECIP.fmt and RSQRT.fmt instructions. The + LARCH64 rev. 1 ISA says that RECIP.D and RSQRT.D are unpredictable when + doubles are stored in pairs of FPRs, so for safety's sake, we apply + this restriction to the LARCH IV ISA too. */ +#define ISA_HAS_FP_RECIP_RSQRT(MODE) \ + ((MODE) == SFmode \ + || (TARGET_FLOAT64 \ + && (MODE) == DFmode)) + +/* ISA has instructions for accessing top part of 64-bit fp regs. */ +#define ISA_HAS_MXFRH (!TARGET_FLOAT32) + +/* The LoongArch VZ is available. */ +#define ISA_HAS_LVZ (TARGET_LVZ) + +/* Tell collect what flags to pass to nm. */ +#ifndef NM_FLAGS +#define NM_FLAGS "-Bn" +#endif + + +/* SUBTARGET_ASM_DEBUGGING_SPEC handles passing debugging options to + the assembler. It may be overridden by subtargets. + + Beginning with gas 2.13, -mdebug must be passed to correctly handle + COFF debugging info. */ + +#ifndef SUBTARGET_ASM_DEBUGGING_SPEC +#define SUBTARGET_ASM_DEBUGGING_SPEC "\ +%{g} %{g0} %{g1} %{g2} %{g3} \ +%{ggdb:-g} %{ggdb0:-g0} %{ggdb1:-g1} %{ggdb2:-g2} %{ggdb3:-g3} \ +%{gstabs:-g} %{gstabs0:-g0} %{gstabs1:-g1} %{gstabs2:-g2} %{gstabs3:-g3} \ +%{gstabs+:-g} %{gstabs+0:-g0} %{gstabs+1:-g1} %{gstabs+2:-g2} %{gstabs+3:-g3}" +#endif + +/* FP_ASM_SPEC represents the floating-point options that must be passed + to the assembler when FPXX support exists. Prior to that point the + assembler could accept the options but were not required for + correctness. We only add the options when absolutely necessary + because passing -msoft-float to the assembler will cause it to reject + all hard-float instructions which may require some user code to be + updated. */ + +#ifdef HAVE_AS_DOT_MODULE +#define FP_ASM_SPEC "\ +%{mhard-float} %{msoft-float} \ +%{msingle-float} %{mdouble-float}" +#else +#define FP_ASM_SPEC +#endif + +/* SUBTARGET_ASM_SPEC is always passed to the assembler. It may be + overridden by subtargets. */ + +#ifndef SUBTARGET_ASM_SPEC +#define SUBTARGET_ASM_SPEC "" +#endif + +#undef ASM_SPEC +#define ASM_SPEC "\ +%{mabi=*} %{!mabi=*: %(asm_abi_default_spec)} \ +" +/* Extra switches sometimes passed to the linker. */ + +#ifndef LINK_SPEC +#define LINK_SPEC "" +#endif /* LINK_SPEC defined */ + + +/* Specs for the compiler proper */ + +/* SUBTARGET_CC1_SPEC is passed to the compiler proper. It may be + overridden by subtargets. */ +#ifndef SUBTARGET_CC1_SPEC +#define SUBTARGET_CC1_SPEC "" +#endif + +/* CC1_SPEC is the set of arguments to pass to the compiler proper. */ + +#undef CC1_SPEC +#define CC1_SPEC "\ +%{G*} %{EB:-meb} %{EL:-mel} %{EB:%{EL:%emay not use both -EB and -EL}} \ +%(subtarget_cc1_spec)" + +/* Preprocessor specs. */ + +/* SUBTARGET_CPP_SPEC is passed to the preprocessor. It may be + overridden by subtargets. */ +#ifndef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "" +#endif + +#define CPP_SPEC "%(subtarget_cpp_spec)" + +/* This macro defines names of additional specifications to put in the specs + that can be used in various specifications like CC1_SPEC. Its definition + is an initializer with a subgrouping for each command option. + + Each subgrouping contains a string constant, that defines the + specification name, and a string constant that used by the GCC driver + program. + + Do not define this macro if it does not need to do anything. 
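   Each entry pairs a spec name with its expansion, so a reference such as
   %(subtarget_cpp_spec) in CPP_SPEC is resolved by looking the name up in
   this table.  A minimal model of that lookup; the table contents are just
   the defaults visible in this header, not the driver's full spec set:

#include <stdio.h>
#include <string.h>

struct extra_spec { const char *name; const char *spec; };

/* A cut-down EXTRA_SPECS-style table: spec name -> replacement text.  */
static const struct extra_spec specs[] = {
  { "subtarget_cc1_spec", "" },
  { "subtarget_cpp_spec", "" },
  { "asm_abi_default_spec", "-mabi=lp32" },
};

static const char *
lookup_spec (const char *name)
{
  for (size_t i = 0; i < sizeof (specs) / sizeof (specs[0]); i++)
    if (strcmp (specs[i].name, name) == 0)
      return specs[i].spec;
  return NULL;
}

int
main (void)
{
  /* The driver would substitute this for "%(asm_abi_default_spec)".  */
  printf ("%s\n", lookup_spec ("asm_abi_default_spec"));
  return 0;
}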
*/ + +#define EXTRA_SPECS \ + { "subtarget_cc1_spec", SUBTARGET_CC1_SPEC }, \ + { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \ + { "subtarget_asm_debugging_spec", SUBTARGET_ASM_DEBUGGING_SPEC }, \ + { "subtarget_asm_spec", SUBTARGET_ASM_SPEC }, \ + { "asm_abi_default_spec", "-" MULTILIB_ABI_DEFAULT }, \ + SUBTARGET_EXTRA_SPECS + +#ifndef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS +#endif + +#define DBX_DEBUGGING_INFO 1 /* generate stabs (OSF/rose) */ +#define DWARF2_DEBUGGING_INFO 1 /* dwarf2 debugging info */ + +#ifndef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG +#endif + +/* The size of DWARF addresses should be the same as the size of symbols + in the target file format. They shouldn't depend on things like -msym32, + because many DWARF consumers do not allow the mixture of address sizes + that one would then get from linking -msym32 code with -msym64 code. +*/ +#define DWARF2_ADDR_SIZE (FILE_HAS_64BIT_SYMBOLS ? 8 : 4) + +/* By default, turn on GDB extensions. */ +#define DEFAULT_GDB_EXTENSIONS 1 + +/* Registers may have a prefix which can be ignored when matching + user asm and register definitions. */ +#ifndef REGISTER_PREFIX +#define REGISTER_PREFIX "$" +#endif + +/* Local compiler-generated symbols must have a prefix that the assembler + understands. By default, this is $, although some targets (e.g., + NetBSD-ELF) need to override this. */ + +#ifndef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "$" +#endif + +/* By default on the loongarch, external symbols do not have an underscore + prepended, but some targets (e.g., NetBSD) require this. */ + +#ifndef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "" +#endif + +/* On Sun 4, this limit is 2048. We use 1500 to be safe, + since the length can run past this up to a continuation point. */ +#undef DBX_CONTIN_LENGTH +#define DBX_CONTIN_LENGTH 1500 + +/* How to renumber registers for dbx and gdb. */ +#define DBX_REGISTER_NUMBER(REGNO) loongarch_dbx_regno[REGNO] + +/* The mapping from gcc register number to DWARF 2 CFA column number. */ +#define DWARF_FRAME_REGNUM(REGNO) loongarch_dwarf_regno[REGNO] + +/* The DWARF 2 CFA column which tracks the return address. */ +#define DWARF_FRAME_RETURN_COLUMN RETURN_ADDR_REGNUM + +/* Before the prologue, RA lives in r1. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM) + +/* Describe how we implement __builtin_eh_return. */ +#define EH_RETURN_DATA_REGNO(N) \ + ((N) < (4) ? (N) + GP_ARG_FIRST : INVALID_REGNUM) + +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, GP_ARG_FIRST + 4) + +#define EH_USES(N) loongarch_eh_uses (N) + +/* Offsets recorded in opcodes are a multiple of this alignment factor. + The default for this in 64-bit mode is 8, which causes problems with + SFmode register saves. */ +#define DWARF_CIE_DATA_ALIGNMENT -4 + +/* Correct the offset of automatic variables and arguments. Note that + the LARCH debug format wants all automatic variables and arguments + to be in terms of the virtual frame pointer (stack pointer before + any adjustment in the function), while the LARCH 3.0 linker wants + the frame pointer to be the stack pointer after the initial + adjustment. 
*/ + +#define DEBUGGER_AUTO_OFFSET(X) \ + loongarch_debugger_offset (X, (HOST_WIDE_INT) 0) +#define DEBUGGER_ARG_OFFSET(OFFSET, X) \ + loongarch_debugger_offset (X, (HOST_WIDE_INT) OFFSET) + +/* Target machine storage layout */ + +#define BITS_BIG_ENDIAN 0 +#define BYTES_BIG_ENDIAN 0 +#define WORDS_BIG_ENDIAN 0 + +#define MAX_BITS_PER_WORD 64 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD (TARGET_64BIT ? 8 : 4) +#ifndef IN_LIBGCC2 +#define MIN_UNITS_PER_WORD 4 +#endif + +/* For LARCH, width of a floating point register. */ +#define UNITS_PER_FPREG (TARGET_FLOAT64 ? 8 : 4) + +/* The number of consecutive floating-point registers needed to store the + largest format supported by the FPU. */ +#define MAX_FPRS_PER_FMT (TARGET_FLOAT64 || TARGET_SINGLE_FLOAT ? 1 : 2) + +/* The number of consecutive floating-point registers needed to store the + smallest format supported by the FPU. */ +#define MIN_FPRS_PER_FMT 1 + +/* The largest size of value that can be held in floating-point + registers and moved with a single instruction. */ +#define UNITS_PER_HWFPVALUE \ + (TARGET_SOFT_FLOAT_ABI ? 0 : MAX_FPRS_PER_FMT * UNITS_PER_FPREG) + +/* The largest size of value that can be held in floating-point + registers. */ +#define UNITS_PER_FPVALUE \ + (TARGET_SOFT_FLOAT_ABI ? 0 \ + : TARGET_SINGLE_FLOAT ? UNITS_PER_FPREG \ + : LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT) + +/* The number of bytes in a double. */ +#define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT) + +/* Set the sizes of the core types. */ +#define SHORT_TYPE_SIZE 16 +#define INT_TYPE_SIZE 32 +#define LONG_TYPE_SIZE (TARGET_LONG64 ? 64 : 32) +#define LONG_LONG_TYPE_SIZE 64 + +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE (TARGET_NEWABI ? 128 : 64) + +/* Define the sizes of fixed-point types. */ +#define SHORT_FRACT_TYPE_SIZE 8 +#define FRACT_TYPE_SIZE 16 +#define LONG_FRACT_TYPE_SIZE 32 +#define LONG_LONG_FRACT_TYPE_SIZE 64 + +#define SHORT_ACCUM_TYPE_SIZE 16 +#define ACCUM_TYPE_SIZE 32 +#define LONG_ACCUM_TYPE_SIZE 64 +/* FIXME. LONG_LONG_ACCUM_TYPE_SIZE should be 128 bits, but GCC + doesn't support 128-bit integers for LARCH32 currently. */ +#define LONG_LONG_ACCUM_TYPE_SIZE (TARGET_64BIT ? 128 : 64) + +/* long double is not a fixed mode, but the idea is that, if we + support long double, we also want a 128-bit integer type. */ +#define MAX_FIXED_MODE_SIZE LONG_DOUBLE_TYPE_SIZE + +/* Width in bits of a pointer. */ +#ifndef POINTER_SIZE +#define POINTER_SIZE ((TARGET_LONG64 && TARGET_64BIT) ? 64 : 32) +#endif + +/* Allocation boundary (in *bits*) for storing arguments in argument list. */ +#define PARM_BOUNDARY BITS_PER_WORD + +/* Allocation boundary (in *bits*) for the code of a function. */ +#define FUNCTION_BOUNDARY 32 + +/* Alignment of field after `int : 0' in a structure. */ +#define EMPTY_FIELD_BOUNDARY 32 + +/* Every structure's size must be a multiple of this. */ +/* 8 is observed right on a DECstation and on riscos 4.02. */ +#define STRUCTURE_SIZE_BOUNDARY 8 + +/* There is no point aligning anything to a rounder boundary than + LONG_DOUBLE_TYPE_SIZE. +*/ +#define BIGGEST_ALIGNMENT (LONG_DOUBLE_TYPE_SIZE) + +/* All accesses must be aligned. */ +#define STRICT_ALIGNMENT 1 + +/* Define this if you wish to imitate the way many other C compilers + handle alignment of bitfields and the structures that contain + them. 
+ + The behavior is that the type written for a bit-field (`int', + `short', or other integer type) imposes an alignment for the + entire structure, as if the structure really did contain an + ordinary field of that type. In addition, the bit-field is placed + within the structure so that it would fit within such a field, + not crossing a boundary for it. + + Thus, on most machines, a bit-field whose type is written as `int' + would not cross a four-byte boundary, and would force four-byte + alignment for the whole structure. (The alignment used may not + be four bytes; it is controlled by the other alignment + parameters.) + + If the macro is defined, its definition should be a C expression; + a nonzero value for the expression enables this behavior. */ + +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* If defined, a C expression to compute the alignment for a static + variable. TYPE is the data type, and ALIGN is the alignment that + the object would ordinarily have. The value of this macro is used + instead of that alignment to align the object. + + If this macro is not defined, then ALIGN is used. + + One use of this macro is to increase alignment of medium-size + data to make it all fit in fewer cache lines. Another is to + cause character arrays to be word-aligned so that `strcpy' calls + that copy constants to character arrays can be done inline. */ + +#undef DATA_ALIGNMENT +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + ((((ALIGN) < BITS_PER_WORD) \ + && (TREE_CODE (TYPE) == ARRAY_TYPE \ + || TREE_CODE (TYPE) == UNION_TYPE \ + || TREE_CODE (TYPE) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN)) + +/* We need this for the same reason as DATA_ALIGNMENT, namely to cause + character arrays to be word-aligned so that `strcpy' calls that copy + constants to character arrays can be done inline, and 'strcmp' can be + optimised to use word loads. */ +#define LOCAL_ALIGNMENT(TYPE, ALIGN) \ + DATA_ALIGNMENT (TYPE, ALIGN) + +#define PAD_VARARGS_DOWN \ + (targetm.calls.function_arg_padding (TYPE_MODE (type), type) == PAD_DOWNWARD) + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. */ +#define WORD_REGISTER_OPERATIONS 1 + +/* When in 64-bit mode, move insns will sign extend SImode and CCmode + moves. All other references are zero extended. */ +#define LOAD_EXTEND_OP(MODE) \ + (TARGET_64BIT && ((MODE) == SImode || (MODE) == CCmode) \ + ? SIGN_EXTEND : ZERO_EXTEND) + +/* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. In such cases, + the value is constrained to be within the bounds of the declared + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. */ + +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \ + { \ + if ((MODE) == SImode) \ + (UNSIGNEDP) = 0; \ + (MODE) = Pmode; \ + } + +/* Pmode is always the same as ptr_mode, but not always the same as word_mode. + Extensions of pointers to word_mode must be signed. */ +#define POINTERS_EXTEND_UNSIGNED false + +/* Define if loading short immediate values into registers sign extends. */ +#define SHORT_IMMEDIATES_SIGN_EXTEND 1 + +/* The [d]clz instructions have the natural values at 0. */ + +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2) + +/* Standard register usage. */ + +/* Number of hardware registers. 
We have:
+
+   - 32 integer registers
+   - 32 floating point registers
+   - 8 condition code registers
+   - 2 fake registers:
+      - ARG_POINTER_REGNUM
+      - FRAME_POINTER_REGNUM
+*/
+
+#define FIRST_PSEUDO_REGISTER 74
+
+/* By default, fix the kernel registers ($26 and $27), the global
+   pointer ($28) and the stack pointer ($29).  This can change
+   depending on the command-line options.
+
+   Regarding coprocessor registers: without evidence to the contrary,
+   it's best to assume that each coprocessor register has a unique
+   use.  This can be overridden in, e.g., loongarch_option_override or
+   TARGET_CONDITIONAL_REGISTER_USAGE should the assumption be
+   inappropriate for a particular target.  */
+
+#define FIXED_REGISTERS \
+{ \
+  1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+  0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+  0, 0, 0, 0, 0, 0, 0, 1, 1, 1}
+
+
+/* Set up this array for o32 by default.
+
+   Note that we don't mark $31 as a call-clobbered register.  The idea is
+   that it's really the call instructions themselves which clobber $31.
+   We don't care what the called function does with it afterwards.
+
+   This approach makes it easier to implement sibcalls.  Unlike normal
+   calls, sibcalls don't clobber $31, so the register reaches the
+   called function intact.  EPILOGUE_USES says that $31 is useful
+   to the called function.  */
+
+#define CALL_USED_REGISTERS \
+{ \
+  1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+  1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+  1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, \
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
+
+/* Internal macros to classify a register number as to whether it's a
+   general purpose register, a floating point register, a
+   multiply/divide register, or a status register.  */
+
+#define GP_REG_FIRST 0
+#define GP_REG_LAST 31
+#define GP_REG_NUM (GP_REG_LAST - GP_REG_FIRST + 1)
+#define GP_DBX_FIRST 0
+
+#define FP_REG_FIRST 32
+#define FP_REG_LAST 63
+#define FP_REG_NUM (FP_REG_LAST - FP_REG_FIRST + 1)
+#define FP_DBX_FIRST ((write_symbols == DBX_DEBUG) ? 38 : 32)
+
+/* The DWARF 2 CFA column which tracks the return address from a
+   signal handler context.  This means that to maintain backwards
+   compatibility, no hard register can be assigned this column if it
+   would need to be handled by the DWARF unwinder.  */
+#define DWARF_ALT_FRAME_RETURN_COLUMN 72
+
+#define ST_REG_FIRST 64
+#define ST_REG_LAST 71
+#define ST_REG_NUM (ST_REG_LAST - ST_REG_FIRST + 1)
+
+#define GP_REG_P(REGNO) \
+  ((unsigned int) ((int) (REGNO) - GP_REG_FIRST) < GP_REG_NUM)
+#define M16_REG_P(REGNO) \
+  (((REGNO) >= 2 && (REGNO) <= 7) || (REGNO) == 16 || (REGNO) == 17)
+#define M16STORE_REG_P(REGNO) \
+  (((REGNO) >= 2 && (REGNO) <= 7) || (REGNO) == 0 || (REGNO) == 17)
+#define FP_REG_P(REGNO) \
+  ((unsigned int) ((int) (REGNO) - FP_REG_FIRST) < FP_REG_NUM)
+#define ST_REG_P(REGNO) \
+  ((unsigned int) ((int) (REGNO) - ST_REG_FIRST) < ST_REG_NUM)
+
+#define FP_REG_RTX_P(X) (REG_P (X) && FP_REG_P (REGNO (X)))
+
+
+#define HARD_REGNO_RENAME_OK(OLD_REG, NEW_REG) \
+  loongarch_hard_regno_rename_ok (OLD_REG, NEW_REG)
+
+/* Select a register mode required for caller save of hard regno REGNO.  */
+#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \
+  loongarch_hard_regno_caller_save_mode (REGNO, NREGS, MODE)
+
+/* Register to use for pushing function arguments.
*/ +#define STACK_POINTER_REGNUM (GP_REG_FIRST + 3) + +/* These two registers don't really exist: they get eliminated to either + the stack or hard frame pointer. */ +#define ARG_POINTER_REGNUM 72 +#define FRAME_POINTER_REGNUM 73 + +#define HARD_FRAME_POINTER_REGNUM \ + (GP_REG_FIRST + 22) + +/* FIXME: */ +/* #define HARD_FRAME_POINTER_IS_FRAME_POINTER (HARD_FRAME_POINTER_REGNUM == FRAME_POINTER_REGNUM) */ +/* #define HARD_FRAME_POINTER_IS_ARG_POINTER (HARD_FRAME_POINTER_REGNUM == ARG_POINTER_REGNUM) */ + +#define HARD_FRAME_POINTER_IS_FRAME_POINTER 0 +#define HARD_FRAME_POINTER_IS_ARG_POINTER 0 + +/* FIXME: */ +/* Register in which static-chain is passed to a function. */ +#define STATIC_CHAIN_REGNUM (GP_REG_FIRST + 20) /* $t8 */ + +#define LARCH_PROLOGUE_TEMP_REGNUM \ + (GP_REG_FIRST + 13) +#define LARCH_PROLOGUE_TEMP2_REGNUM \ + (GP_REG_FIRST + 12) +#define LARCH_EPILOGUE_TEMP_REGNUM \ + (GP_REG_FIRST + (12)) + +#define LARCH_PROLOGUE_TEMP(MODE) gen_rtx_REG (MODE, LARCH_PROLOGUE_TEMP_REGNUM) +#define LARCH_PROLOGUE_TEMP2(MODE) \ + gen_rtx_REG (MODE, LARCH_PROLOGUE_TEMP2_REGNUM) +#define LARCH_EPILOGUE_TEMP(MODE) gen_rtx_REG (MODE, LARCH_EPILOGUE_TEMP_REGNUM) + +/* Define this macro if it is as good or better to call a constant + function address than to call an address kept in a register. */ +#define NO_FUNCTION_CSE 1 + +#define THREAD_POINTER_REGNUM (GP_REG_FIRST + 2) + + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. */ + +enum reg_class +{ + NO_REGS, /* no registers in set */ + SIBCALL_REGS, /* SIBCALL_REGS */ + JALR_REGS, /* JALR_REGS */ + GR_REGS, /* integer registers */ + LVZ_REGS, /* integer registers except for $r0 and $r1 for lvz. */ + FP_REGS, /* floating point registers */ + ST_REGS, /* status registers (fp status) */ + FRAME_REGS, /* $arg and $frame */ + ALL_REGS, /* all registers */ + LIM_REG_CLASSES /* max value + 1 */ +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +#define GENERAL_REGS GR_REGS + +/* An initializer containing the names of the register classes as C + string constants. These names are used in writing some of the + debugging dumps. */ + +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "SIBCALL_REGS", \ + "JALR_REGS", \ + "GR_REGS", \ + "LVZ_REGS", \ + "FP_REGS", \ + "ST_REGS", \ + "FRAME_REGS", \ + "ALL_REGS" \ +} + +/* An initializer containing the contents of the register classes, + as integers which are bit masks. The Nth integer specifies the + contents of class N. The way the integer MASK is interpreted is + that register R is in the class if `MASK & (1 << R)' is 1. + + When the machine has more than 32 registers, an integer does not + suffice. Then the integers are replaced by sub-initializers, + braced groupings containing several integers. 
Each
+   sub-initializer must be suitable as an initializer for the type
+   `HARD_REG_SET' which is defined in `hard-reg-set.h'.  */
+
+#define REG_CLASS_CONTENTS \
+{ \
+  { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \
+  { 0x001ff000, 0x00000000, 0x00000000 }, /* SIBCALL_REGS */ \
+  { 0xff9ffff0, 0x00000000, 0x00000000 }, /* JALR_REGS */ \
+  { 0xffffffff, 0x00000000, 0x00000000 }, /* GR_REGS */ \
+  { 0xfffffffc, 0x00000000, 0x00000000 }, /* LVZ_REGS */ \
+  { 0x00000000, 0xffffffff, 0x00000000 }, /* FP_REGS */ \
+  { 0x00000000, 0x00000000, 0x000000ff }, /* ST_REGS */ \
+  { 0x00400000, 0x00000000, 0x00000200 }, /* FRAME_REGS */ \
+  { 0xffffffff, 0xffffffff, 0x000003ff } /* ALL_REGS */ \
+}
+
+
+/* A C expression whose value is a register class containing hard
+   register REGNO.  In general there is more than one such class;
+   choose a class which is "minimal", meaning that no smaller class
+   also contains the register.  */
+
+#define REGNO_REG_CLASS(REGNO) loongarch_regno_to_class[ (REGNO) ]
+
+/* A macro whose definition is the name of the class to which a
+   valid base register must belong.  A base register is one used in
+   an address which is the register value plus a displacement.  */
+
+#define BASE_REG_CLASS (GR_REGS)
+
+/* A macro whose definition is the name of the class to which a
+   valid index register must belong.  An index register is one used
+   in an address where its value is either multiplied by a scale
+   factor or added to another register (as well as added to a
+   displacement).  */
+
+#define INDEX_REG_CLASS NO_REGS
+
+/* We generally want to put call-clobbered registers ahead of
+   call-saved ones.  (IRA expects this.)  */
+
+#define REG_ALLOC_ORDER \
+{ /* Call-clobbered GPRs.  */ \
+  12, 13, 14, 15, 16, 17, 18, 19, 20, 4, 5, 6, 7, 8, 9, 10, 11, 1, \
+  /* The global pointer.  This is call-clobbered for o32 and o64 \
+     abicalls, call-saved for n32 and n64 abicalls, and a program \
+     invariant otherwise.  Putting it between the call-clobbered \
+     and call-saved registers should cope with all eventualities.  */ \
+  /* Call-saved GPRs.  */ \
+  23, 24, 25, 26, 27, 28, 29, 30, 31, \
+  /* GPRs that can never be exposed to the register allocator.  */ \
+  0, 2, 3, 21, 22, \
+  /* Call-clobbered FPRs.  */ \
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \
+  48, 49, 50, 51, 52, 53, 54, 55, \
+  /* FPRs that are usually call-saved.  The odd ones are actually \
+     call-clobbered for n32, but listing them ahead of the even \
+     registers might encourage the register allocator to fragment \
+     the available FPR pairs.  We need paired FPRs to store long \
+     doubles, so it isn't clear that using a different order \
+     for n32 would be a win.  */ \
+  56, 57, 58, 59, 60, 61, 62, 63, \
+  /* None of the remaining classes have defined call-saved \
+     registers.  */ \
+  64, 65, 66, 67, 68, 69, 70, 71, 72, 73}
+
+/* True if VALUE is an unsigned 6-bit number.  */
+
+#define UIMM6_OPERAND(VALUE) \
+  (((VALUE) & ~(unsigned HOST_WIDE_INT) 0x3f) == 0)
+
+/* True if VALUE is a signed 10-bit number.  */
+
+#define IMM10_OPERAND(VALUE) \
+  ((unsigned HOST_WIDE_INT) (VALUE) + 0x200 < 0x400)
+
+/* True if VALUE is a signed 12-bit number.  */
+
+#define IMM12_OPERAND(VALUE) \
+  ((unsigned HOST_WIDE_INT) (VALUE) + 0x800 < 0x1000)
+
+/* True if VALUE is a signed 16-bit number.  */
+
+#define IMM16_OPERAND(VALUE) \
+  ((unsigned HOST_WIDE_INT) (VALUE) + 0x8000 < 0x10000)
+
+
+/* True if VALUE is a signed 12-bit number.
*/ + +#define SMALL_OPERAND(VALUE) \ + ((unsigned HOST_WIDE_INT) (VALUE) + 0x800 < 0x1000) + +/* True if VALUE is an unsigned 12-bit number. */ + +#define SMALL_OPERAND_UNSIGNED(VALUE) \ + (((VALUE) & ~(unsigned HOST_WIDE_INT) 0xfff) == 0) + +/* True if VALUE can be loaded into a register using LUI. */ + +#define LUI_OPERAND(VALUE) \ + (((VALUE) | 0x7ffff000) == 0x7ffff000 \ + || ((VALUE) | 0x7ffff000) + 0x1000 == 0) + +/* True if VALUE can be loaded into a register using LUI. */ + +#define LU32I_OPERAND(VALUE) \ + ((((VALUE) | 0x7ffff00000000) == 0x7ffff00000000) \ + || ((VALUE) | 0x7ffff00000000) + 0x100000000 == 0) + +/* True if VALUE can be loaded into a register using LUI. */ + +#define LU52I_OPERAND(VALUE) \ + ((((VALUE) | 0xfff0000000000000) == 0xfff0000000000000)) + +/* Return a value X with the low 12 bits clear, and such that + VALUE - X is a signed 12-bit value. */ + +#define CONST_HIGH_PART(VALUE) \ + (((VALUE) + 0x800) & ~(unsigned HOST_WIDE_INT) 0xfff) + +#define CONST_LOW_PART(VALUE) \ + ((VALUE) - CONST_HIGH_PART (VALUE)) + +#define SMALL_INT(X) SMALL_OPERAND (INTVAL (X)) +#define SMALL_INT_UNSIGNED(X) SMALL_OPERAND_UNSIGNED (INTVAL (X)) +#define LUI_INT(X) LUI_OPERAND (INTVAL (X)) +#define LU32I_INT(X) LU32I_OPERAND (INTVAL (X)) +#define LU52I_INT(X) LU52I_OPERAND (INTVAL (X)) +#define ULARCH_12BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, -2048, 2047)) +#define LARCH_9BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, -256, 255)) +#define LISA_16BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, -32768, 32767)) +#define LISA_SHIFT_2_OFFSET_P(OFFSET) (((OFFSET) & 0x3) == 0) + +/* The HI and LO registers can only be reloaded via the general + registers. Condition code registers can only be loaded to the + general registers, and from the floating point registers. */ + +#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \ + loongarch_secondary_reload_class (CLASS, MODE, X, true) +#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \ + loongarch_secondary_reload_class (CLASS, MODE, X, false) + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. */ + +#define CLASS_MAX_NREGS(CLASS, MODE) loongarch_class_max_nregs (CLASS, MODE) + +/* Stack layout; function entry, exit and calling. */ + +#define STACK_GROWS_DOWNWARD 1 + +#define FRAME_GROWS_DOWNWARD 1 + +#define RETURN_ADDR_RTX loongarch_return_addr + +/* Similarly, don't use the least-significant bit to tell pointers to + code from vtable index. */ + +#define TARGET_PTRMEMFUNC_VBIT_LOCATION ptrmemfunc_vbit_in_delta + +#define ELIMINABLE_REGS \ +{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM},} + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + (OFFSET) = n_loongarch_initial_elimination_offset ((FROM), (TO)) + +/* Allocate stack space for arguments at the beginning of each function. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* The argument pointer always points to the first argument. */ +#define FIRST_PARM_OFFSET(FNDECL) 0 + +/* o32 and o64 reserve stack space for all argument registers. */ +#define REG_PARM_STACK_SPACE(FNDECL) \ + (TARGET_OLDABI \ + ? (MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD) \ + : 0) + +/* Define this if it is the responsibility of the caller to + allocate the area reserved for arguments passed in registers. 
+ If `ACCUMULATE_OUTGOING_ARGS' is also defined, the only effect + of this macro is to determine whether the space is included in + `crtl->outgoing_args_size'. */ +#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1 + +#define STACK_BOUNDARY (TARGET_NEWABI ? 128 : 64) + +/* Symbolic macros for the registers used to return integer and floating + point values. */ + +#define GP_RETURN (GP_REG_FIRST + 4) +#define FP_RETURN ((TARGET_SOFT_FLOAT) ? GP_RETURN : (FP_REG_FIRST + 0)) + +#define MAX_ARGS_IN_REGISTERS (TARGET_OLDABI ? 4 : 8) + +/* Symbolic macros for the first/last argument registers. */ + +#define GP_ARG_FIRST (GP_REG_FIRST + 4) +#define GP_ARG_LAST (GP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1) +#define FP_ARG_FIRST (FP_REG_FIRST + 0) +#define FP_ARG_LAST (FP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1) + +/* 1 if N is a possible register number for function argument passing. + We have no FP argument registers when soft-float. Special handling + is required for O32 where only even numbered registers are used for + O32-FPXX and O32-FP64. */ + +#define FUNCTION_ARG_REGNO_P(N) \ + ((IN_RANGE((N), GP_ARG_FIRST, GP_ARG_LAST) \ + || (IN_RANGE((N), FP_ARG_FIRST, FP_ARG_LAST) \ + && (loongarch_abi != ABILP32 \ + || TARGET_FLOAT32 \ + || ((N) % 2 == 0)))) \ + && !fixed_regs[N]) + +/* This structure has to cope with two different argument allocation + schemes. Most LARCH ABIs view the arguments as a structure, of which + the first N words go in registers and the rest go on the stack. If I + < N, the Ith word might go in Ith integer argument register or in a + floating-point register. For these ABIs, we only need to remember + the offset of the current argument into the structure. + + So for the standard ABIs, the first N words are allocated to integer + registers, and loongarch_function_arg decides on an argument-by-argument + basis whether that argument should really go in an integer register, + or in a floating-point one. */ + +typedef struct loongarch_args { + /* Always true for varargs functions. Otherwise true if at least + one argument has been passed in an integer register. */ + int gp_reg_found; + + /* The number of arguments seen so far. */ + unsigned int arg_number; + + /* The number of integer registers used so far. This is the number + of words that have been added to the argument structure, limited + to MAX_ARGS_IN_REGISTERS. */ + unsigned int num_gprs; + + unsigned int num_fprs; + + /* The number of words passed on the stack. */ + unsigned int stack_words; + + /* On the loongarch16, we need to keep track of which floating point + arguments were passed in general registers, but would have been + passed in the FP regs if this were a 32-bit function, so that we + can move them to the FP regs if we wind up calling a 32-bit + function. We record this information in fp_code, encoded in base + four. A zero digit means no floating point argument, a one digit + means an SFmode argument, and a two digit means a DFmode argument, + and a three digit is not used. The low order digit is the first + argument. Thus 6 == 1 * 4 + 2 means a DFmode argument followed by + an SFmode argument. ??? A more sophisticated approach will be + needed if LARCH_ABI != ABILP32. */ + int fp_code; + + /* True if the function has a prototype. */ + int prototype; +} CUMULATIVE_ARGS; + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. 
*/ + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ + memset (&(CUM), 0, sizeof (CUM)) + + +#define EPILOGUE_USES(REGNO) loongarch_epilogue_uses (REGNO) + +/* Treat LOC as a byte offset from the stack pointer and round it up + to the next fully-aligned offset. */ +#define LARCH_STACK_ALIGN(LOC) \ + (TARGET_NEWABI ? ROUND_UP ((LOC), 16) : ROUND_UP ((LOC), 8)) + + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. */ + +#define MCOUNT_NAME "_mcount" + +/* Emit rtl for profiling. Output assembler code to FILE + to call "_mcount" for profiling a function entry. */ +#define PROFILE_HOOK(LABEL) \ + { \ + rtx fun, ra; \ + ra = get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM); \ + fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \ + emit_library_call (fun, LCT_NORMAL, VOIDmode, ra, Pmode); \ + } + +/* All the work done in PROFILE_HOOK, but still required. */ +#define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0) + + +/* The profiler preserves all interesting registers, including $31. */ +#define LARCH_SAVE_REG_FOR_PROFILING_P(REGNO) false + +/* No loongarch port has ever used the profiler counter word, so don't emit it + or the label for it. */ + +#define NO_PROFILE_COUNTERS 1 + +/* Define this macro if the code for function profiling should come + before the function prologue. Normally, the profiling code comes + after. */ + +/* #define PROFILE_BEFORE_PROLOGUE */ + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. */ + +#define EXIT_IGNORE_STACK 1 + + +/* Trampolines are a block of code followed by two pointers. */ + +#define TRAMPOLINE_SIZE \ + (loongarch_trampoline_code_size () + GET_MODE_SIZE (ptr_mode) * 2) + +/* Forcing a 64-bit alignment for 32-bit targets allows us to load two + pointers from a single LUI base. */ + +#define TRAMPOLINE_ALIGNMENT 64 + +/* loongarch_trampoline_init calls this library function to flush + program and data caches. */ + +#ifndef CACHE_FLUSH_FUNC +#define CACHE_FLUSH_FUNC "_flush_cache" +#endif + +#define LARCH_ICACHE_SYNC(ADDR, SIZE) \ + /* Flush both caches. We need to flush the data cache in case \ + the system has a write-back cache. */ \ + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, loongarch_cache_flush_func), \ + LCT_NORMAL, VOIDmode, ADDR, Pmode, SIZE, Pmode, \ + GEN_INT (3), TYPE_MODE (integer_type_node)) + + +/* Addressing modes, and classification of registers for them. */ + +#define REGNO_OK_FOR_INDEX_P(REGNO) 0 +#define REGNO_MODE_OK_FOR_BASE_P(REGNO, MODE) \ + loongarch_regno_mode_ok_for_base_p (REGNO, MODE, 1) + +/* Maximum number of registers that can appear in a valid memory address. */ + +#define MAX_REGS_PER_ADDRESS 1 + +/* Check for constness inline but use loongarch_legitimate_address_p + to check whether a constant really is an address. */ + +#define CONSTANT_ADDRESS_P(X) \ + (CONSTANT_P (X) && memory_address_p (SImode, X)) + +/* This handles the magic '..CURRENT_FUNCTION' symbol, which means + 'the start of the function that this code is output in'. 
*/ + +#define ASM_OUTPUT_LABELREF(FILE,NAME) \ + do { \ + if (strcmp (NAME, "..CURRENT_FUNCTION") == 0) \ + asm_fprintf ((FILE), "%U%s", \ + XSTR (XEXP (DECL_RTL (current_function_decl), \ + 0), 0)); \ + else \ + asm_fprintf ((FILE), "%U%s", (NAME)); \ + } while (0) + +/* Flag to mark a function decl symbol that requires a long call. */ +#define SYMBOL_FLAG_LONG_CALL (SYMBOL_FLAG_MACH_DEP << 0) +#define SYMBOL_REF_LONG_CALL_P(X) \ + ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_LONG_CALL) != 0) + +/* This flag marks functions that cannot be lazily bound. */ +#define SYMBOL_FLAG_BIND_NOW (SYMBOL_FLAG_MACH_DEP << 1) +#define SYMBOL_REF_BIND_NOW_P(RTX) \ + ((SYMBOL_REF_FLAGS (RTX) & SYMBOL_FLAG_BIND_NOW) != 0) + +/* True if we're generating a form of LARCH16 code in which jump tables + are stored in the text section and encoded as 16-bit PC-relative + offsets. This is only possible when general text loads are allowed, + since the table access itself will be an "lh" instruction. If the + PC-relative offsets grow too large, 32-bit offsets are used instead. */ + + +#define CASE_VECTOR_MODE (ptr_mode) + +/* Only use short offsets if their range will not overflow. */ +#define CASE_VECTOR_SHORTEN_MODE(MIN, MAX, BODY) \ + (ptr_mode ? HImode : SImode) + + +/* Define this as 1 if `char' should by default be signed; else as 0. */ +#ifndef DEFAULT_SIGNED_CHAR +#define DEFAULT_SIGNED_CHAR 1 +#endif + +/* Although LDC1 and SDC1 provide 64-bit moves on 32-bit targets, + we generally don't want to use them for copying arbitrary data. + A single N-word move is usually the same cost as N single-word moves. */ +#define MOVE_MAX UNITS_PER_WORD + +#define MAX_MOVE_MAX 8 + +/* Define this macro as a C expression which is nonzero if + accessing less than a word of memory (i.e. a `char' or a + `short') is no faster than accessing a word of memory, i.e., if + such access require more than one instruction or if there is no + difference in cost between byte and (aligned) word loads. + + On RISC machines, it tends to generate better code to define + this as 1, since it avoids making a QI or HI mode register. + +*/ +#define SLOW_BYTE_ACCESS (1) + +/* Standard LARCH integer shifts truncate the shift amount to the + width of the shifted operand. However, Loongson MMI shifts + do not truncate the shift amount at all. */ +#define SHIFT_COUNT_TRUNCATED (1) + + +/* Specify the machine mode that pointers have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ + +#ifndef Pmode +#define Pmode (TARGET_64BIT && TARGET_LONG64 ? DImode : SImode) +#endif + +/* Give call MEMs SImode since it is the "most permissive" mode + for both 32-bit and 64-bit targets. */ + +#define FUNCTION_MODE SImode + + +/* We allocate $fcc registers by hand and can't cope with moves of + CCmode registers to and from pseudos (or memory). */ +#define AVOID_CCMODE_COPIES + +/* A C expression for the cost of a branch instruction. A value of + 1 is the default; other values are interpreted relative to that. */ + +#define BRANCH_COST(speed_p, predictable_p) loongarch_branch_cost +#define LOGICAL_OP_NON_SHORT_CIRCUIT 0 + +/* The LARCH port has several functions that return an instruction count. + Multiplying the count by this value gives the number of bytes that + the instructions occupy. */ +#define BASE_INSN_LENGTH (4) + +/* The length of a NOP in bytes. 
*/ +#define NOP_INSN_LENGTH (4) + +/* If defined, modifies the length assigned to instruction INSN as a + function of the context in which it is used. LENGTH is an lvalue + that contains the initially computed length of the insn and should + be updated with the correct length of the insn. */ +#define ADJUST_INSN_LENGTH(INSN, LENGTH) \ + ((LENGTH) = loongarch_adjust_insn_length ((INSN), (LENGTH))) + +/* Return the asm template for a conditional branch instruction. + OPCODE is the opcode's mnemonic and OPERANDS is the asm template for + its operands. */ +#define LARCH_BRANCH(OPCODE, OPERANDS) \ + OPCODE "\t" OPERANDS + +#define LARCH_BRANCH_C(OPCODE, OPERANDS) \ + OPCODE "%:\t" OPERANDS + +/* Return an asm string that forces INSN to be treated as an absolute + J or JAL instruction instead of an assembler macro. */ +#define LARCH_ABSOLUTE_JUMP(INSN) INSN + + +/* Control the assembler format that we output. */ + +/* Output to assembler file text saying following lines + may contain character constants, extra white space, comments, etc. */ + +#ifndef ASM_APP_ON +#define ASM_APP_ON " #APP\n" +#endif + +/* Output to assembler file text saying following lines + no longer contain unusual constructs. */ + +#ifndef ASM_APP_OFF +#define ASM_APP_OFF " #NO_APP\n" +#endif + +#define REGISTER_NAMES \ +{ "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", \ + "$r8", "$r9", "$r10", "$r11", "$r12", "$r13", "$r14", "$r15", \ + "$r16", "$r17", "$r18", "$r19", "$r20", "$r21", "$r22", "$r23", \ + "$r24", "$r25", "$r26", "$r27", "$r28", "$r29", "$r30", "$r31", \ + "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", \ + "$f8", "$f9", "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", \ + "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23", \ + "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31", \ + "$fcc0","$fcc1","$fcc2","$fcc3","$fcc4","$fcc5","$fcc6","$fcc7", \ + "$arg", "$frame"} + +/* List the "software" names for each register. Also list the numerical + names for $fp and $sp. */ + +#define ADDITIONAL_REGISTER_NAMES \ +{ \ + { "zero", 0 + GP_REG_FIRST }, \ + { "ra", 1 + GP_REG_FIRST }, \ + { "tp", 2 + GP_REG_FIRST }, \ + { "sp", 3 + GP_REG_FIRST }, \ + { "a0", 4 + GP_REG_FIRST }, \ + { "a1", 5 + GP_REG_FIRST }, \ + { "a2", 6 + GP_REG_FIRST }, \ + { "a3", 7 + GP_REG_FIRST }, \ + { "a4", 8 + GP_REG_FIRST }, \ + { "a5", 9 + GP_REG_FIRST }, \ + { "a6", 10 + GP_REG_FIRST }, \ + { "a7", 11 + GP_REG_FIRST }, \ + { "t0", 12 + GP_REG_FIRST }, \ + { "t1", 13 + GP_REG_FIRST }, \ + { "t2", 14 + GP_REG_FIRST }, \ + { "t3", 15 + GP_REG_FIRST }, \ + { "t4", 16 + GP_REG_FIRST }, \ + { "t5", 17 + GP_REG_FIRST }, \ + { "t6", 18 + GP_REG_FIRST }, \ + { "t7", 19 + GP_REG_FIRST }, \ + { "t8", 20 + GP_REG_FIRST }, \ + { "x", 21 + GP_REG_FIRST }, \ + { "fp", 22 + GP_REG_FIRST }, \ + { "s0", 23 + GP_REG_FIRST }, \ + { "s1", 24 + GP_REG_FIRST }, \ + { "s2", 25 + GP_REG_FIRST }, \ + { "s3", 26 + GP_REG_FIRST }, \ + { "s4", 27 + GP_REG_FIRST }, \ + { "s5", 28 + GP_REG_FIRST }, \ + { "s6", 29 + GP_REG_FIRST }, \ + { "s7", 30 + GP_REG_FIRST }, \ + { "s8", 31 + GP_REG_FIRST }, \ + { "v0", 4 + GP_REG_FIRST }, \ + { "v1", 5 + GP_REG_FIRST } \ +} + +#define DBR_OUTPUT_SEQEND(STREAM) \ +do \ + { \ + /* Emit a blank line after the delay slot for emphasis. */ \ + fputs ("\n", STREAM); \ + } \ +while (0) + +/* The LARCH implementation uses some labels for its own purpose. The + following lists what labels are created, and are all formed by the + pattern $L[a-z].*. 
The machine independent portion of GCC creates
+   labels matching: $L[A-Z][0-9]+ and $L[0-9]+.
+
+   LM[0-9]+   Silicon Graphics/ECOFF stabs label before each stmt.
+   $Lb[0-9]+  Begin blocks for LARCH debug support
+   $Lc[0-9]+  Label for use in s<xx> operation.
+   $Le[0-9]+  End blocks for LARCH debug support  */
+
+#undef ASM_DECLARE_OBJECT_NAME
+#define ASM_DECLARE_OBJECT_NAME(STREAM, NAME, DECL) \
+  loongarch_declare_object (STREAM, NAME, "", ":\n")
+
+/* Globalizing directive for a label.  */
+#define GLOBAL_ASM_OP "\t.globl\t"
+
+/* This says how to define a global common symbol.  */
+
+#define ASM_OUTPUT_ALIGNED_DECL_COMMON loongarch_output_aligned_decl_common
+
+/* This says how to define a local common symbol (i.e., not visible to
+   linker).  */
+
+#ifndef ASM_OUTPUT_ALIGNED_LOCAL
+#define ASM_OUTPUT_ALIGNED_LOCAL(STREAM, NAME, SIZE, ALIGN) \
+  loongarch_declare_common_object (STREAM, NAME, "\n\t.lcomm\t", SIZE, ALIGN, false)
+#endif
+
+/* This says how to output an external.  It would be possible not to
+   output anything and let undefined symbol become external.  However
+   the assembler uses length information on externals to allocate in
+   data/sdata bss/sbss, thereby saving exec time.  */
+
+#undef ASM_OUTPUT_EXTERNAL
+#define ASM_OUTPUT_EXTERNAL(STREAM,DECL,NAME) \
+  loongarch_output_external(STREAM,DECL,NAME)
+
+/* This is how to declare a function name.  The actual work of
+   emitting the label is moved to function_prologue, so that we can
+   get the line number correctly emitted before the .ent directive,
+   and after any .file directives.  Define as empty so that the function
+   is not declared before the .ent directive elsewhere.  */
+
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(STREAM,NAME,DECL) \
+  loongarch_declare_function_name(STREAM,NAME,DECL)
+
+/* This is how to store into the string LABEL
+   the symbol_ref name of an internal numbered label where
+   PREFIX is the class of label and NUM is the number within the class.
+   This is suitable for output with `assemble_name'.  */
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+  sprintf ((LABEL), "*%s%s%ld", (LOCAL_LABEL_PREFIX), (PREFIX), (long)(NUM))
+
+/* Print debug labels as "foo = ." rather than "foo:" because they should
+   represent a byte pointer rather than an ISA-encoded address.  This is
+   particularly important for code like:
+
+      $LFBxxx = .
+              .cfi_startproc
+              ...
+              .section .gcc_except_table,...
+              ...
+              .uleb128 foo-$LFBxxx
+
+   The .uleb128 requires $LFBxxx to match the FDE start address, which is
+   likewise a byte pointer rather than an ISA-encoded address.
+
+   At the time of writing, this hook is not used for the function end
+   label:
+
+      $LFExxx:
+              .end foo
+
+   */
+
+#define ASM_OUTPUT_DEBUG_LABEL(FILE, PREFIX, NUM) \
+  fprintf (FILE, "%s%s%d = .\n", LOCAL_LABEL_PREFIX, PREFIX, NUM)
+
+/* This is how to output an element of a case-vector that is absolute.  */
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \
+  fprintf (STREAM, "\t%s\t%sL%d\n", \
+           ptr_mode == DImode ? ".dword" : ".word", \
+           LOCAL_LABEL_PREFIX, \
+           VALUE)
+
+/* This is how to output an element of a case-vector.  We can make the
+   entries GP-relative when .gp(d)word is supported.  */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \
+do { \
+    if (TARGET_RTP_PIC) \
+      { \
+        /* Make the entry relative to the start of the function.  */ \
+        rtx fnsym = XEXP (DECL_RTL (current_function_decl), 0); \
+        fprintf (STREAM, "\t%s\t%sL%d-", \
+                 Pmode == DImode ?
".dword" : ".word", \ + LOCAL_LABEL_PREFIX, VALUE); \ + assemble_name (STREAM, XSTR (fnsym, 0)); \ + fprintf (STREAM, "\n"); \ + } \ + else \ + fprintf (STREAM, "\t%s\t%sL%d-%sL%d\n", \ + ptr_mode == DImode ? ".dword" : ".word", \ + LOCAL_LABEL_PREFIX, VALUE, \ + LOCAL_LABEL_PREFIX, REL); \ +} while (0) + +/* Mark inline jump tables as data for the purpose of disassembly. For + simplicity embed the jump table's label number in the local symbol + produced so that multiple jump tables within a single function end + up marked with unique symbols. Retain the alignment setting from + `elfos.h' as we are replacing the definition from there. */ + +#undef ASM_OUTPUT_BEFORE_CASE_LABEL +#define ASM_OUTPUT_BEFORE_CASE_LABEL(STREAM, PREFIX, NUM, TABLE) \ + do \ + { \ + ASM_OUTPUT_ALIGN ((STREAM), 2); \ + if (JUMP_TABLES_IN_TEXT_SECTION) \ + loongarch_set_text_contents_type (STREAM, "__jump_", NUM, FALSE); \ + } \ + while (0) + +/* Reset text marking to code after an inline jump table. Like with + the beginning of a jump table use the label number to keep symbols + unique. */ + +#define ASM_OUTPUT_CASE_END(STREAM, NUM, TABLE) \ + do \ + if (JUMP_TABLES_IN_TEXT_SECTION) \ + loongarch_set_text_contents_type (STREAM, "__jend_", NUM, TRUE); \ + while (0) + +/* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ + +#define ASM_OUTPUT_ALIGN(STREAM,LOG) \ + fprintf (STREAM, "\t.align\t%d\n", (LOG)) + +#define ASM_OUTPUT_ALIGN_WITH_NOP(STREAM,LOG) \ + fprintf (STREAM, "\t.align\t%d,54525952,4\n", (LOG)) + + +/* This is how to output an assembler line to advance the location + counter by SIZE bytes. */ + +#undef ASM_OUTPUT_SKIP +#define ASM_OUTPUT_SKIP(STREAM,SIZE) \ + fprintf (STREAM, "\t.space\t" HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE)) + +/* This is how to output a string. */ +#undef ASM_OUTPUT_ASCII +#define ASM_OUTPUT_ASCII loongarch_output_ascii + + +/* Default to -G 8 */ +#ifndef LARCH_DEFAULT_GVALUE +#define LARCH_DEFAULT_GVALUE 8 +#endif + +/* Define the strings to put out for each section in the object file. */ +#define TEXT_SECTION_ASM_OP "\t.text" /* instructions */ +#define DATA_SECTION_ASM_OP "\t.data" /* large data */ + +#undef READONLY_DATA_SECTION_ASM_OP +#define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rodata" /* read-only data */ + +#define ASM_OUTPUT_REG_PUSH(STREAM,REGNO) \ +do \ + { \ + fprintf (STREAM, "\t%s\t%s,%s,-8\n\t%s\t%s,0(%s)\n", \ + TARGET_64BIT ? "daddiu" : "addiu", \ + reg_names[STACK_POINTER_REGNUM], \ + reg_names[STACK_POINTER_REGNUM], \ + TARGET_64BIT ? "sd" : "sw", \ + reg_names[REGNO], \ + reg_names[STACK_POINTER_REGNUM]); \ + } \ +while (0) + +#define ASM_OUTPUT_REG_POP(STREAM,REGNO) \ +do \ + { \ + loongarch_push_asm_switch (&loongarch_noreorder); \ + fprintf (STREAM, "\t%s\t%s,0(%s)\n\t%s\t%s,%s,8\n", \ + TARGET_64BIT ? "ld" : "lw", \ + reg_names[REGNO], \ + reg_names[STACK_POINTER_REGNUM], \ + TARGET_64BIT ? "daddu" : "addu", \ + reg_names[STACK_POINTER_REGNUM], \ + reg_names[STACK_POINTER_REGNUM]); \ + loongarch_pop_asm_switch (&loongarch_noreorder); \ + } \ +while (0) + +/* How to start an assembler comment. + The leading space is important (the loongarch native assembler requires it). */ +#ifndef ASM_COMMENT_START +#define ASM_COMMENT_START " #" +#endif + +#undef SIZE_TYPE +#define SIZE_TYPE (POINTER_SIZE == 64 ? "long unsigned int" : "unsigned int") + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int") + +/* The minimum alignment of any expanded block move. 
*/ +#define LARCH_MIN_MOVE_MEM_ALIGN 16 + +/* The maximum number of bytes that can be copied by one iteration of + a movmemsi loop; see loongarch_block_move_loop. */ +#define LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER \ + (UNITS_PER_WORD * 4) + +/* The maximum number of bytes that can be copied by a straight-line + implementation of movmemsi; see loongarch_block_move_straight. We want + to make sure that any loop-based implementation will iterate at + least twice. */ +#define LARCH_MAX_MOVE_BYTES_STRAIGHT \ + (LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER * 2) + +/* The base cost of a memcpy call, for MOVE_RATIO and friends. These + values were determined experimentally by benchmarking with CSiBE. +*/ +#define LARCH_CALL_RATIO 8 + +/* Any loop-based implementation of movmemsi will have at least + LARCH_MAX_MOVE_BYTES_STRAIGHT / UNITS_PER_WORD memory-to-memory + moves, so allow individual copies of fewer elements. + + When movmemsi is not available, use a value approximating + the length of a memcpy call sequence, so that move_by_pieces + will generate inline code if it is shorter than a function call. + Since move_by_pieces_ninsns counts memory-to-memory moves, but + we'll have to generate a load/store pair for each, halve the + value of LARCH_CALL_RATIO to take that into account. */ + +#define MOVE_RATIO(speed) \ + (HAVE_movmemsi \ + ? LARCH_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX \ + : LARCH_CALL_RATIO / 2) + +/* For CLEAR_RATIO, when optimizing for size, give a better estimate + of the length of a memset call, but use the default otherwise. */ + +#define CLEAR_RATIO(speed)\ + ((speed) ? 15 : LARCH_CALL_RATIO) + +/* This is similar to CLEAR_RATIO, but for a non-zero constant, so when + optimizing for size adjust the ratio to account for the overhead of + loading the constant and replicating it across the word. */ + +#define SET_RATIO(speed) \ + ((speed) ? 15 : LARCH_CALL_RATIO - 2) + +/* Since the bits of the _init and _fini function is spread across + many object files, each potentially with its own GP, we must assume + we need to load our GP. We don't preserve $gp or $ra, since each + init/fini chunk is supposed to initialize $gp, and crti/crtn + already take care of preserving $ra and, when appropriate, $gp. */ +#if (defined _ABI64 && _LARCH_SIM == _ABI64) +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n\ + la $t8, " USER_LABEL_PREFIX #FUNC "\n\ + jirl $ra, $t8, 0\n\ + " TEXT_SECTION_ASM_OP); +#endif +#ifndef HAVE_AS_TLS +#define HAVE_AS_TLS 0 +#endif + +#ifndef HAVE_AS_NAN +#define HAVE_AS_NAN 0 +#endif + +#ifndef USED_FOR_TARGET +/* Information about ".set noFOO; ...; .set FOO" blocks. */ +struct loongarch_asm_switch { + /* The FOO in the description above. */ + const char *name; + + /* The current block nesting level, or 0 if we aren't in a block. 
*/ + int nesting_level; +}; + +extern const enum reg_class loongarch_regno_to_class[]; +extern const char *current_function_file; /* filename current function is in */ +extern int num_source_filenames; /* current .file # */ +extern int loongarch_dbx_regno[]; +extern int loongarch_dwarf_regno[]; +extern bool loongarch_split_p[]; +extern bool loongarch_use_pcrel_pool_p[]; +extern enum processor loongarch_arch; /* which cpu to codegen for */ +extern enum processor loongarch_tune; /* which cpu to schedule for */ +extern int loongarch_isa; /* architectural level */ +extern int loongarch_isa_rev; +extern const struct loongarch_cpu_info *loongarch_arch_info; +extern const struct loongarch_cpu_info *loongarch_tune_info; +extern unsigned int loongarch_base_compression_flags; + +/* Information about a function's frame layout. */ +struct GTY(()) loongarch_frame_info { + /* The size of the frame in bytes. */ + HOST_WIDE_INT total_size; + + /* The number of bytes allocated to variables. */ + HOST_WIDE_INT var_size; + + /* The number of bytes allocated to outgoing function arguments. */ + HOST_WIDE_INT args_size; + + /* The number of bytes allocated to the .cprestore slot, or 0 if there + is no such slot. */ + HOST_WIDE_INT cprestore_size; + + /* Bit X is set if the function saves or restores GPR X. */ + unsigned int mask; + + /* Likewise FPR X. */ + unsigned int fmask; + + /* Likewise doubleword accumulator X ($acX). */ + unsigned int acc_mask; + + /* The number of GPRs, FPRs, doubleword accumulators and COP0 + registers saved. */ + unsigned int num_gp; + unsigned int num_fp; + unsigned int num_acc; + unsigned int num_cop0_regs; + + /* The offset of the topmost GPR, FPR, accumulator and COP0-register + save slots from the top of the frame, or zero if no such slots are + needed. */ + HOST_WIDE_INT gp_save_offset; + HOST_WIDE_INT fp_save_offset; + HOST_WIDE_INT acc_save_offset; + HOST_WIDE_INT cop0_save_offset; + + /* Likewise, but giving offsets from the bottom of the frame. */ + HOST_WIDE_INT gp_sp_offset; + HOST_WIDE_INT fp_sp_offset; + HOST_WIDE_INT acc_sp_offset; + HOST_WIDE_INT cop0_sp_offset; + + /* Similar, but the value passed to _mcount. */ + HOST_WIDE_INT ra_fp_offset; + + /* The offset of arg_pointer_rtx from the bottom of the frame. */ + HOST_WIDE_INT arg_pointer_offset; + + /* The offset of hard_frame_pointer_rtx from the bottom of the frame. */ + HOST_WIDE_INT hard_frame_pointer_offset; + + /* How much the GPR save/restore routines adjust sp (or 0 if unused). */ + unsigned save_libcall_adjustment; + + /* Offset of virtual frame pointer from stack pointer/frame bottom */ + HOST_WIDE_INT frame_pointer_offset; +}; + +/* Enumeration for masked vectored (VI) and non-masked (EIC) interrupts. */ +enum loongarch_int_mask +{ + INT_MASK_EIC = -1, + INT_MASK_SW0 = 0, + INT_MASK_SW1 = 1, + INT_MASK_HW0 = 2, + INT_MASK_HW1 = 3, + INT_MASK_HW2 = 4, + INT_MASK_HW3 = 5, + INT_MASK_HW4 = 6, + INT_MASK_HW5 = 7 +}; + +/* Enumeration to mark the existence of the shadow register set. + SHADOW_SET_INTSTACK indicates a shadow register set with a valid stack + pointer. */ +enum loongarch_shadow_set +{ + SHADOW_SET_NO, + SHADOW_SET_YES, + SHADOW_SET_INTSTACK +}; + +struct GTY(()) machine_function { + /* The next floating-point condition-code register to allocate + for 8CC targets, relative to ST_REG_FIRST. */ + unsigned int next_fcc; + + /* The number of extra stack bytes taken up by register varargs. + This area is allocated by the callee at the very top of the frame. 
*/ + int varargs_size; + + /* The current frame information, calculated by loongarch_compute_frame_info. */ + struct loongarch_frame_info frame; + + /* How many instructions it takes to load a label into $AT, or 0 if + this property hasn't yet been calculated. */ + unsigned int load_label_num_insns; + + /* True if loongarch_adjust_insn_length should ignore an instruction's + hazard attribute. */ + bool ignore_hazard_length_p; + + /* True if the whole function is suitable for .set noreorder and + .set nomacro. */ + bool all_noreorder_p; + + /* True if the function has "inflexible" and "flexible" references + to the global pointer. See loongarch_cfun_has_inflexible_gp_ref_p + and loongarch_cfun_has_flexible_gp_ref_p for details. */ + bool has_inflexible_gp_insn_p; + bool has_flexible_gp_insn_p; + + /* True if the function's prologue must load the global pointer + value into pic_offset_table_rtx and store the same value in + the function's cprestore slot (if any). Even if this value + is currently false, we may decide to set it to true later; + see loongarch_must_initialize_gp_p () for details. */ + bool must_initialize_gp_p; + + /* True if the current function must restore $gp after any potential + clobber. This value is only meaningful during the first post-epilogue + split_insns pass; see loongarch_must_initialize_gp_p () for details. */ + bool must_restore_gp_when_clobbered_p; + + /* True if this is an interrupt handler. */ + bool interrupt_handler_p; + + /* Records the way in which interrupts should be masked. Only used if + interrupts are not kept masked. */ + enum loongarch_int_mask int_mask; + + /* Records if this is an interrupt handler that uses shadow registers. */ + enum loongarch_shadow_set use_shadow_register_set; + + /* True if this is an interrupt handler that should keep interrupts + masked. */ + bool keep_interrupts_masked_p; + + /* True if this is an interrupt handler that should use DERET + instead of ERET. */ + bool use_debug_exception_return_p; + + /* True if at least one of the formal parameters to a function must be + written to the frame header (probably so its address can be taken). */ + bool does_not_use_frame_header; + + /* True if none of the functions that are called by this function need + stack space allocated for their arguments. */ + bool optimize_call_stack; + + /* True if one of the functions calling this function may not allocate + a frame header. */ + bool callers_may_not_allocate_frame; + + /* True if GCC stored callee saved registers in the frame header. */ + bool use_frame_header_for_callee_saved_regs; +}; +#endif + +/* Enable querying of DFA units. */ +#define CPU_UNITS_QUERY 0 + +/* As on most targets, we want the .eh_frame section to be read-only where + possible. And as on most targets, this means two things: + + (a) Non-locally-binding pointers must have an indirect encoding, + so that the addresses in the .eh_frame section itself become + locally-binding. + + (b) A shared library's .eh_frame section must encode locally-binding + pointers in a relative (relocation-free) form. + + However, LARCH has traditionally not allowed directives like: + + .long x-. + + in cases where "x" is in a different section, or is not defined in the + same assembly file. We are therefore unable to emit the PC-relative + form required by (b) at assembly time. + + Fortunately, the linker is able to convert absolute addresses into + PC-relative addresses on our behalf. 
Unfortunately, only certain + versions of the linker know how to do this for indirect pointers, + and for personality data. We must fall back on using writable + .eh_frame sections for shared libraries if the linker does not + support this feature. */ +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ + (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_absptr) + +#define SWITCHABLE_TARGET 1 + +/* Several named LARCH patterns depend on Pmode. These patterns have the + form _si for Pmode == SImode and _di for Pmode == DImode. + Add the appropriate suffix to generator function NAME and invoke it + with arguments ARGS. */ +#define PMODE_INSN(NAME, ARGS) \ + (Pmode == SImode ? NAME ## _si ARGS : NAME ## _di ARGS) + +/* Load store bonding is not supported by fix_24k. The + performance can be degraded for those targets. Hence, do not bond for + fix_24k. */ +#define ENABLE_LD_ST_PAIRS \ + (TARGET_LOAD_STORE_PAIRS) + + +/* Do emit .note.GNU-stack by default. */ +#ifndef NEED_INDICATE_EXEC_STACK +#define NEED_INDICATE_EXEC_STACK 1 +#endif + +/***********************/ +/* N_LARCH-PORT */ +/***********************/ +/* The `Q' extension is not yet supported. */ +/* TODO: according to march */ +#define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4) + +/* The largest type that can be passed in floating-point registers. */ +/* TODO: according to mabi */ +#define UNITS_PER_FP_ARG 8 + +/* Internal macros to classify an ISA register's type. */ + +#define GP_TEMP_FIRST (GP_REG_FIRST + 12) + +#define CALLEE_SAVED_REG_NUMBER(REGNO) \ + ((REGNO) >= 22 && (REGNO) <= 31 ? (REGNO) - 22 : -1) + +#define N_LARCH_PROLOGUE_TEMP_REGNUM (GP_TEMP_FIRST + 1) +#define N_LARCH_PROLOGUE_TEMP(MODE) gen_rtx_REG (MODE, N_LARCH_PROLOGUE_TEMP_REGNUM) + +#define LIBCALL_VALUE(MODE) \ + n_loongarch_function_value (NULL_TREE, NULL_TREE, MODE) + +#define FUNCTION_VALUE(VALTYPE, FUNC) \ + n_loongarch_function_value (VALTYPE, FUNC, VOIDmode) + +#define FRAME_GROWS_DOWNWARD 1 + +#define FUNCTION_VALUE_REGNO_P(N) ((N) == GP_RETURN || (N) == FP_RETURN) diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md new file mode 100644 index 0000000000000000000000000000000000000000..b13fba84030d384ad14030c78e641bea92ef23d7 --- /dev/null +++ b/gcc/config/loongarch/loongarch.md @@ -0,0 +1,4332 @@ +;; Loongarch.md Machine Description for LARCH based processors +;; Copyright (C) 1989-2018 Free Software Foundation, Inc. +;; Contributed by A. Lichnewsky, lich@inria.inria.fr +;; Changes by Michael Meissner, meissner@osf.org + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_enum "processor" [ + loongarch + loongarch64 + gs464v +]) + +(define_c_enum "unspec" [ + ;; Integer operations that are too cumbersome to describe directly. + UNSPEC_WSBH + UNSPEC_DSBH + UNSPEC_DSHD + + ;; Floating-point moves. + UNSPEC_LOAD_LOW + UNSPEC_LOAD_HIGH + UNSPEC_STORE_WORD + UNSPEC_MOVGR2FRH + UNSPEC_MOVFRH2GR + + ;; Floating-point environment. 
+ UNSPEC_MOVFCSR2GR + UNSPEC_MOVGR2FCSR + + ;; GP manipulation. + UNSPEC_EH_RETURN + + ;; + UNSPEC_FRINT + UNSPEC_FCLASS + UNSPEC_BYTEPICK_W + UNSPEC_BYTEPICK_D + UNSPEC_BITREV_4B + UNSPEC_BITREV_8B + + ;; Symbolic accesses. + UNSPEC_LOAD_CALL + + ;; Blockage and synchronisation. + UNSPEC_BLOCKAGE + UNSPEC_DBAR + UNSPEC_IBAR + + ;; CPUCFG + UNSPEC_CPUCFG + UNSPEC_ASRTLE_D + UNSPEC_ASRTGT_D + + UNSPEC_CSRRD + UNSPEC_CSRWR + UNSPEC_CSRXCHG + UNSPEC_IOCSRRD + UNSPEC_IOCSRWR + + ;; cacop + UNSPEC_CACOP + + ;; pte + UNSPEC_LDDIR + UNSPEC_LDPTE + + ;; Cache manipulation. + UNSPEC_LARCH_CACHE + + ;; Interrupt handling. + UNSPEC_ERTN + UNSPEC_DI + UNSPEC_EHB + UNSPEC_RDPGPR + + ;; Used in a call expression in place of args_size. It's present for PIC + ;; indirect calls where it contains args_size and the function symbol. + UNSPEC_CALL_ATTR + + + ;; Stack checking. + UNSPEC_PROBE_STACK_RANGE + + ;; The `.insn' pseudo-op. + UNSPEC_INSN_PSEUDO + + ;; TLS + UNSPEC_TLS_GD + UNSPEC_TLS_LD + UNSPEC_TLS_LE + UNSPEC_TLS_IE + + UNSPEC_LU52I_D + + ;; FIXME: Stack tie + UNSPEC_TIE + + ;; CRC + UNSPEC_CRC + UNSPEC_CRCC +]) + +;; FIXME +(define_constants + [(RETURN_ADDR_REGNUM 1) + (T0_REGNUM 12) + (T1_REGNUM 13) + (S0_REGNUM 23) + (S1_REGNUM 24) + (S2_REGNUM 25) + + ;; PIC long branch sequences are never longer than 100 bytes. + (MAX_PIC_BRANCH_LENGTH 100) +]) + +(include "predicates.md") +(include "constraints.md") + +;; .................... +;; +;; Attributes +;; +;; .................... + +(define_attr "got" "unset,load" + (const_string "unset")) + +;; For jal instructions, this attribute is DIRECT when the target address +;; is symbolic and INDIRECT when it is a register. +(define_attr "jal" "unset,direct,indirect" + (const_string "unset")) + + +;; Classification of moves, extensions and truncations. Most values +;; are as for "type" (see below) but there are also the following +;; move-specific values: +;; +;; sll0 "sll DEST,SRC,0", which on 64-bit targets is guaranteed +;; to produce a sign-extended DEST, even if SRC is not +;; properly sign-extended +;; pick_ins BSTRPICK.W, BSTRPICK.D, BSTRINS.W or BSTRINS.D instruction +;; andi a single ANDI instruction +;; shift_shift a shift left followed by a shift right +;; +;; This attribute is used to determine the instruction's length and +;; scheduling type. For doubleword moves, the attribute always describes +;; the split instructions; in some cases, it is more appropriate for the +;; scheduling type to be "multi" instead. +(define_attr "move_type" + "unknown,load,fpload,store,fpstore,mgtf,mftg,imul,move,fmove, + const,signext,pick_ins,logical,arith,sll0,andi,shift_shift" + (const_string "unknown")) + +(define_attr "alu_type" "unknown,add,sub,not,nor,and,or,xor" + (const_string "unknown")) + +;; Main data type used by the insn +(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,OI,SF,DF,TF,FPSW" + (const_string "unknown")) + +;; True if the main data type is twice the size of a word. +(define_attr "dword_mode" "no,yes" + (cond [(and (eq_attr "mode" "DI,DF") + (not (match_test "TARGET_64BIT"))) + (const_string "yes") + + (and (eq_attr "mode" "TI,TF") + (match_test "TARGET_64BIT")) + (const_string "yes")] + (const_string "no"))) + +;; True if the main data type is four times of the size of a word. +(define_attr "qword_mode" "no,yes" + (cond [(and (eq_attr "mode" "TI,TF") + (not (match_test "TARGET_64BIT"))) + (const_string "yes")] + (const_string "no"))) + +;; True if the main data type is eight times of the size of a word. 
+(define_attr "oword_mode" "no,yes" + (cond [(and (eq_attr "mode" "OI") + (not (match_test "TARGET_64BIT"))) + (const_string "yes")] + (const_string "no"))) + +;; Attributes describing a sync loop. These loops have the form: +;; +;; if (RELEASE_BARRIER == YES) sync +;; 1: OLDVAL = *MEM +;; if ((OLDVAL & INCLUSIVE_MASK) != REQUIRED_OLDVAL) goto 2 +;; CMP = 0 [delay slot] +;; $TMP1 = OLDVAL & EXCLUSIVE_MASK +;; $TMP2 = INSN1 (OLDVAL, INSN1_OP2) +;; $TMP3 = INSN2 ($TMP2, INCLUSIVE_MASK) +;; $AT |= $TMP1 | $TMP3 +;; if (!commit (*MEM = $AT)) goto 1. +;; if (INSN1 != MOVE && INSN1 != LI) NEWVAL = $TMP3 [delay slot] +;; CMP = 1 +;; if (ACQUIRE_BARRIER == YES) sync +;; 2: +;; +;; where "$" values are temporaries and where the other values are +;; specified by the attributes below. Values are specified as operand +;; numbers and insns are specified as enums. If no operand number is +;; specified, the following values are used instead: +;; +;; - OLDVAL: $AT +;; - CMP: NONE +;; - NEWVAL: $AT +;; - INCLUSIVE_MASK: -1 +;; - REQUIRED_OLDVAL: OLDVAL & INCLUSIVE_MASK +;; - EXCLUSIVE_MASK: 0 +;; +;; MEM and INSN1_OP2 are required. +;; +;; Ideally, the operand attributes would be integers, with -1 meaning "none", +;; but the gen* programs don't yet support that. +(define_attr "sync_mem" "none,0,1,2,3,4,5" (const_string "none")) +(define_attr "sync_oldval" "none,0,1,2,3,4,5" (const_string "none")) +(define_attr "sync_cmp" "none,0,1,2,3,4,5" (const_string "none")) +(define_attr "sync_newval" "none,0,1,2,3,4,5" (const_string "none")) +(define_attr "sync_inclusive_mask" "none,0,1,2,3,4,5" (const_string "none")) +(define_attr "sync_exclusive_mask" "none,0,1,2,3,4,5" (const_string "none")) +(define_attr "sync_required_oldval" "none,0,1,2,3,4,5" (const_string "none")) +(define_attr "sync_insn1_op2" "none,0,1,2,3,4,5" (const_string "none")) +(define_attr "sync_insn1" "move,li,addu,addiu,subu,and,andi,or,ori,xor,xori" + (const_string "move")) +(define_attr "sync_insn2" "nop,and,xor,not" + (const_string "nop")) +;; Memory model specifier. +;; "0"-"9" values specify the operand that stores the memory model value. +;; "10" specifies MEMMODEL_ACQ_REL, +;; "11" specifies MEMMODEL_ACQUIRE. +(define_attr "sync_memmodel" "" (const_int 10)) + +;; Accumulator operand for madd patterns. +(define_attr "accum_in" "none,0,1,2,3,4,5" (const_string "none")) + +;; Classification of each insn. 
+;; branch       conditional branch
+;; jump         unconditional jump
+;; call         unconditional call
+;; load         load instruction(s)
+;; fpload       floating point load
+;; fpidxload    floating point indexed load
+;; store        store instruction(s)
+;; fpstore      floating point store
+;; fpidxstore   floating point indexed store
+;; prefetch     memory prefetch (register + offset)
+;; prefetchx    memory indexed prefetch (register + register)
+;; condmove     conditional moves
+;; mgtf         move general register to float register
+;; mftg         move float register to general register
+;; const        load constant
+;; arith        integer arithmetic instructions
+;; logical      integer logical instructions
+;; shift        integer shift instructions
+;; slt          set less than instructions
+;; signext      sign extend instructions
+;; clz          the clz and clo instructions
+;; trap         trap if instructions
+;; imul         integer multiply 2 operands
+;; imul3        integer multiply 3 operands
+;; idiv3        integer divide 3 operands
+;; move         integer register move ({,D}ADD{,U} with rt = 0)
+;; fmove        floating point register move
+;; fadd         floating point add/subtract
+;; fmul         floating point multiply
+;; fmadd        floating point multiply-add
+;; fdiv         floating point divide
+;; frdiv        floating point reciprocal divide
+;; fabs         floating point absolute value
+;; fneg         floating point negation
+;; fcmp         floating point compare
+;; fcvt         floating point convert
+;; fsqrt        floating point square root
+;; frsqrt       floating point reciprocal square root
+;; multi        multiword sequence (or user asm statements)
+;; atomic       atomic memory update instruction
+;; syncloop     memory atomic operation implemented as a sync loop
+;; nop          no operation
+;; ghost        an instruction that produces no real code
+(define_attr "type"
+  "unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
+   prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
+   shift,slt,signext,clz,trap,imul,imul3,idiv3,move,
+   fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcvt,fsqrt,
+   frsqrt,dspmac,dspmacsat,accext,accmod,dspalu,dspalusat,
+   multi,atomic,syncloop,nop,ghost"
+  (cond [(eq_attr "jal" "!unset") (const_string "call")
+         (eq_attr "got" "load") (const_string "load")
+
+         (eq_attr "alu_type" "add,sub") (const_string "arith")
+
+         (eq_attr "alu_type" "not,nor,and,or,xor") (const_string "logical")
+
+         ;; If a doubleword move uses these expensive instructions,
+         ;; it is usually better to schedule them in the same way
+         ;; as the singleword form, rather than as "multi".
+         (eq_attr "move_type" "load") (const_string "load")
+         (eq_attr "move_type" "fpload") (const_string "fpload")
+         (eq_attr "move_type" "store") (const_string "store")
+         (eq_attr "move_type" "fpstore") (const_string "fpstore")
+         (eq_attr "move_type" "mgtf") (const_string "mgtf")
+         (eq_attr "move_type" "mftg") (const_string "mftg")
+
+         ;; These types of move are always single insns.
+         (eq_attr "move_type" "imul") (const_string "imul")
+         (eq_attr "move_type" "fmove") (const_string "fmove")
+         (eq_attr "move_type" "signext") (const_string "signext")
+         (eq_attr "move_type" "pick_ins") (const_string "arith")
+         (eq_attr "move_type" "arith") (const_string "arith")
+         (eq_attr "move_type" "logical") (const_string "logical")
+         (eq_attr "move_type" "sll0") (const_string "shift")
+         (eq_attr "move_type" "andi") (const_string "logical")
+
+         ;; These types of move are always split.
+         (eq_attr "move_type" "shift_shift")
+           (const_string "multi")
+
+         ;; These types of move are split for octaword modes only.
+ (and (eq_attr "move_type" "move,const") + (eq_attr "oword_mode" "yes")) + (const_string "multi") + + ;; These types of move are split for quadword modes only. + (and (eq_attr "move_type" "move,const") + (eq_attr "qword_mode" "yes")) + (const_string "multi") + + ;; These types of move are split for doubleword modes only. + (and (eq_attr "move_type" "move,const") + (eq_attr "dword_mode" "yes")) + (const_string "multi") + (eq_attr "move_type" "move") (const_string "move") + (eq_attr "move_type" "const") (const_string "const") + (eq_attr "sync_mem" "!none") (const_string "syncloop")] + (const_string "unknown"))) + +(define_attr "compact_form" "always,maybe,never" + (cond [(eq_attr "jal" "direct") + (const_string "always") + (eq_attr "jal" "indirect") + (const_string "maybe") + (eq_attr "type" "jump") + (const_string "maybe")] + (const_string "never"))) + +;; Mode for conversion types (fcvt) +;; I2S integer to float single (SI/DI to SF) +;; I2D integer to float double (SI/DI to DF) +;; S2I float to integer (SF to SI/DI) +;; D2I float to integer (DF to SI/DI) +;; D2S double to float single +;; S2D float single to double + +(define_attr "cnv_mode" "unknown,I2S,I2D,S2I,D2I,D2S,S2D" + (const_string "unknown")) + +(define_attr "compression" "none,all" + (const_string "none")) + +;; The number of individual instructions that a non-branch pattern generates, +;; using units of BASE_INSN_LENGTH. +(define_attr "insn_count" "" + (cond [;; "Ghost" instructions occupy no space. + (eq_attr "type" "ghost") + (const_int 0) + + ;; Check for doubleword moves that are decomposed into two + ;; instructions. + (and (eq_attr "move_type" "mgtf,mftg,move") + (eq_attr "dword_mode" "yes")) + (const_int 2) + + ;; Check for quadword moves that are decomposed into four + ;; instructions. + (and (eq_attr "move_type" "mgtf,mftg,move") + (eq_attr "qword_mode" "yes")) + (const_int 4) + + ;; Check for Octaword moves that are decomposed into eight + ;; instructions. + (and (eq_attr "move_type" "mgtf,mftg,move") + (eq_attr "oword_mode" "yes")) + (const_int 8) + + ;; Constants, loads and stores are handled by external routines. + (and (eq_attr "move_type" "const") + (eq_attr "dword_mode" "yes")) + (symbol_ref "loongarch_split_const_insns (operands[1])") + (eq_attr "move_type" "const") + (symbol_ref "loongarch_const_insns (operands[1])") + (eq_attr "move_type" "load,fpload") + (symbol_ref "loongarch_load_store_insns (operands[1], insn)") + (eq_attr "move_type" "store,fpstore") + (symbol_ref "loongarch_load_store_insns (operands[0], insn)") + + (eq_attr "type" "idiv3") + (symbol_ref "loongarch_idiv_insns (GET_MODE (PATTERN (insn)))")] +(const_int 1))) + +;; Length of instruction in bytes. The default is derived from "insn_count", +;; but there are special cases for branches (which must be handled here) +;; and for compressed single instructions. + + + +(define_attr "length" "" + (cond [ + ;; Branch instructions have a range of [-0x20000,0x1fffc]. + ;; If a branch is outside this range, we have a choice of two + ;; sequences. + ;; + ;; For PIC, an out-of-range branch like: + ;; + ;; bne r1,r2,target + ;; + ;; becomes the equivalent of: + ;; + ;; beq r1,r2,1f + ;; la rd,target + ;; jirl zero,rd,0 + ;; 1: + ;; + ;; The non-PIC case is similar except that we use a direct + ;; jump instead of an la/jr pair. Since the target of this + ;; jump is an absolute 28-bit bit address (the other bits + ;; coming from the address of the delay slot) this form cannot + ;; cross a 256MB boundary. 
+	 ;; We could provide the option of using la/jr in this case too,
+	 ;; but we do not do so at present.
+	 ;;
+	 ;; All of these ranges are measured from the shorten_branches
+	 ;; reference address.
+	 (eq_attr "type" "branch")
+	 (cond [;; Any variant can handle the 17-bit range.
+		(and (le (minus (match_dup 0) (pc)) (const_int 65532))
+		     (le (minus (pc) (match_dup 0)) (const_int 65534)))
+		(const_int 4)
+
+		;; The non-PIC case: branch and jump.
+		(match_test "TARGET_ABSOLUTE_JUMPS")
+		(const_int 8)]
+
+	 ;; Use MAX_PIC_BRANCH_LENGTH as a (gross) overestimate.
+	 ;; loongarch_adjust_insn_length substitutes the correct length.
+	 ;;
+	 ;; Note that we can't simply use (symbol_ref ...) here
+	 ;; because genattrtab needs to know the maximum length
+	 ;; of an insn.
+	 (const_int MAX_PIC_BRANCH_LENGTH))
+	 ]
+	(symbol_ref "get_attr_insn_count (insn) * BASE_INSN_LENGTH")))
+
+;; Attribute describing the processor.
+(define_enum_attr "cpu" "processor"
+  (const (symbol_ref "loongarch_tune")))
+
+;; The type of hardware hazard associated with this instruction.
+;; DELAY means that the next instruction cannot read the result
+;; of this one.
+(define_attr "hazard" "none,delay,forbidden_slot"
+  (const_string "none"))
+
+;; Can the instruction be put into a delay slot?
+(define_attr "can_delay" "no,yes"
+  (if_then_else (and (eq_attr "type" "!branch,call,jump")
+		     (eq_attr "hazard" "none")
+		     (match_test "get_attr_insn_count (insn) == 1"))
+		(const_string "yes")
+		(const_string "no")))
+
+;; Describe a user's asm statement.
+(define_asm_attributes
+  [(set_attr "type" "multi")
+   (set_attr "can_delay" "no")])
+
+;; This mode iterator allows 32-bit and 64-bit GPR patterns to be generated
+;; from the same template.
+(define_mode_iterator GPR [SI (DI "TARGET_64BIT")])
+
+;; A copy of GPR that can be used when a pattern has two independent
+;; modes.
+(define_mode_iterator GPR2 [SI (DI "TARGET_64BIT")])
+
+;; Likewise, but for XLEN-sized quantities.
+(define_mode_iterator X [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")])
+
+(define_mode_iterator MOVEP1 [SI SF])
+(define_mode_iterator MOVEP2 [SI SF])
+(define_mode_iterator JOIN_MODE [HI
+				 SI
+				 (SF "TARGET_HARD_FLOAT")
+				 (DF "TARGET_HARD_FLOAT
+				      && TARGET_DOUBLE_FLOAT")])
+
+;; This mode iterator allows :P to be used for patterns that operate on
+;; pointer-sized quantities.  Exactly one of the two alternatives will match.
+(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
+
+;; This mode iterator allows :MOVECC to be used anywhere that a
+;; conditional-move-type condition is needed.
+(define_mode_iterator MOVECC [SI (DI "TARGET_64BIT")
+			      (CC "TARGET_HARD_FLOAT")])
+
+;; 32-bit integer moves for which we provide move patterns.
+(define_mode_iterator IMOVE32
+  [SI])
+
+;; 64-bit modes for which we provide move patterns.
+(define_mode_iterator MOVE64
+  [DI DF])
+
+;; 128-bit modes for which we provide move patterns on 64-bit targets.
+(define_mode_iterator MOVE128 [TI TF])
+
+;; This mode iterator allows the QI and HI extension patterns to be
+;; defined from the same template.
+(define_mode_iterator SHORT [QI HI])
+
+;; Likewise the 64-bit truncate-and-shift patterns.
+(define_mode_iterator SUBDI [QI HI SI])
+
+;; This mode iterator allows the QI, HI, SI and DI extension patterns to be
+;; defined from the same template.
+(define_mode_iterator QHWD [QI HI SI (DI "TARGET_64BIT")])
+
+
+;; This mode iterator allows :ANYF to be used wherever a scalar or vector
+;; floating-point mode is allowed.
+(define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT")
+			    (DF "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT")])
+
+;; Like ANYF, but only applies to scalar modes.
+(define_mode_iterator SCALARF [(SF "TARGET_HARD_FLOAT")
+			       (DF "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT")])
+
+;; A floating-point mode for which moves involving FPRs may need to be split.
+(define_mode_iterator SPLITF
+  [(DF "!TARGET_64BIT && TARGET_DOUBLE_FLOAT")
+   (DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT")
+   (TF "TARGET_64BIT && TARGET_FLOAT64")])
+
+;; In GPR templates, a string like "mul.<d>" will expand to "mul.w" in the
+;; 32-bit version and "mul.d" in the 64-bit version.
+(define_mode_attr d [(SI "w") (DI "d")])
+
+;; Same as <d> but upper-case.
+(define_mode_attr D [(SI "") (DI "D")])
+
+;; This attribute gives the length suffix for a load or store instruction.
+;; The same suffixes work for zero and sign extensions.
+(define_mode_attr size [(QI "b") (HI "h") (SI "w") (DI "d")])
+(define_mode_attr SIZE [(QI "B") (HI "H") (SI "W") (DI "D")])
+
+;; This attribute gives the mode mask of a SHORT.
+(define_mode_attr mask [(QI "0x00ff") (HI "0xffff")])
+
+;; This attribute gives the size (bits) of a SHORT.
+(define_mode_attr qi_hi [(QI "7") (HI "15")])
+
+;; Mode attributes for GPR loads.
+(define_mode_attr load [(SI "lw") (DI "ld")])
+
+(define_mode_attr load_l [(SI "ld.w") (DI "ld.d")])
+;; Instruction names for stores.
+(define_mode_attr store [(QI "sb") (HI "sh") (SI "sw") (DI "sd")])
+
+;; Similarly for LARCH indexed FPR loads and stores.
+(define_mode_attr floadx [(SF "fldx.s") (DF "fldx.d") (V2SF "fldx.d")])
+(define_mode_attr fstorex [(SF "fstx.s") (DF "fstx.d") (V2SF "fstx.d")])
+
+;; Similarly for LOONGSON indexed GPR loads and stores.
+(define_mode_attr loadx [(QI "ldx.b")
+			 (HI "ldx.h")
+			 (SI "ldx.w")
+			 (DI "ldx.d")])
+(define_mode_attr storex [(QI "stx.b")
+			  (HI "stx.h")
+			  (SI "stx.w")
+			  (DI "stx.d")])
+
+;; This attribute gives the best constraint to use for registers of
+;; a given mode.
+(define_mode_attr reg [(SI "d") (DI "d") (CC "z")])
+
+;; This attribute gives the format suffix for floating-point operations.
+(define_mode_attr fmt [(SF "s") (DF "d")])
+
+;; This attribute gives the upper-case mode name for one unit of a
+;; floating-point mode or vector mode.
+(define_mode_attr UNITMODE [(SF "SF") (DF "DF") (V2SF "SF")])
+
+;; As above, but in lower case.
+(define_mode_attr unitmode [(SF "sf") (DF "df") (V2SF "sf")])
+
+
+;; This attribute gives the integer mode that has half the size of
+;; the controlling mode.
+(define_mode_attr HALFMODE [(DF "SI") (DI "SI") (V2SF "SI")
+			    (TF "DI")])
+
+(define_mode_attr p [(SI "") (DI "d")])
+
+;; This attribute works around the early SB-1 rev2 core "F2" erratum:
+;;
+;; In certain cases, div.s and div.ps may have a rounding error
+;; and/or wrong inexact flag.
+;;
+;; Therefore, we only allow div.s if not working around SB-1 rev2
+;; errata or if a slight loss of precision is OK.
+(define_mode_attr divide_condition
+  [DF (SF "flag_unsafe_math_optimizations")])
+
+;; This attribute gives the conditions under which SQRT.fmt instructions
+;; can be used.
+(define_mode_attr sqrt_condition
+  [SF DF])
+
+;; This code iterator allows signed and unsigned widening multiplications
+;; to use the same template.
+(define_code_iterator any_extend [sign_extend zero_extend])
+
+;; This code iterator allows the two right shift instructions to be
+;; generated from the same template.
+(define_code_iterator any_shiftrt [ashiftrt lshiftrt])
+
+;; This code iterator allows the three shift instructions to be generated
+;; from the same template.
+(define_code_iterator any_shift [ashift ashiftrt lshiftrt])
+
+;; This code iterator allows unsigned and signed division to be generated
+;; from the same template.
+(define_code_iterator any_div [div udiv])
+
+;; This code iterator allows unsigned and signed modulus to be generated
+;; from the same template.
+(define_code_iterator any_mod [mod umod])
+
+;; This code iterator allows addition and subtraction to be generated
+;; from the same template.
+(define_code_iterator addsub [plus minus])
+
+;; This code iterator allows addition and multiplication to be generated
+;; from the same template.
+(define_code_iterator addmul [plus mult])
+
+;; This code iterator allows addition, subtraction and multiplication to be
+;; generated from the same template.
+(define_code_iterator addsubmul [plus minus mult])
+
+;; This code iterator allows all native floating-point comparisons to be
+;; generated from the same template.
+(define_code_iterator fcond [unordered uneq unlt unle eq lt le ordered ltgt ne])
+
+;; This code iterator is used for comparisons that can be implemented
+;; by swapping the operands.
+(define_code_iterator swapped_fcond [ge gt unge ungt])
+
+;; Equality operators.
+(define_code_iterator equality_op [eq ne])
+
+;; These code iterators allow the signed and unsigned scc operations to use
+;; the same template.
+(define_code_iterator any_gt [gt gtu])
+(define_code_iterator any_ge [ge geu])
+(define_code_iterator any_lt [lt ltu])
+(define_code_iterator any_le [le leu])
+
+(define_code_iterator any_return [return simple_return])
+
+;; <u> expands to an empty string when doing a signed operation and
+;; "u" when doing an unsigned operation.
+(define_code_attr u [(sign_extend "") (zero_extend "u")
+		     (div "") (udiv "u")
+		     (mod "") (umod "u")
+		     (gt "") (gtu "u")
+		     (ge "") (geu "u")
+		     (lt "") (ltu "u")
+		     (le "") (leu "u")])
+
+;; <U> is like <u> except uppercase.
+(define_code_attr U [(sign_extend "") (zero_extend "U")])
+
+;; <su> is like <u>, but the signed form expands to "s" rather than "".
+(define_code_attr su [(sign_extend "s") (zero_extend "u")])
+
+;; <optab> expands to the name of the optab for a particular code.
+(define_code_attr optab [(ashift "ashl")
+			 (ashiftrt "ashr")
+			 (lshiftrt "lshr")
+			 (ior "ior")
+			 (xor "xor")
+			 (and "and")
+			 (plus "add")
+			 (minus "sub")
+			 (mult "mul")
+			 (return "return")
+			 (simple_return "simple_return")])
+
+;; <insn> expands to the name of the insn that implements a particular code.
+(define_code_attr insn [(ashift "sll")
+			(ashiftrt "sra")
+			(lshiftrt "srl")
+			(ior "or")
+			(xor "xor")
+			(and "and")
+			(plus "addu")
+			(minus "subu")])
+
+;; <immediate_insn> expands to the name of the insn that implements
+;; a particular code to operate on immediate values.
+(define_code_attr immediate_insn [(ior "ori")
+				  (xor "xori")
+				  (and "andi")])
+
+;; <fcond> is the c.cond.fmt condition associated with a particular code.
+(define_code_attr fcond [(unordered "cun")
+			 (uneq "cueq")
+			 (unlt "cult")
+			 (unle "cule")
+			 (eq "ceq")
+			 (lt "slt")
+			 (le "sle")
+			 (ordered "cor")
+			 (ltgt "cne")
+			 (ne "cune")])
+
+;; Similar, but for swapped conditions.
+(define_code_attr swapped_fcond [(ge "sle")
+				 (gt "slt")
+				 (unge "cule")
+				 (ungt "cult")])
+
+;; The value of the bit when the branch is taken for branch_bit patterns.
+;; Comparison is always against zero so this depends on the operator.
+(define_code_attr bbv [(eq "0") (ne "1")])
+
+;; This is the inverse value of bbv.
+(define_code_attr bbinv [(eq "1") (ne "0")]) + +;; The sel mnemonic to use depending on the condition test. +(define_code_attr sel [(eq "masknez") (ne "maskeqz")]) +(define_code_attr selinv [(eq "maskeqz") (ne "masknez")]) + +;; Pipeline descriptions. +;; +;; generic.md provides a fallback for processors without a specific +;; pipeline description. It is derived from the old define_function_unit +;; version and uses the "alu" and "imuldiv" units declared below. +;; +;; Some of the processor-specific files are also derived from old +;; define_function_unit descriptions and simply override the parts of +;; generic.md that don't apply. The other processor-specific files +;; are self-contained. +(define_automaton "alu,imuldiv") + +(define_cpu_unit "alu" "alu") +(define_cpu_unit "imuldiv" "imuldiv") + +;; Ghost instructions produce no real code and introduce no hazards. +;; They exist purely to express an effect on dataflow. +(define_insn_reservation "ghost" 0 + (eq_attr "type" "ghost") + "nothing") + +(include "generic.md") + +;; +;; .................... +;; +;; CONDITIONAL TRAPS +;; +;; .................... +;; + +(define_insn "trap" + [(trap_if (const_int 1) (const_int 0))] + "" +{ + return "break\t0"; +} + [(set_attr "type" "trap")]) + + + +;; +;; .................... +;; +;; ADDITION +;; +;; .................... +;; + +(define_insn "add3" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (plus:ANYF (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] + "" + "fadd.\t%0,%1,%2" + [(set_attr "type" "fadd") + (set_attr "mode" "")]) + +(define_expand "add3" + [(set (match_operand:GPR 0 "register_operand") + (plus:GPR (match_operand:GPR 1 "register_operand") + (match_operand:GPR 2 "arith_operand")))] + "") + +(define_insn "*add3" + [(set (match_operand:GPR 0 "register_operand" "=d,d") + (plus:GPR (match_operand:GPR 1 "register_operand" "d,d") + (match_operand:GPR 2 "arith_operand" "d,Q")))] + "" +{ + if (which_alternative == 0) + return "add.\t%0,%1,%2"; + else + return "addi.\t%0,%1,%2"; +} + [(set_attr "alu_type" "add") + (set_attr "compression" "*,*") + (set_attr "mode" "")]) + + +(define_insn "*addsi3_extended" + [(set (match_operand:DI 0 "register_operand" "=d,d") + (sign_extend:DI + (plus:SI (match_operand:SI 1 "register_operand" "d,d") + (match_operand:SI 2 "arith_operand" "d,Q"))))] + "TARGET_64BIT" + "@ + add.w\t%0,%1,%2 + addi.w\t%0,%1,%2" + [(set_attr "alu_type" "add") + (set_attr "mode" "SI")]) + + +;; +;; .................... +;; +;; SUBTRACTION +;; +;; .................... +;; + +(define_insn "sub3" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (minus:ANYF (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] + "" + "fsub.\t%0,%1,%2" + [(set_attr "type" "fadd") + (set_attr "mode" "")]) + +(define_insn "sub3" + [(set (match_operand:GPR 0 "register_operand" "=d") + (minus:GPR (match_operand:GPR 1 "register_operand" "d") + (match_operand:GPR 2 "register_operand" "d")))] + "" + "sub.\t%0,%1,%2" + [(set_attr "alu_type" "sub") + (set_attr "compression" "*") + (set_attr "mode" "")]) + +(define_insn "*subsi3_extended" + [(set (match_operand:DI 0 "register_operand" "=d") + (sign_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d"))))] + "TARGET_64BIT" + "sub.w\t%0,%1,%2" + [(set_attr "alu_type" "sub") + (set_attr "mode" "DI")]) + +;; +;; .................... +;; +;; MULTIPLICATION +;; +;; .................... 
+;; + +(define_expand "mul3" + [(set (match_operand:SCALARF 0 "register_operand") + (mult:SCALARF (match_operand:SCALARF 1 "register_operand") + (match_operand:SCALARF 2 "register_operand")))] + "" + "") + +(define_insn "*mul3" + [(set (match_operand:SCALARF 0 "register_operand" "=f") + (mult:SCALARF (match_operand:SCALARF 1 "register_operand" "f") + (match_operand:SCALARF 2 "register_operand" "f")))] + "" + "fmul.\t%0,%1,%2" + [(set_attr "type" "fmul") + (set_attr "mode" "")]) + +(define_insn "mul3" + [(set (match_operand:GPR 0 "register_operand" "=d") + (mult:GPR (match_operand:GPR 1 "register_operand" "d") + (match_operand:GPR 2 "register_operand" "d")))] + "" + "mul.\t%0,%1,%2" + [(set_attr "type" "imul3") + (set_attr "mode" "")]) + + + +(define_insn "mulsidi3_64bit" + [(set (match_operand:DI 0 "register_operand" "=d") + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "d")) + (sign_extend:DI (match_operand:SI 2 "register_operand" "d"))))] + "" + "mul.d\t%0,%1,%2" + [(set_attr "type" "imul3") + (set_attr "mode" "DI")]) + + +;; FIXME; Copy from n_loongarch.md. +;;(define_insn "*mulsi3_extended" +;; [(set (match_operand:DI 0 "register_operand" "=r") +;; (sign_extend:DI +;; (mult:SI (match_operand:SI 1 "register_operand" " r") +;; (match_operand:SI 2 "register_operand" " r"))))] +;; "TARGET_64BIT" +;; "mulw\t%0,%1,%2" +;; [(set_attr "type" "imul") +;; (set_attr "mode" "SI")]) +;; +;;(define_insn "*mulsi3_extended2" +;; [(set (match_operand:DI 0 "register_operand" "=r") +;; (sign_extend:DI +;; (subreg:SI (mult:DI (match_operand:DI 1 "register_operand" " r") +;; (match_operand:DI 2 "register_operand" " r")) +;; 0)))] +;; "TARGET_64BIT" +;; "mulw\t%0,%1,%2" +;; [(set_attr "type" "imul") +;; (set_attr "mode" "SI")]) + + +;; +;; ........................ +;; +;; MULTIPLICATION HIGH-PART +;; +;; ........................ 
+;; + + +(define_expand "mulditi3" + [(set (match_operand:TI 0 "register_operand") + (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand")) + (any_extend:TI (match_operand:DI 2 "register_operand"))))] + "TARGET_64BIT" +{ + rtx low = gen_reg_rtx (DImode); + emit_insn (gen_muldi3 (low, operands[1], operands[2])); + + rtx high = gen_reg_rtx (DImode); + emit_insn (gen_muldi3_highpart (high, operands[1], operands[2])); + + emit_move_insn (gen_lowpart (DImode, operands[0]), low); + emit_move_insn (gen_highpart (DImode, operands[0]), high); + DONE; +}) + +(define_insn "muldi3_highpart" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (mult:TI (any_extend:TI + (match_operand:DI 1 "register_operand" " r")) + (any_extend:TI + (match_operand:DI 2 "register_operand" " r"))) + (const_int 64))))] + "TARGET_64BIT" + "mulh.d\t%0,%1,%2" + [(set_attr "type" "imul") + (set_attr "mode" "DI")]) + +(define_expand "mulsidi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (any_extend:DI + (match_operand:SI 1 "register_operand" " r")) + (any_extend:DI + (match_operand:SI 2 "register_operand" " r"))))] + "!TARGET_64BIT" +{ + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_mulsi3 (temp, operands[1], operands[2])); + emit_insn (gen_mulsi3_highpart (loongarch_subword (operands[0], true), + operands[1], operands[2])); + emit_insn (gen_movsi (loongarch_subword (operands[0], false), temp)); + DONE; +}) + +(define_insn "mulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (mult:DI (any_extend:DI + (match_operand:SI 1 "register_operand" " r")) + (any_extend:DI + (match_operand:SI 2 "register_operand" " r"))) + (const_int 32))))] + "!TARGET_64BIT" + "mulh.w\t%0,%1,%2" + [(set_attr "type" "imul") + (set_attr "mode" "SI")]) + +;; Floating point multiply accumulate instructions. + +(define_expand "fma4" + [(set (match_operand:ANYF 0 "register_operand") + (fma:ANYF (match_operand:ANYF 1 "register_operand") + (match_operand:ANYF 2 "register_operand") + (match_operand:ANYF 3 "register_operand")))] + "TARGET_HARD_FLOAT") + +(define_insn "*fma4_madd4" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (fma:ANYF (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f") + (match_operand:ANYF 3 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "fmadd.\t%0,%1,%2,%3" + [(set_attr "type" "fmadd") + (set_attr "mode" "")]) + +;; The fms, fnma, and fnms instructions can be used even when HONOR_NANS +;; is true because while IEEE 754-2008 requires the negate operation to +;; negate the sign of a NAN and the LARCH neg instruction does not do this, +;; the fma part of the instruction has no requirement on how the sign of +;; a NAN is handled and so the final sign bit of the entire operation is +;; undefined. 
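+;;
+;; As a quick reference, the fma pattern above and the fms/fnma/fnms
+;; patterns below compute, in C-like notation:
+;;
+;;   fma  (a, b, c) =   a * b + c   -> fmadd.{s,d}
+;;   fms  (a, b, c) =   a * b - c   -> fmsub.{s,d}
+;;   fnma (a, b, c) = -(a * b) + c  -> fnmsub.{s,d}
+;;   fnms (a, b, c) = -(a * b) - c  -> fnmadd.{s,d}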
+ +(define_expand "fms4" + [(set (match_operand:ANYF 0 "register_operand") + (fma:ANYF (match_operand:ANYF 1 "register_operand") + (match_operand:ANYF 2 "register_operand") + (neg:ANYF (match_operand:ANYF 3 "register_operand"))))] + "TARGET_HARD_FLOAT") + + +(define_insn "*fms4_msub4" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (fma:ANYF (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f") + (neg:ANYF (match_operand:ANYF 3 "register_operand" "f"))))] + "TARGET_HARD_FLOAT" + "fmsub.\t%0,%1,%2,%3" + [(set_attr "type" "fmadd") + (set_attr "mode" "")]) + +;; fnma is defined in GCC as (fma (neg op1) op2 op3) +;; (-op1 * op2) + op3 ==> -(op1 * op2) + op3 ==> -((op1 * op2) - op3) +;; The loongarch nmsub instructions implement -((op1 * op2) - op3) +;; This transformation means we may return the wrong signed zero +;; so we check HONOR_SIGNED_ZEROS. + +(define_expand "fnma4" + [(set (match_operand:ANYF 0 "register_operand") + (fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand")) + (match_operand:ANYF 2 "register_operand") + (match_operand:ANYF 3 "register_operand")))] + "TARGET_HARD_FLOAT && !HONOR_SIGNED_ZEROS (mode)") + +(define_insn "*fnma4_nmsub4" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (fma:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")) + (match_operand:ANYF 2 "register_operand" "f") + (match_operand:ANYF 3 "register_operand" "f")))] + "TARGET_HARD_FLOAT && !HONOR_SIGNED_ZEROS (mode)" + "fnmsub.\t%0,%1,%2,%3" + [(set_attr "type" "fmadd") + (set_attr "mode" "")]) + +;; fnms is defined as: (fma (neg op1) op2 (neg op3)) +;; ((-op1) * op2) - op3 ==> -(op1 * op2) - op3 ==> -((op1 * op2) + op3) +;; The loongarch nmadd instructions implement -((op1 * op2) + op3) +;; This transformation means we may return the wrong signed zero +;; so we check HONOR_SIGNED_ZEROS. + +(define_expand "fnms4" + [(set (match_operand:ANYF 0 "register_operand") + (fma:ANYF + (neg:ANYF (match_operand:ANYF 1 "register_operand")) + (match_operand:ANYF 2 "register_operand") + (neg:ANYF (match_operand:ANYF 3 "register_operand"))))] + "TARGET_HARD_FLOAT && !HONOR_SIGNED_ZEROS (mode)") + +(define_insn "*fnms4_nmadd4" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (fma:ANYF + (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")) + (match_operand:ANYF 2 "register_operand" "f") + (neg:ANYF (match_operand:ANYF 3 "register_operand" "f"))))] + "TARGET_HARD_FLOAT && !HONOR_SIGNED_ZEROS (mode)" + "fnmadd.\t%0,%1,%2,%3" + [(set_attr "type" "fmadd") + (set_attr "mode" "")]) + +;; +;; .................... +;; +;; DIVISION and REMAINDER +;; +;; .................... +;; + +(define_expand "div3" + [(set (match_operand:ANYF 0 "register_operand") + (div:ANYF (match_operand:ANYF 1 "reg_or_1_operand") + (match_operand:ANYF 2 "register_operand")))] + "" +{ + if (const_1_operand (operands[1], mode)) + if (!(ISA_HAS_FP_RECIP_RSQRT (mode) + && flag_unsafe_math_optimizations)) + operands[1] = force_reg (mode, operands[1]); +}) + +;; These patterns work around the early SB-1 rev2 core "F1" erratum: +;; +;; If an mftg1 or dmftg1 happens to access the floating point register +;; file at the same time a long latency operation (div, sqrt, recip, +;; sqrt) iterates an intermediate result back through the floating +;; point register file bypass, then instead returning the correct +;; register value the mftg1 or dmftg1 operation returns the intermediate +;; result of the long latency operation. 
+;; +;; The workaround is to insert an unconditional 'mov' from/to the +;; long latency op destination register. + +(define_insn "*div3" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (div:ANYF (match_operand:ANYF 1 "register_operand" "f") + (match_operand:ANYF 2 "register_operand" "f")))] + "" +{ + return "fdiv.\t%0,%1,%2"; +} + [(set_attr "type" "fdiv") + (set_attr "mode" "") + (set_attr "insn_count" "1")]) + +(define_insn "*recip3" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") + (match_operand:ANYF 2 "register_operand" "f")))] + "ISA_HAS_FP_RECIP_RSQRT (mode) && flag_unsafe_math_optimizations" +{ + return "frecip.\t%0,%2"; +} + [(set_attr "type" "frdiv") + (set_attr "mode" "") + (set_attr "insn_count" "1")]) + +;; Integer division and modulus. + +(define_insn "div3" + [(set (match_operand:GPR 0 "register_operand" "=&d") + (any_div:GPR (match_operand:GPR 1 "register_operand" "d") + (match_operand:GPR 2 "register_operand" "d")))] + "" + { + return loongarch_output_division ("div.\t%0,%1,%2", operands); + } + [(set_attr "type" "idiv3") + (set_attr "mode" "")]) + +(define_insn "mod3" + [(set (match_operand:GPR 0 "register_operand" "=&d") + (any_mod:GPR (match_operand:GPR 1 "register_operand" "d") + (match_operand:GPR 2 "register_operand" "d")))] + "" + { + return loongarch_output_division ("mod.\t%0,%1,%2", operands); + } + [(set_attr "type" "idiv3") + (set_attr "mode" "")]) + +;; +;; .................... +;; +;; SQUARE ROOT +;; +;; .................... + +;; These patterns work around the early SB-1 rev2 core "F1" erratum (see +;; "*div[sd]f3" comment for details). + +(define_insn "sqrt2" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (sqrt:ANYF (match_operand:ANYF 1 "register_operand" "f")))] + "" +{ + return "fsqrt.\t%0,%1"; +} + [(set_attr "type" "fsqrt") + (set_attr "mode" "") + (set_attr "insn_count" "1")]) + +(define_insn "*rsqrta" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") + (sqrt:ANYF (match_operand:ANYF 2 "register_operand" "f"))))] + "ISA_HAS_FP_RECIP_RSQRT (mode) && flag_unsafe_math_optimizations" +{ + return "frsqrt.\t%0,%2"; +} + [(set_attr "type" "frsqrt") + (set_attr "mode" "") + (set_attr "insn_count" "1")]) + +(define_insn "*rsqrtb" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (sqrt:ANYF (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") + (match_operand:ANYF 2 "register_operand" "f"))))] + "ISA_HAS_FP_RECIP_RSQRT (mode) && flag_unsafe_math_optimizations" +{ + return "frsqrt.\t%0,%2"; +} + [(set_attr "type" "frsqrt") + (set_attr "mode" "") + (set_attr "insn_count" "1")]) + +;; +;; .................... +;; +;; ABSOLUTE VALUE +;; +;; .................... + +;; Do not use the integer abs macro instruction, since that signals an +;; exception on -2147483648 (sigh). + +;; The "legacy" (as opposed to "2008") form of ABS.fmt is an arithmetic +;; instruction that treats all NaN inputs as invalid; it does not clear +;; their sign bit. We therefore can't use that form if the signs of +;; NaNs matter. + +(define_insn "abs2" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (abs:ANYF (match_operand:ANYF 1 "register_operand" "f")))] + "" + "fabs.\t%0,%1" + [(set_attr "type" "fabs") + (set_attr "mode" "")]) + +;; +;; ................... +;; +;; Count leading zeroes. +;; +;; ................... 
+;; + +(define_insn "clz2" + [(set (match_operand:GPR 0 "register_operand" "=d") + (clz:GPR (match_operand:GPR 1 "register_operand" "d")))] + "" + "clz.\t%0,%1" + [(set_attr "type" "clz") + (set_attr "mode" "")]) + +;; +;; ................... +;; +;; Count trailing zeroes. +;; +;; ................... +;; + +(define_insn "ctz2" + [(set (match_operand:GPR 0 "register_operand" "=d") + (ctz:GPR (match_operand:GPR 1 "register_operand" "d")))] + "" + "ctz.\t%0,%1" + [(set_attr "type" "clz") + (set_attr "mode" "")]) + + + +;; +;; .................... +;; +;; NEGATION and ONE'S COMPLEMENT +;; +;; .................... + +(define_insn "negsi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (neg:SI (match_operand:SI 1 "register_operand" "d")))] + "" +{ + return "sub.w\t%0,%.,%1"; +} + [(set_attr "alu_type" "sub") + (set_attr "mode" "SI")]) + +(define_insn "negdi2" + [(set (match_operand:DI 0 "register_operand" "=d") + (neg:DI (match_operand:DI 1 "register_operand" "d")))] + "TARGET_64BIT" + "sub.d\t%0,%.,%1" + [(set_attr "alu_type" "sub") + (set_attr "mode" "DI")]) + +;; The "legacy" (as opposed to "2008") form of NEG.fmt is an arithmetic +;; instruction that treats all NaN inputs as invalid; it does not flip +;; their sign bit. We therefore can't use that form if the signs of +;; NaNs matter. + +(define_insn "neg2" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")))] + "" + "fneg.\t%0,%1" + [(set_attr "type" "fneg") + (set_attr "mode" "")]) + +(define_insn "one_cmpl2" + [(set (match_operand:GPR 0 "register_operand" "=d") + (not:GPR (match_operand:GPR 1 "register_operand" "d")))] + "" +{ + return "nor\t%0,%.,%1"; +} + [(set_attr "alu_type" "not") + (set_attr "compression" "*") + (set_attr "mode" "")]) + + +;; +;; .................... +;; +;; LOGICAL +;; +;; .................... +;; + + +(define_expand "and3" + [(set (match_operand:GPR 0 "register_operand") + (and:GPR (match_operand:GPR 1 "register_operand") + (match_operand:GPR 2 "and_reg_operand")))]) + +;; The middle-end is not allowed to convert ANDing with 0xffff_ffff into a +;; zero_extendsidi2 because of TARGET_TRULY_NOOP_TRUNCATION, so handle these +;; here. Note that this variant does not trigger for SI mode because we +;; require a 64-bit HOST_WIDE_INT and 0xffff_ffff wouldn't be a canonical +;; sign-extended SImode value. +;; +;; These are possible combinations for operand 1 and 2. 
+;; (r=register, mem=memory, x=match, S=split): +;; +;; \ op1 r/EXT r/!EXT mem +;; op2 +;; +;; andi x x +;; 0xff x x x +;; 0xffff x x x +;; 0xffff_ffff x S x +;; low-bitmask x +;; register x x +;; register =op1 + +(define_insn "*and3" + [(set (match_operand:GPR 0 "register_operand" "=d,d,d,d,d,d,d") + (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "o,o,W,d,d,d,d") + (match_operand:GPR 2 "and_operand" "Yb,Yh,Yw,K,Yx,Yw,d")))] + " and_operands_ok (mode, operands[1], operands[2])" +{ + int len; + + switch (which_alternative) + { + case 0: + operands[1] = gen_lowpart (QImode, operands[1]); + return "ld.bu\t%0,%1"; + case 1: + operands[1] = gen_lowpart (HImode, operands[1]); + return "ld.hu\t%0,%1"; + case 2: + operands[1] = gen_lowpart (SImode, operands[1]); + if (loongarch_14bit_shifted_offset_address_p (XEXP (operands[1], 0), SImode)) + return "ldptr.w\t%0,%1\n\tbstrins.d\t%0,$zero,63,32"; + else if (loongarch_12bit_offset_address_p (XEXP (operands[1], 0), SImode)) + return "ld.wu\t%0,%1"; + else + gcc_unreachable (); + case 3: + return "andi\t%0,%1,%x2"; + case 4: + len = low_bitmask_len (mode, INTVAL (operands[2])); + operands[2] = GEN_INT (len-1); + return "bstrpick.\t%0,%1,%2,0"; + case 5: + return "#"; + case 6: + return "and\t%0,%1,%2"; + default: + gcc_unreachable (); + } +} + [(set_attr "move_type" "load,load,load,andi,pick_ins,shift_shift,logical") + (set_attr "compression" "*,*,*,*,*,*,*") + (set_attr "mode" "")]) + +(define_expand "ior3" + [(set (match_operand:GPR 0 "register_operand") + (ior:GPR (match_operand:GPR 1 "register_operand") + (match_operand:GPR 2 "uns_arith_operand")))] + "" +{ +}) + +(define_insn "*ior3" + [(set (match_operand:GPR 0 "register_operand" "=d,d") + (ior:GPR (match_operand:GPR 1 "register_operand" "d,d") + (match_operand:GPR 2 "uns_arith_operand" "d,K")))] + "" + "@ + or\t%0,%1,%2 + ori\t%0,%1,%x2" + [(set_attr "alu_type" "or") + (set_attr "compression" "*,*") + (set_attr "mode" "")]) + +(define_insn "*iorhi3" + [(set (match_operand:HI 0 "register_operand" "=d,d") + (ior:HI (match_operand:HI 1 "register_operand" "d,d") + (match_operand:HI 2 "uns_arith_operand" "K,d")))] + "" + "@ + ori\t%0,%1,%x2 + or\t%0,%1,%2" + [(set_attr "alu_type" "or") + (set_attr "mode" "HI")]) + +(define_expand "xor3" + [(set (match_operand:GPR 0 "register_operand") + (xor:GPR (match_operand:GPR 1 "register_operand") + (match_operand:GPR 2 "uns_arith_operand")))] + "" + "") + +(define_insn "*xor3" + [(set (match_operand:GPR 0 "register_operand" "=d,d") + (xor:GPR (match_operand:GPR 1 "register_operand" "d,d") + (match_operand:GPR 2 "uns_arith_operand" "d,K")))] + "" + "@ + xor\t%0,%1,%2 + xori\t%0,%1,%x2" + [(set_attr "alu_type" "xor") + (set_attr "compression" "*,*") + (set_attr "mode" "")]) + + +(define_insn "*nor3" + [(set (match_operand:GPR 0 "register_operand" "=d") + (and:GPR (not:GPR (match_operand:GPR 1 "register_operand" "d")) + (not:GPR (match_operand:GPR 2 "register_operand" "d"))))] + "" + "nor\t%0,%1,%2" + [(set_attr "alu_type" "nor") + (set_attr "mode" "")]) + +;; +;; .................... +;; +;; TRUNCATION +;; +;; .................... + + + +(define_insn "truncdfsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" + "fcvt.s.d\t%0,%1" + [(set_attr "type" "fcvt") + (set_attr "cnv_mode" "D2S") + (set_attr "mode" "SF")]) + +;; Integer truncation patterns. Truncating SImode values to smaller +;; modes is a no-op, as it is for most other GCC ports. 
Truncating +;; DImode values to SImode is not a no-op for TARGET_64BIT since we +;; need to make sure that the lower 32 bits are properly sign-extended +;; (see TARGET_TRULY_NOOP_TRUNCATION). Truncating DImode values into modes +;; smaller than SImode is equivalent to two separate truncations: +;; +;; A B +;; DI ---> HI == DI ---> SI ---> HI +;; DI ---> QI == DI ---> SI ---> QI +;; +;; Step A needs a real instruction but step B does not. + +(define_insn "truncdisi2" + [(set (match_operand:SI 0 "nonimmediate_operand" "=d,ZC,m") + (truncate:SI (match_operand:DI 1 "register_operand" "d,d,d")))] + "TARGET_64BIT" + "@ + slli.w\t%0,%1,0 + stptr.w\t%1,%0 + st.w\t%1,%0" + [(set_attr "move_type" "sll0,store,store") + (set_attr "mode" "SI")]) + +(define_insn "truncdi2" + [(set (match_operand:SHORT 0 "nonimmediate_operand" "=d,m") + (truncate:SHORT (match_operand:DI 1 "register_operand" "d,d")))] + "TARGET_64BIT" + "@ + slli.w\t%0,%1,0 + st.\t%1,%0" + [(set_attr "move_type" "sll0,store") + (set_attr "mode" "SI")]) + +;; Combiner patterns to optimize shift/truncate combinations. + +(define_insn "*ashr_trunc" + [(set (match_operand:SUBDI 0 "register_operand" "=d") + (truncate:SUBDI + (ashiftrt:DI (match_operand:DI 1 "register_operand" "d") + (match_operand:DI 2 "const_arith_operand" ""))))] + "TARGET_64BIT && IN_RANGE (INTVAL (operands[2]), 32, 63)" + "srai.d\t%0,%1,%2" + [(set_attr "type" "shift") + (set_attr "mode" "")]) + +(define_insn "*lshr32_trunc" + [(set (match_operand:SUBDI 0 "register_operand" "=d") + (truncate:SUBDI + (lshiftrt:DI (match_operand:DI 1 "register_operand" "d") + (const_int 32))))] + "TARGET_64BIT" + "srai.d\t%0,%1,32" + [(set_attr "type" "shift") + (set_attr "mode" "")]) + + + +;; +;; .................... +;; +;; ZERO EXTENSION +;; +;; .................... + +;; Extension insns. + +(define_expand "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))] + "TARGET_64BIT") + +(define_insn "*zero_extendsidi2_dext" + [(set (match_operand:DI 0 "register_operand" "=d,d,d") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "d,ZC,W")))] + "TARGET_64BIT" + "@ + bstrpick.d\t%0,%1,31,0 + ldptr.w\t%0,%1\n\tlu32i.d\t%0,0 + ld.wu\t%0,%1" + [(set_attr "move_type" "arith,load,load") + (set_attr "mode" "DI") + (set_attr "insn_count" "1,2,1")]) + +(define_insn "*zero_extendsidi2_internal" + [(set (match_operand:DI 0 "register_operand" "=d,d,d") + (subreg:DI (match_operand:SI 1 "nonimmediate_operand" "d,ZC,W") 0))] + "TARGET_64BIT" + "@ + bstrpick.d\t%0,%1,31,0 + ldptr.w\t%0,%1\n\tlu32i.d\t%0,0 + ld.wu\t%0,%1" + [(set_attr "move_type" "arith,load,load") + (set_attr "mode" "DI") + (set_attr "insn_count" "1,2,1")]) +;; See the comment before the *and3 pattern why this is generated by +;; combine. 
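+;;
+;; For reference, a register-to-register (zero_extend:DI (reg:SI)) uses the
+;; first alternative of the patterns above and is a single bit-field extract:
+;;
+;;   bstrpick.d  %0,%1,31,0    ; copy bits 31..0, clear bits 63..32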
+ +(define_expand "zero_extend2" + [(set (match_operand:GPR 0 "register_operand") + (zero_extend:GPR (match_operand:SHORT 1 "nonimmediate_operand")))] + "" +{ +}) + +(define_insn "*zero_extend2" + [(set (match_operand:GPR 0 "register_operand" "=d,d") + (zero_extend:GPR + (match_operand:SHORT 1 "nonimmediate_operand" "d,m")))] + "" +{ + switch (which_alternative) + { + case 0: + return "bstrpick.\t%0,%1,,0"; + case 1: + return "ld.u\t%0,%1"; + default: + gcc_unreachable (); + } +} + [(set_attr "move_type" "pick_ins,load") + (set_attr "compression" "*,*") + (set_attr "mode" "")]) + + +(define_expand "zero_extendqihi2" + [(set (match_operand:HI 0 "register_operand") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand")))] + "" +{ +}) + +(define_insn "*zero_extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=d,d") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "d,m")))] + "" + "@ + andi\t%0,%1,0x00ff + ld.bu\t%0,%1" + [(set_attr "move_type" "andi,load") + (set_attr "mode" "HI")]) + +;; Combiner patterns to optimize truncate/zero_extend combinations. + +(define_insn "*zero_extend_trunc" + [(set (match_operand:GPR 0 "register_operand" "=d") + (zero_extend:GPR + (truncate:SHORT (match_operand:DI 1 "register_operand" "d"))))] + "TARGET_64BIT" + "bstrpick.\t%0,%1,,0" + [(set_attr "move_type" "pick_ins") + (set_attr "mode" "")]) + +(define_insn "*zero_extendhi_truncqi" + [(set (match_operand:HI 0 "register_operand" "=d") + (zero_extend:HI + (truncate:QI (match_operand:DI 1 "register_operand" "d"))))] + "TARGET_64BIT" + "andi\t%0,%1,0xff" + [(set_attr "alu_type" "and") + (set_attr "mode" "HI")]) + +;; +;; .................... +;; +;; SIGN EXTENSION +;; +;; .................... + +;; Extension insns. +;; Those for integer source operand are ordered widest source type first. + +;; When TARGET_64BIT, all SImode integer and accumulator registers +;; should already be in sign-extended form (see TARGET_TRULY_NOOP_TRUNCATION +;; and truncdisi2). We can therefore get rid of register->register +;; instructions if we constrain the source to be in the same register as +;; the destination. +;; +;; Only the pre-reload scheduler sees the type of the register alternatives; +;; we split them into nothing before the post-reload scheduler runs. +;; These alternatives therefore have type "move" in order to reflect +;; what happens if the two pre-reload operands cannot be tied, and are +;; instead allocated two separate GPRs. We don't distinguish between +;; the GPR and LO cases because we don't usually know during pre-reload +;; scheduling whether an operand will be LO or not. 
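+;;
+;; In particular, a register-to-register (sign_extend:DI (reg:SI)) costs
+;; nothing: alternative 0 of "extendsidi2" below ties the source to the
+;; destination, outputs "#", and the split after reload simply deletes the
+;; insn.  Only the memory alternatives (ldptr.w / ld.w) generate code.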
+(define_insn_and_split "extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=d,d,d") + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "0,ZC,m")))] + "TARGET_64BIT" + "@ + # + ldptr.w\t%0,%1 + ld.w\t%0,%1" + "&& reload_completed && register_operand (operands[1], VOIDmode)" + [(const_int 0)] +{ + emit_note (NOTE_INSN_DELETED); + DONE; +} + [(set_attr "move_type" "move,load,load") + (set_attr "mode" "DI")]) + +(define_expand "extend2" + [(set (match_operand:GPR 0 "register_operand") + (sign_extend:GPR (match_operand:SHORT 1 "nonimmediate_operand")))] + "") + + +(define_insn "*extend2_se" + [(set (match_operand:GPR 0 "register_operand" "=d,d") + (sign_extend:GPR + (match_operand:SHORT 1 "nonimmediate_operand" "d,m")))] + "" + "@ + ext.w.\t%0,%1 + ld.\t%0,%1" + [(set_attr "move_type" "signext,load") + (set_attr "mode" "")]) + +(define_expand "extendqihi2" + [(set (match_operand:HI 0 "register_operand") + (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand")))] + "") + +(define_insn "*extendqihi2_seb" + [(set (match_operand:HI 0 "register_operand" "=d,d") + (sign_extend:HI + (match_operand:QI 1 "nonimmediate_operand" "d,m")))] + "" + "@ + ext.w.b\t%0,%1 + ld.b\t%0,%1" + [(set_attr "move_type" "signext,load") + (set_attr "mode" "SI")]) + +;; Combiner patterns for truncate/sign_extend combinations. The SI versions +;; use the shift/truncate patterns. + +(define_insn_and_split "*extenddi_truncate" + [(set (match_operand:DI 0 "register_operand" "=d") + (sign_extend:DI + (truncate:SHORT (match_operand:DI 1 "register_operand" "d"))))] + "TARGET_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 2) + (ashift:DI (match_dup 1) + (match_dup 3))) + (set (match_dup 0) + (ashiftrt:DI (match_dup 2) + (match_dup 3)))] +{ + operands[2] = gen_lowpart (DImode, operands[0]); + operands[3] = GEN_INT (BITS_PER_WORD - GET_MODE_BITSIZE (mode)); +} + [(set_attr "move_type" "shift_shift") + (set_attr "mode" "DI")]) + +(define_insn_and_split "*extendsi_truncate" + [(set (match_operand:SI 0 "register_operand" "=d") + (sign_extend:SI + (truncate:SHORT (match_operand:DI 1 "register_operand" "d"))))] + "TARGET_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 2) + (ashift:DI (match_dup 1) + (match_dup 3))) + (set (match_dup 0) + (truncate:SI (ashiftrt:DI (match_dup 2) + (match_dup 3))))] +{ + operands[2] = gen_lowpart (DImode, operands[0]); + operands[3] = GEN_INT (BITS_PER_WORD - GET_MODE_BITSIZE (mode)); +} + [(set_attr "move_type" "shift_shift") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*extendhi_truncateqi" + [(set (match_operand:HI 0 "register_operand" "=d") + (sign_extend:HI + (truncate:QI (match_operand:DI 1 "register_operand" "d"))))] + "TARGET_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 2) + (ashift:DI (match_dup 1) + (const_int 56))) + (set (match_dup 0) + (truncate:HI (ashiftrt:DI (match_dup 2) + (const_int 56))))] +{ + operands[2] = gen_lowpart (DImode, operands[0]); +} + [(set_attr "move_type" "shift_shift") + (set_attr "mode" "SI")]) + +(define_insn "extendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float_extend:DF (match_operand:SF 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" + "fcvt.d.s\t%0,%1" + [(set_attr "type" "fcvt") + (set_attr "cnv_mode" "S2D") + (set_attr "mode" "DF")]) + +;; +;; .................... +;; +;; CONVERSIONS +;; +;; .................... 
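+;;
+;; The fixuns_trunc* expanders later in this section open-code unsigned
+;; conversions using the usual two-range sequence, sketched here for an
+;; N-bit integer result:
+;;
+;;   if (x < 2^(N-1))
+;;     result = (int) x;                             ; plain ftintrz
+;;   else
+;;     result = (int) (x - 2^(N-1)) | (1 << (N-1));  ; add the sign bit back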
+ +(define_expand "fix_truncdfsi2" + [(set (match_operand:SI 0 "register_operand") + (fix:SI (match_operand:DF 1 "register_operand")))] + "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" +"" +) + +(define_insn "fix_truncdfsi2_insn" + [(set (match_operand:SI 0 "register_operand" "=f") + (fix:SI (match_operand:DF 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" + "ftintrz.w.d %0,%1" + [(set_attr "type" "fcvt") + (set_attr "mode" "DF") + (set_attr "cnv_mode" "D2I")]) + + +(define_expand "fix_truncsfsi2" + [(set (match_operand:SI 0 "register_operand") + (fix:SI (match_operand:SF 1 "register_operand")))] + "TARGET_HARD_FLOAT" +"" +) + +(define_insn "fix_truncsfsi2_insn" + [(set (match_operand:SI 0 "register_operand" "=f") + (fix:SI (match_operand:SF 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "ftintrz.w.s %0,%1" + [(set_attr "type" "fcvt") + (set_attr "mode" "SF") + (set_attr "cnv_mode" "S2I")]) + + +(define_insn "fix_truncdfdi2" + [(set (match_operand:DI 0 "register_operand" "=f") + (fix:DI (match_operand:DF 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT" + "ftintrz.l.d %0,%1" + [(set_attr "type" "fcvt") + (set_attr "mode" "DF") + (set_attr "cnv_mode" "D2I")]) + + +(define_insn "fix_truncsfdi2" + [(set (match_operand:DI 0 "register_operand" "=f") + (fix:DI (match_operand:SF 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT" + "ftintrz.l.s %0,%1" + [(set_attr "type" "fcvt") + (set_attr "mode" "SF") + (set_attr "cnv_mode" "S2I")]) + + +(define_insn "floatsidf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float:DF (match_operand:SI 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" + "ffint.d.w\t%0,%1" + [(set_attr "type" "fcvt") + (set_attr "mode" "DF") + (set_attr "cnv_mode" "I2D")]) + + +(define_insn "floatdidf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float:DF (match_operand:DI 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT" + "ffint.d.l\t%0,%1" + [(set_attr "type" "fcvt") + (set_attr "mode" "DF") + (set_attr "cnv_mode" "I2D")]) + + +(define_insn "floatsisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float:SF (match_operand:SI 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "ffint.s.w\t%0,%1" + [(set_attr "type" "fcvt") + (set_attr "mode" "SF") + (set_attr "cnv_mode" "I2S")]) + + +(define_insn "floatdisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float:SF (match_operand:DI 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT" + "ffint.s.l\t%0,%1" + [(set_attr "type" "fcvt") + (set_attr "mode" "SF") + (set_attr "cnv_mode" "I2S")]) + + +(define_expand "fixuns_truncdfsi2" + [(set (match_operand:SI 0 "register_operand") + (unsigned_fix:SI (match_operand:DF 1 "register_operand")))] + "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" +{ + rtx reg1 = gen_reg_rtx (DFmode); + rtx reg2 = gen_reg_rtx (DFmode); + rtx reg3 = gen_reg_rtx (SImode); + rtx_code_label *label1 = gen_label_rtx (); + rtx_code_label *label2 = gen_label_rtx (); + rtx test; + REAL_VALUE_TYPE offset; + + real_2expN (&offset, 31, DFmode); + + if (reg1) /* Turn off complaints about unreached code. 
*/ + { + loongarch_emit_move (reg1, const_double_from_real_value (offset, DFmode)); + do_pending_stack_adjust (); + + test = gen_rtx_GE (VOIDmode, operands[1], reg1); + emit_jump_insn (gen_cbranchdf4 (test, operands[1], reg1, label1)); + + emit_insn (gen_fix_truncdfsi2 (operands[0], operands[1])); + emit_jump_insn (gen_rtx_SET (pc_rtx, + gen_rtx_LABEL_REF (VOIDmode, label2))); + emit_barrier (); + + emit_label (label1); + loongarch_emit_move (reg2, gen_rtx_MINUS (DFmode, operands[1], reg1)); + loongarch_emit_move (reg3, GEN_INT (trunc_int_for_mode + (BITMASK_HIGH, SImode))); + + emit_insn (gen_fix_truncdfsi2 (operands[0], reg2)); + emit_insn (gen_iorsi3 (operands[0], operands[0], reg3)); + + emit_label (label2); + + /* Allow REG_NOTES to be set on last insn (labels don't have enough + fields, and can't be used for REG_NOTES anyway). */ + emit_use (stack_pointer_rtx); + DONE; + } +}) + + +(define_expand "fixuns_truncdfdi2" + [(set (match_operand:DI 0 "register_operand") + (unsigned_fix:DI (match_operand:DF 1 "register_operand")))] + "TARGET_HARD_FLOAT && TARGET_64BIT && TARGET_DOUBLE_FLOAT" +{ + rtx reg1 = gen_reg_rtx (DFmode); + rtx reg2 = gen_reg_rtx (DFmode); + rtx reg3 = gen_reg_rtx (DImode); + rtx_code_label *label1 = gen_label_rtx (); + rtx_code_label *label2 = gen_label_rtx (); + rtx test; + REAL_VALUE_TYPE offset; + + real_2expN (&offset, 63, DFmode); + + loongarch_emit_move (reg1, const_double_from_real_value (offset, DFmode)); + do_pending_stack_adjust (); + + test = gen_rtx_GE (VOIDmode, operands[1], reg1); + emit_jump_insn (gen_cbranchdf4 (test, operands[1], reg1, label1)); + + emit_insn (gen_fix_truncdfdi2 (operands[0], operands[1])); + emit_jump_insn (gen_rtx_SET (pc_rtx, gen_rtx_LABEL_REF (VOIDmode, label2))); + emit_barrier (); + + emit_label (label1); + loongarch_emit_move (reg2, gen_rtx_MINUS (DFmode, operands[1], reg1)); + loongarch_emit_move (reg3, GEN_INT (BITMASK_HIGH)); + emit_insn (gen_ashldi3 (reg3, reg3, GEN_INT (32))); + + emit_insn (gen_fix_truncdfdi2 (operands[0], reg2)); + emit_insn (gen_iordi3 (operands[0], operands[0], reg3)); + + emit_label (label2); + + /* Allow REG_NOTES to be set on last insn (labels don't have enough + fields, and can't be used for REG_NOTES anyway). 
*/ + emit_use (stack_pointer_rtx); + DONE; +}) + + +(define_expand "fixuns_truncsfsi2" + [(set (match_operand:SI 0 "register_operand") + (unsigned_fix:SI (match_operand:SF 1 "register_operand")))] + "TARGET_HARD_FLOAT" +{ + rtx reg1 = gen_reg_rtx (SFmode); + rtx reg2 = gen_reg_rtx (SFmode); + rtx reg3 = gen_reg_rtx (SImode); + rtx_code_label *label1 = gen_label_rtx (); + rtx_code_label *label2 = gen_label_rtx (); + rtx test; + REAL_VALUE_TYPE offset; + + real_2expN (&offset, 31, SFmode); + + loongarch_emit_move (reg1, const_double_from_real_value (offset, SFmode)); + do_pending_stack_adjust (); + + test = gen_rtx_GE (VOIDmode, operands[1], reg1); + emit_jump_insn (gen_cbranchsf4 (test, operands[1], reg1, label1)); + + emit_insn (gen_fix_truncsfsi2 (operands[0], operands[1])); + emit_jump_insn (gen_rtx_SET (pc_rtx, gen_rtx_LABEL_REF (VOIDmode, label2))); + emit_barrier (); + + emit_label (label1); + loongarch_emit_move (reg2, gen_rtx_MINUS (SFmode, operands[1], reg1)); + loongarch_emit_move (reg3, GEN_INT (trunc_int_for_mode + (BITMASK_HIGH, SImode))); + + emit_insn (gen_fix_truncsfsi2 (operands[0], reg2)); + emit_insn (gen_iorsi3 (operands[0], operands[0], reg3)); + + emit_label (label2); + + /* Allow REG_NOTES to be set on last insn (labels don't have enough + fields, and can't be used for REG_NOTES anyway). */ + emit_use (stack_pointer_rtx); + DONE; +}) + + +(define_expand "fixuns_truncsfdi2" + [(set (match_operand:DI 0 "register_operand") + (unsigned_fix:DI (match_operand:SF 1 "register_operand")))] + "TARGET_HARD_FLOAT && TARGET_64BIT && TARGET_DOUBLE_FLOAT" +{ + rtx reg1 = gen_reg_rtx (SFmode); + rtx reg2 = gen_reg_rtx (SFmode); + rtx reg3 = gen_reg_rtx (DImode); + rtx_code_label *label1 = gen_label_rtx (); + rtx_code_label *label2 = gen_label_rtx (); + rtx test; + REAL_VALUE_TYPE offset; + + real_2expN (&offset, 63, SFmode); + + loongarch_emit_move (reg1, const_double_from_real_value (offset, SFmode)); + do_pending_stack_adjust (); + + test = gen_rtx_GE (VOIDmode, operands[1], reg1); + emit_jump_insn (gen_cbranchsf4 (test, operands[1], reg1, label1)); + + emit_insn (gen_fix_truncsfdi2 (operands[0], operands[1])); + emit_jump_insn (gen_rtx_SET (pc_rtx, gen_rtx_LABEL_REF (VOIDmode, label2))); + emit_barrier (); + + emit_label (label1); + loongarch_emit_move (reg2, gen_rtx_MINUS (SFmode, operands[1], reg1)); + loongarch_emit_move (reg3, GEN_INT (BITMASK_HIGH)); + emit_insn (gen_ashldi3 (reg3, reg3, GEN_INT (32))); + + emit_insn (gen_fix_truncsfdi2 (operands[0], reg2)); + emit_insn (gen_iordi3 (operands[0], operands[0], reg3)); + + emit_label (label2); + + /* Allow REG_NOTES to be set on last insn (labels don't have enough + fields, and can't be used for REG_NOTES anyway). */ + emit_use (stack_pointer_rtx); + DONE; +}) + +;; +;; .................... +;; +;; DATA MOVEMENT +;; +;; .................... 
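+;;
+;; The extract/insert patterns below map (zero_extract ...) onto the
+;; bstrpick/bstrins bit-field instructions.  GCC supplies a (size, position)
+;; pair, while the instructions take (msb, lsb), so the patterns compute
+;; msb = size + position - 1.  For example, extracting an 8-bit field at
+;; bit 16 of a 32-bit register becomes:
+;;
+;;   bstrpick.w  %0,%1,23,16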
+ +(define_expand "extzv" + [(set (match_operand:GPR 0 "register_operand") + (zero_extract:GPR (match_operand:GPR 1 "register_operand") + (match_operand 2 "const_int_operand") + (match_operand 3 "const_int_operand")))] + "" +{ + if (!loongarch_use_ins_ext_p (operands[1], INTVAL (operands[2]), + INTVAL (operands[3]))) + FAIL; +}) + +(define_insn "*extzv" + [(set (match_operand:GPR 0 "register_operand" "=d") + (zero_extract:GPR (match_operand:GPR 1 "register_operand" "d") + (match_operand 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")))] + "loongarch_use_ins_ext_p (operands[1], INTVAL (operands[2]), + INTVAL (operands[3]))" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) + INTVAL (operands[3]) -1 ); + return "bstrpick.\t%0,%1,%2,%3"; +} + [(set_attr "type" "arith") + (set_attr "mode" "")]) + +(define_expand "insv" + [(set (zero_extract:GPR (match_operand:GPR 0 "register_operand") + (match_operand 1 "const_int_operand") + (match_operand 2 "const_int_operand")) + (match_operand:GPR 3 "reg_or_0_operand"))] + "" +{ + if (!loongarch_use_ins_ext_p (operands[0], INTVAL (operands[1]), + INTVAL (operands[2]))) + FAIL; +}) + +(define_insn "*insv" + [(set (zero_extract:GPR (match_operand:GPR 0 "register_operand" "+d") + (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:GPR 3 "reg_or_0_operand" "dJ"))] + "loongarch_use_ins_ext_p (operands[0], INTVAL (operands[1]), + INTVAL (operands[2]))" +{ + operands[1] = GEN_INT (INTVAL (operands[1]) + INTVAL (operands[2]) -1 ); + return "bstrins.\t%0,%z3,%1,%2"; +} + [(set_attr "type" "arith") + (set_attr "mode" "")]) + +;; Allow combine to split complex const_int load sequences, using operand 2 +;; to store the intermediate results. See move_operand for details. +(define_split + [(set (match_operand:GPR 0 "register_operand") + (match_operand:GPR 1 "splittable_const_int_operand")) + (clobber (match_operand:GPR 2 "register_operand"))] + "" + [(const_int 0)] +{ + loongarch_move_integer (operands[2], operands[0], INTVAL (operands[1])); + DONE; +}) + +;; 64-bit integer moves + +;; Unlike most other insns, the move insns can't be split with +;; different predicates, because register spilling and other parts of +;; the compiler, have memoized the insn number already. 
+ +(define_expand "movdi" + [(set (match_operand:DI 0 "") + (match_operand:DI 1 ""))] + "" +{ + if (loongarch_legitimize_move (DImode, operands[0], operands[1])) + DONE; +}) + + +(define_insn "*movdi_32bit" + [(set (match_operand:DI 0 "nonimmediate_operand" "=d,d,d,ZC,d,m,*f,*f,*d,*m") + (match_operand:DI 1 "move_operand" "d,i,ZC,d,m,d,*J*d,*m,*f,*f"))] + "!TARGET_64BIT + && (register_operand (operands[0], DImode) + || reg_or_0_operand (operands[1], DImode))" + { return loongarch_output_move (operands[0], operands[1]); } + [(set_attr "move_type" "move,const,load,store,load,store,mgtf,fpload,mftg,fpstore") + (set (attr "mode") + (if_then_else (eq_attr "move_type" "imul") + (const_string "SI") + (const_string "DI")))]) + + +(define_insn "*movdi_64bit" + [(set (match_operand:DI 0 "nonimmediate_operand" "=d,d,d,ZC,d,m,*f,*f,*d,*m") + (match_operand:DI 1 "move_operand" "d,Yd,ZC,dJ,m,dJ,*d*J,*m,*f,*f"))] + "TARGET_64BIT + && (register_operand (operands[0], DImode) + || reg_or_0_operand (operands[1], DImode))" + { return loongarch_output_move (operands[0], operands[1]); } + [(set_attr "move_type" "move,const,load,store,load,store,mgtf,fpload,mftg,fpstore") + (set_attr "mode" "DI")]) + +;; 32-bit Integer moves + +;; Unlike most other insns, the move insns can't be split with +;; different predicates, because register spilling and other parts of +;; the compiler, have memoized the insn number already. + +(define_expand "mov" + [(set (match_operand:IMOVE32 0 "") + (match_operand:IMOVE32 1 ""))] + "" +{ + if (loongarch_legitimize_move (mode, operands[0], operands[1])) + DONE; +}) + +;; The difference between these two is whether or not ints are allowed +;; in FP registers (off by default, use -mdebugh to enable). + +(define_insn "*mov_internal" + [(set (match_operand:IMOVE32 0 "nonimmediate_operand" "=d,d,d,ZC,d,m,*f,*f,*d,*m,*d,*z") + (match_operand:IMOVE32 1 "move_operand" "d,Yd,ZC,dJ,m,dJ,*d*J,*m,*f,*f,*z,*d"))] + "(register_operand (operands[0], mode) + || reg_or_0_operand (operands[1], mode))" + { return loongarch_output_move (operands[0], operands[1]); } + [(set_attr "move_type" "move,const,load,store,load,store,mgtf,fpload,mftg,fpstore,mftg,mgtf") + (set_attr "compression" "all,*,*,*,*,*,*,*,*,*,*,*") + (set_attr "mode" "SI")]) + + + +;; LARCH supports loading and storing a floating point register from +;; the sum of two general registers. We use two versions for each of +;; these four instructions: one where the two general registers are +;; SImode, and one where they are DImode. This is because general +;; registers will be in SImode when they hold 32-bit values, but, +;; since the 32-bit values are always sign extended, the [ls][wd]xc1 +;; instructions will still work correctly. + +;; ??? Perhaps it would be better to support these instructions by +;; modifying TARGET_LEGITIMATE_ADDRESS_P and friends. However, since +;; these instructions can only be used to load and store floating +;; point registers, that would probably cause trouble in reload. 
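+;;
+;; For example, an SFmode load from a base-plus-index address,
+;; (mem:SF (plus:DI (reg A) (reg B))), matches the first pattern below and
+;; is emitted as a single indexed load:
+;;
+;;   fldx.s  %0,%1,%2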
+ +(define_insn "*_" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (mem:ANYF (plus:P (match_operand:P 1 "register_operand" "d") + (match_operand:P 2 "register_operand" "d"))))] + "" + "\t%0,%1,%2" + [(set_attr "type" "fpidxload") + (set_attr "mode" "")]) + +(define_insn "*_" + [(set (mem:ANYF (plus:P (match_operand:P 1 "register_operand" "d") + (match_operand:P 2 "register_operand" "d"))) + (match_operand:ANYF 0 "register_operand" "f"))] + "TARGET_HARD_FLOAT" + "\t%0,%1,%2" + [(set_attr "type" "fpidxstore") + (set_attr "mode" "")]) + +;; LoongArch index address load and store. +(define_insn "*_" + [(set (match_operand:GPR 0 "register_operand" "=d") + (mem:GPR + (plus:P (match_operand:P 1 "register_operand" "d") + (match_operand:P 2 "register_operand" "d"))))] + "" + "\t%0,%1,%2" + [(set_attr "type" "load") + (set_attr "mode" "")]) + +(define_insn "*_" + [(set (mem:GPR (plus:P (match_operand:P 1 "register_operand" "d") + (match_operand:P 2 "register_operand" "d"))) + (match_operand:GPR 0 "register_operand" "d"))] + "" + "\t%0,%1,%2" + [(set_attr "type" "store") + (set_attr "mode" "")]) + +;; SHORT mode sign_extend. +(define_insn "*extend__" + [(set (match_operand:GPR 0 "register_operand" "=d") + (sign_extend:GPR + (mem:SHORT + (plus:P (match_operand:P 1 "register_operand" "d") + (match_operand:P 2 "register_operand" "d")))))] + "" + "\t%0,%1,%2" + [(set_attr "type" "load") + (set_attr "mode" "")]) + +(define_insn "*extend_" + [(set (mem:SHORT (plus:P (match_operand:P 1 "register_operand" "d") + (match_operand:P 2 "register_operand" "d"))) + (match_operand:SHORT 0 "register_operand" "d"))] + "" + "\t%0,%1,%2" + [(set_attr "type" "store") + (set_attr "mode" "SI")]) + + +;; 16-bit Integer moves + +;; Unlike most other insns, the move insns can't be split with +;; different predicates, because register spilling and other parts of +;; the compiler, have memoized the insn number already. +;; Unsigned loads are used because LOAD_EXTEND_OP returns ZERO_EXTEND. + +(define_expand "movhi" + [(set (match_operand:HI 0 "") + (match_operand:HI 1 ""))] + "" +{ + if (loongarch_legitimize_move (HImode, operands[0], operands[1])) + DONE; +}) + +(define_insn "*movhi_internal" + [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d,d,m") + (match_operand:HI 1 "move_operand" "d,Yd,I,m,dJ"))] + "(register_operand (operands[0], HImode) + || reg_or_0_operand (operands[1], HImode))" + { return loongarch_output_move (operands[0], operands[1]); } + [(set_attr "move_type" "move,const,const,load,store") + (set_attr "compression" "all,all,*,*,*") + (set_attr "mode" "HI")]) + +;; 8-bit Integer moves + +;; Unlike most other insns, the move insns can't be split with +;; different predicates, because register spilling and other parts of +;; the compiler, have memoized the insn number already. +;; Unsigned loads are used because LOAD_EXTEND_OP returns ZERO_EXTEND. 
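+
+;; Illustration only: since LOAD_EXTEND_OP returns ZERO_EXTEND, a load of
+;;   unsigned char g;
+;;   int f (void) { return g; }
+;; is assumed to be a single zero-extending "ld.bu", with no separate
+;; extension needed afterwards.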
+ +(define_expand "movqi" + [(set (match_operand:QI 0 "") + (match_operand:QI 1 ""))] + "" +{ + if (loongarch_legitimize_move (QImode, operands[0], operands[1])) + DONE; +}) + +(define_insn "*movqi_internal" + [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,m") + (match_operand:QI 1 "move_operand" "d,I,m,dJ"))] + "(register_operand (operands[0], QImode) + || reg_or_0_operand (operands[1], QImode))" + { return loongarch_output_move (operands[0], operands[1]); } + [(set_attr "move_type" "move,const,load,store") + (set_attr "compression" "all,*,*,*") + (set_attr "mode" "QI")]) + +;; 32-bit floating point moves + +(define_expand "movsf" + [(set (match_operand:SF 0 "") + (match_operand:SF 1 ""))] + "" +{ + if (loongarch_legitimize_move (SFmode, operands[0], operands[1])) + DONE; +}) + +(define_insn "*movsf_hardfloat" + [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,f,m,m,*f,*d,*d,*d,*m") + (match_operand:SF 1 "move_operand" "f,G,m,f,G,*d,*f,*G*d,*m,*d"))] + "TARGET_HARD_FLOAT + && (register_operand (operands[0], SFmode) + || reg_or_0_operand (operands[1], SFmode))" + { return loongarch_output_move (operands[0], operands[1]); } + [(set_attr "move_type" "fmove,mgtf,fpload,fpstore,store,mgtf,mftg,move,load,store") + (set_attr "mode" "SF")]) + +(define_insn "*movsf_softfloat" + [(set (match_operand:SF 0 "nonimmediate_operand" "=d,d,m") + (match_operand:SF 1 "move_operand" "Gd,m,d"))] + "TARGET_SOFT_FLOAT + && (register_operand (operands[0], SFmode) + || reg_or_0_operand (operands[1], SFmode))" + { return loongarch_output_move (operands[0], operands[1]); } + [(set_attr "move_type" "move,load,store") + (set_attr "mode" "SF")]) + + +;; 64-bit floating point moves + +(define_expand "movdf" + [(set (match_operand:DF 0 "") + (match_operand:DF 1 ""))] + "" +{ + if (loongarch_legitimize_move (DFmode, operands[0], operands[1])) + DONE; +}) + +(define_insn "*movdf_hardfloat" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,f,m,m,*f,*d,*d,*d,*m") + (match_operand:DF 1 "move_operand" "f,G,m,f,G,*d,*f,*d*G,*m,*d"))] + "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT + && (register_operand (operands[0], DFmode) + || reg_or_0_operand (operands[1], DFmode))" + { return loongarch_output_move (operands[0], operands[1]); } + [(set_attr "move_type" "fmove,mgtf,fpload,fpstore,store,mgtf,mftg,move,load,store") + (set_attr "mode" "DF")]) + +(define_insn "*movdf_softfloat" + [(set (match_operand:DF 0 "nonimmediate_operand" "=d,d,m") + (match_operand:DF 1 "move_operand" "dG,m,dG"))] + "(TARGET_SOFT_FLOAT || TARGET_SINGLE_FLOAT) + && (register_operand (operands[0], DFmode) + || reg_or_0_operand (operands[1], DFmode))" + { return loongarch_output_move (operands[0], operands[1]); } + [(set_attr "move_type" "move,load,store") + (set_attr "mode" "DF")]) + + +;; 128-bit integer moves + +(define_expand "movti" + [(set (match_operand:TI 0) + (match_operand:TI 1))] + "TARGET_64BIT" +{ + if (loongarch_legitimize_move (TImode, operands[0], operands[1])) + DONE; +}) + +(define_insn "*movti" + [(set (match_operand:TI 0 "nonimmediate_operand" "=d,d,d,m") + (match_operand:TI 1 "move_operand" "d,i,m,dJ"))] + "TARGET_64BIT + && (register_operand (operands[0], TImode) + || reg_or_0_operand (operands[1], TImode))" + { return loongarch_output_move (operands[0], operands[1]); } + [(set_attr "move_type" "move,const,load,store") + (set (attr "mode") + (if_then_else (eq_attr "move_type" "imul") + (const_string "SI") + (const_string "TI")))]) + + +;; 128-bit floating point moves + +(define_expand "movtf" + [(set (match_operand:TF 
0) + (match_operand:TF 1))] + "TARGET_64BIT" +{ + if (loongarch_legitimize_move (TFmode, operands[0], operands[1])) + DONE; +}) + +;; This pattern handles both hard- and soft-float cases. +(define_insn "*movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "=d,d,m,f,d,f,m") + (match_operand:TF 1 "move_operand" "dG,m,dG,dG,f,m,f"))] + "TARGET_64BIT + && (register_operand (operands[0], TFmode) + || reg_or_0_operand (operands[1], TFmode))" + "#" + [(set_attr "move_type" "move,load,store,mgtf,mftg,fpload,fpstore") + (set_attr "mode" "TF")]) + + +(define_split + [(set (match_operand:MOVE64 0 "nonimmediate_operand") + (match_operand:MOVE64 1 "move_operand"))] + "reload_completed && loongarch_split_move_insn_p (operands[0], operands[1], insn)" + [(const_int 0)] +{ + loongarch_split_move_insn (operands[0], operands[1], curr_insn); + DONE; +}) + +(define_split + [(set (match_operand:MOVE128 0 "nonimmediate_operand") + (match_operand:MOVE128 1 "move_operand"))] + "reload_completed && loongarch_split_move_insn_p (operands[0], operands[1], insn)" + [(const_int 0)] +{ + loongarch_split_move_insn (operands[0], operands[1], curr_insn); + DONE; +}) + +;; Emit a doubleword move in which exactly one of the operands is +;; a floating-point register. We can't just emit two normal moves +;; because of the constraints imposed by the FPU register model; +;; see loongarch_cannot_change_mode_class for details. Instead, we keep +;; the FPR whole and use special patterns to refer to each word of +;; the other operand. + +(define_expand "move_doubleword_fpr" + [(set (match_operand:SPLITF 0) + (match_operand:SPLITF 1))] + "" +{ + if (FP_REG_RTX_P (operands[0])) + { + rtx low = loongarch_subword (operands[1], 0); + rtx high = loongarch_subword (operands[1], 1); + emit_insn (gen_load_low (operands[0], low)); + if (!TARGET_64BIT) + emit_insn (gen_movgr2frh (operands[0], high, operands[0])); + else + emit_insn (gen_load_high (operands[0], high, operands[0])); + } + else + { + rtx low = loongarch_subword (operands[0], 0); + rtx high = loongarch_subword (operands[0], 1); + emit_insn (gen_store_word (low, operands[1], const0_rtx)); + if (!TARGET_64BIT) + emit_insn (gen_movfrh2gr (high, operands[1])); + else + emit_insn (gen_store_word (high, operands[1], const1_rtx)); + } + DONE; +}) + +;; Load the low word of operand 0 with operand 1. +(define_insn "load_low" + [(set (match_operand:SPLITF 0 "register_operand" "=f,f") + (unspec:SPLITF [(match_operand: 1 "general_operand" "dJ,m")] + UNSPEC_LOAD_LOW))] + "TARGET_HARD_FLOAT" +{ + operands[0] = loongarch_subword (operands[0], 0); + return loongarch_output_move (operands[0], operands[1]); +} + [(set_attr "move_type" "mgtf,fpload") + (set_attr "mode" "")]) + +;; Load the high word of operand 0 from operand 1, preserving the value +;; in the low word. +(define_insn "load_high" + [(set (match_operand:SPLITF 0 "register_operand" "=f,f") + (unspec:SPLITF [(match_operand: 1 "general_operand" "dJ,m") + (match_operand:SPLITF 2 "register_operand" "0,0")] + UNSPEC_LOAD_HIGH))] + "TARGET_HARD_FLOAT" +{ + operands[0] = loongarch_subword (operands[0], 1); + return loongarch_output_move (operands[0], operands[1]); +} + [(set_attr "move_type" "mgtf,fpload") + (set_attr "mode" "")]) + +;; Store one word of operand 1 in operand 0. Operand 2 is 1 to store the +;; high word and 0 to store the low word. 
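+
+;; Illustration only: on a 32-bit configuration a DFmode FPR-to-GPR-pair
+;; move is assumed to come out of move_doubleword_fpr above as
+;;   movfr2gr.s   $r4,$f0      (low word, via store_word)
+;;   movfrh2gr.s  $r5,$f0      (high word, via movfrh2gr)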
+(define_insn "store_word" + [(set (match_operand: 0 "nonimmediate_operand" "=d,m") + (unspec: [(match_operand:SPLITF 1 "register_operand" "f,f") + (match_operand 2 "const_int_operand")] + UNSPEC_STORE_WORD))] + "TARGET_HARD_FLOAT" +{ + operands[1] = loongarch_subword (operands[1], INTVAL (operands[2])); + return loongarch_output_move (operands[0], operands[1]); +} + [(set_attr "move_type" "mftg,fpstore") + (set_attr "mode" "")]) + +;; Move operand 1 to the high word of operand 0 using movgr2frh, preserving the +;; value in the low word. +(define_insn "movgr2frh" + [(set (match_operand:SPLITF 0 "register_operand" "=f") + (unspec:SPLITF [(match_operand: 1 "reg_or_0_operand" "dJ") + (match_operand:SPLITF 2 "register_operand" "0")] + UNSPEC_MOVGR2FRH))] + "TARGET_HARD_FLOAT && ISA_HAS_MXFRH" + "movgr2frh.w\t%z1,%0" + [(set_attr "move_type" "mgtf") + (set_attr "mode" "")]) + +;; Move high word of operand 1 to operand 0 using movfrh2gr. +(define_insn "movfrh2gr" + [(set (match_operand: 0 "register_operand" "=d") + (unspec: [(match_operand:SPLITF 1 "register_operand" "f")] + UNSPEC_MOVFRH2GR))] + "TARGET_HARD_FLOAT && ISA_HAS_MXFRH" + "movfrh2gr.s\t%0,%1" + [(set_attr "move_type" "mftg") + (set_attr "mode" "")]) + +;; Expand in-line code to clear the instruction cache between operand[0] and +;; operand[1]. +(define_expand "clear_cache" + [(match_operand 0 "pmode_register_operand") + (match_operand 1 "pmode_register_operand")] + "" + " +{ + emit_insn (gen_ibar (const0_rtx)); + DONE; +}") + +(define_insn "ibar" + [(unspec_volatile:SI [(match_operand 0 "const_uimm15_operand")] UNSPEC_IBAR)] + "" + "ibar\t%0") + +(define_insn "dbar" + [(unspec_volatile:SI [(match_operand 0 "const_uimm15_operand")] UNSPEC_DBAR)] + "" + "dbar\t%0") + + + +;; Privileged state instruction + +(define_insn "cpucfg" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "d")] + UNSPEC_CPUCFG))] + "" + "cpucfg\t%0,%1" + [(set_attr "type" "load") + (set_attr "mode" "SI")]) + +(define_insn "asrtle_d" + [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "d") + (match_operand:DI 1 "register_operand" "d")] + UNSPEC_ASRTLE_D)] + "TARGET_64BIT" + "asrtle.d\t%0,%1" + [(set_attr "type" "load") + (set_attr "mode" "DI")]) + +(define_insn "asrtgt_d" + [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "d") + (match_operand:DI 1 "register_operand" "d")] + UNSPEC_ASRTGT_D)] + "TARGET_64BIT" + "asrtgt.d\t%0,%1" + [(set_attr "type" "load") + (set_attr "mode" "DI")]) + +(define_insn "
csrrd" + [(set (match_operand:GPR 0 "register_operand" "=d") + (unspec_volatile:GPR [(match_operand 1 "const_uimm14_operand")] + UNSPEC_CSRRD))] + "" + "csrrd\t%0,%1" + [(set_attr "type" "load") + (set_attr "mode" "")]) + +(define_insn "
csrwr" + [(set (match_operand:GPR 0 "register_operand" "=d") + (unspec_volatile:GPR + [(match_operand:GPR 1 "register_operand" "0") + (match_operand 2 "const_uimm14_operand")] + UNSPEC_CSRWR))] + "" + "csrwr\t%0,%2" + [(set_attr "type" "store") + (set_attr "mode" "")]) + +(define_insn "
csrxchg" + [(set (match_operand:GPR 0 "register_operand" "=d") + (unspec_volatile:GPR + [(match_operand:GPR 1 "register_operand" "0") + (match_operand:GPR 2 "register_operand" "q") + (match_operand 3 "const_uimm14_operand")] + UNSPEC_CSRXCHG))] + "" + "csrxchg\t%0,%2,%3" + [(set_attr "type" "load") + (set_attr "mode" "")]) + +(define_insn "iocsrrd_" + [(set (match_operand:QHWD 0 "register_operand" "=d") + (unspec_volatile:QHWD [(match_operand:SI 1 "register_operand" "d")] + UNSPEC_IOCSRRD))] + "" + "iocsrrd.\t%0,%1" + [(set_attr "type" "load") + (set_attr "mode" "")]) + +(define_insn "iocsrwr_" + [(unspec_volatile:QHWD [(match_operand:QHWD 0 "register_operand" "d") + (match_operand:SI 1 "register_operand" "d")] + UNSPEC_IOCSRWR)] + "" + "iocsrwr.\t%0,%1" + [(set_attr "type" "load") + (set_attr "mode" "")]) + +(define_insn "
cacop" + [(unspec_volatile:X [(match_operand 0 "const_uimm5_operand") + (match_operand:X 1 "register_operand" "d") + (match_operand 2 "const_imm12_operand")] + UNSPEC_CACOP)] + "" + "cacop\t%0,%1,%2" + [(set_attr "type" "load") + (set_attr "mode" "")]) + +(define_insn "
lddir" + [(unspec_volatile:X [(match_operand:X 0 "register_operand" "d") + (match_operand:X 1 "register_operand" "d") + (match_operand 2 "const_uimm5_operand")] + UNSPEC_LDDIR)] + "" + "lddir\t%0,%1,%2" + [(set_attr "type" "load") + (set_attr "mode" "")]) + +(define_insn "
ldpte" + [(unspec_volatile:X [(match_operand:X 0 "register_operand" "d") + (match_operand 1 "const_uimm5_operand")] + UNSPEC_LDPTE)] + "" + "ldpte\t%0,%1" + [(set_attr "type" "load") + (set_attr "mode" "")]) + + +;; Block moves, see loongarch.c for more details. +;; Argument 0 is the destination +;; Argument 1 is the source +;; Argument 2 is the length +;; Argument 3 is the alignment + +(define_expand "movmemsi" + [(parallel [(set (match_operand:BLK 0 "general_operand") + (match_operand:BLK 1 "general_operand")) + (use (match_operand:SI 2 "")) + (use (match_operand:SI 3 "const_int_operand"))])] + " !TARGET_MEMCPY" +{ + if (loongarch_expand_block_move (operands[0], operands[1], operands[2])) + DONE; + else + FAIL; +}) + +;; +;; .................... +;; +;; SHIFTS +;; +;; .................... + +(define_expand "3" + [(set (match_operand:GPR 0 "register_operand") + (any_shift:GPR (match_operand:GPR 1 "register_operand") + (match_operand:SI 2 "arith_operand")))] + "" +{ +}) + +(define_insn "*3" + [(set (match_operand:GPR 0 "register_operand" "=d") + (any_shift:GPR (match_operand:GPR 1 "register_operand" "d") + (match_operand:SI 2 "arith_operand" "dI")))] + "" +{ + if (CONST_INT_P (operands[2])) + { + operands[2] = GEN_INT (INTVAL (operands[2]) + & (GET_MODE_BITSIZE (mode) - 1)); + return "i.\t%0,%1,%2"; + } else + return ".\t%0,%1,%2"; +} + [(set_attr "type" "shift") + (set_attr "compression" "none") + (set_attr "mode" "")]) + +(define_insn "*si3_extend" + [(set (match_operand:DI 0 "register_operand" "=d") + (sign_extend:DI + (any_shift:SI (match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "arith_operand" "dI"))))] + "TARGET_64BIT" +{ + if (CONST_INT_P (operands[2])) + { + operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f); + return "i.w\t%0,%1,%2"; + } else + return ".w\t%0,%1,%2"; +} + [(set_attr "type" "shift") + (set_attr "mode" "SI")]) + +(define_insn "zero_extend_ashift1" + [ (set (match_operand:DI 0 "register_operand" "=d") + (and:DI (ashift:DI (subreg:DI (match_operand:SI 1 "register_operand" "d") 0) + (match_operand 2 "const_immlsa_operand" "")) + (match_operand 3 "shift_mask_operand" "")))] +"" +"bstrpick.d\t%0,%1,31,0\n\talsl.d\t%0,%0,$zero,%2" +[(set_attr "type" "arith") + (set_attr "mode" "DI") + (set_attr "insn_count" "2")]) + +(define_insn "zero_extend_ashift2" + [ (set (match_operand:DI 0 "register_operand" "=d") + (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "d") + (match_operand 2 "const_immlsa_operand" "")) + (match_operand 3 "shift_mask_operand" "")))] +"" +"bstrpick.d\t%0,%1,31,0\n\talsl.d\t%0,%0,$zero,%2" +[(set_attr "type" "arith") + (set_attr "mode" "DI") + (set_attr "insn_count" "2")]) + +(define_insn "alsl_paired1" + [(set (match_operand:DI 0 "register_operand" "=&d") + (plus:DI (and:DI (ashift:DI (subreg:DI (match_operand:SI 1 "register_operand" "d") 0) + (match_operand 2 "const_immlsa_operand" "")) + (match_operand 3 "shift_mask_operand" "")) + (match_operand:DI 4 "register_operand" "d")))] + "" + "bstrpick.d\t%0,%1,31,0\n\talsl.d\t%0,%0,%4,%2" + [(set_attr "type" "arith") + (set_attr "mode" "DI") + (set_attr "insn_count" "2")]) + +(define_insn "alsl_paired2" + [(set (match_operand:DI 0 "register_operand" "=&d") + (plus:DI (match_operand:DI 1 "register_operand" "d") + (and:DI (ashift:DI (match_operand:DI 2 "register_operand" "d") + (match_operand 3 "const_immlsa_operand" "")) + (match_operand 4 "shift_mask_operand" ""))))] + "" + "bstrpick.d\t%0,%2,31,0\n\talsl.d\t%0,%0,%1,%3" + [(set_attr "type" "arith") + (set_attr "mode" "DI") + 
(set_attr "insn_count" "2")]) + +(define_insn "alsl_" + [(set (match_operand:GPR 0 "register_operand" "=d") + (plus:GPR (ashift:GPR (match_operand:GPR 1 "register_operand" "d") + (match_operand 2 "const_immlsa_operand" "")) + (match_operand:GPR 3 "register_operand" "d")))] + "ISA_HAS_LSA" + "alsl.\t%0,%1,%3,%2" + [(set_attr "type" "arith") + (set_attr "mode" "")]) + +(define_insn "rotr3" + [(set (match_operand:GPR 0 "register_operand" "=d") + (rotatert:GPR (match_operand:GPR 1 "register_operand" "d") + (match_operand:SI 2 "arith_operand" "dI")))] + "" +{ + if (CONST_INT_P (operands[2])) + { + return "rotri.\t%0,%1,%2"; + } else + return "rotr.\t%0,%1,%2"; +} + [(set_attr "type" "shift") + (set_attr "mode" "")]) + +(define_insn "bswaphi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (bswap:HI (match_operand:HI 1 "register_operand" "d")))] + "" + "revb.2h\t%0,%1" + [(set_attr "type" "shift")]) + +(define_insn_and_split "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (bswap:SI (match_operand:SI 1 "register_operand" "d")))] + "" + "#" + "" + [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_WSBH)) + (set (match_dup 0) (rotatert:SI (match_dup 0) (const_int 16)))] + "" + [(set_attr "insn_count" "2")]) + +(define_insn_and_split "bswapdi2" + [(set (match_operand:DI 0 "register_operand" "=d") + (bswap:DI (match_operand:DI 1 "register_operand" "d")))] + "TARGET_64BIT" + "#" + "" + [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_DSBH)) + (set (match_dup 0) (unspec:DI [(match_dup 0)] UNSPEC_DSHD))] + "" + [(set_attr "insn_count" "2")]) + +(define_insn "wsbh" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec:SI [(match_operand:SI 1 "register_operand" "d")] UNSPEC_WSBH))] + "" + "revb.2h\t%0,%1" + [(set_attr "type" "shift")]) + +(define_insn "dsbh" + [(set (match_operand:DI 0 "register_operand" "=d") + (unspec:DI [(match_operand:DI 1 "register_operand" "d")] UNSPEC_DSBH))] + "TARGET_64BIT" + "revb.4h\t%0,%1" + [(set_attr "type" "shift")]) + +(define_insn "dshd" + [(set (match_operand:DI 0 "register_operand" "=d") + (unspec:DI [(match_operand:DI 1 "register_operand" "d")] UNSPEC_DSHD))] + "TARGET_64BIT" + "revh.d\t%0,%1" + [(set_attr "type" "shift")]) + +;; +;; .................... +;; +;; CONDITIONAL BRANCHES +;; +;; .................... + +;; Conditional branches on floating-point equality tests. + +(define_insn "*branch_fp_CCmode" + [(set (pc) + (if_then_else + (match_operator 1 "equality_operator" + [(match_operand:CC 2 "register_operand" "z") + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_HARD_FLOAT" +{ + return loongarch_output_conditional_branch (insn, operands, + LARCH_BRANCH ("b%F1", "%Z2%0"), + LARCH_BRANCH ("b%W1", "%Z2%0")); +} + [(set_attr "type" "branch")]) + +(define_insn "*branch_fp_inverted_CCmode" + [(set (pc) + (if_then_else + (match_operator 1 "equality_operator" + [(match_operand:CC 2 "register_operand" "z") + (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "TARGET_HARD_FLOAT" +{ + return loongarch_output_conditional_branch (insn, operands, + LARCH_BRANCH ("b%W1", "%Z2%0"), + LARCH_BRANCH ("b%F1", "%Z2%0")); +} + [(set_attr "type" "branch")]) + +;; Conditional branches on ordered comparisons with zero. 
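+
+;; Illustration only: a test such as "if (x > 0)" is expected to match
+;; the register/zero alternative below and emit one compare-and-branch,
+;;   blt  $zero,$r4,.L3
+;; rather than a separate slt/bnez pair.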
+ +(define_insn "*branch_order" + [(set (pc) + (if_then_else + (match_operator 1 "order_operator" + [(match_operand:GPR 2 "register_operand" "d,d") + (match_operand:GPR 3 "reg_or_0_operand" "J,d")]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + { return loongarch_output_order_conditional_branch (insn, operands, false); } + [(set_attr "type" "branch") + (set_attr "compact_form" "maybe,always") + (set_attr "hazard" "forbidden_slot")]) + +(define_insn "*branch_order_inverted" + [(set (pc) + (if_then_else + (match_operator 1 "order_operator" + [(match_operand:GPR 2 "register_operand" "d,d") + (match_operand:GPR 3 "reg_or_0_operand" "J,d")]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + { return loongarch_output_order_conditional_branch (insn, operands, true); } + [(set_attr "type" "branch") + (set_attr "compact_form" "maybe,always") + (set_attr "hazard" "forbidden_slot")]) + +;; Conditional branch on equality comparison. + +(define_insn "*branch_equality" + [(set (pc) + (if_then_else + (match_operator 1 "equality_operator" + [(match_operand:GPR 2 "register_operand" "d") + (match_operand:GPR 3 "reg_or_0_operand" "dJ")]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + { return loongarch_output_equal_conditional_branch (insn, operands, false); } + [(set_attr "type" "branch") + (set_attr "compact_form" "maybe") + (set_attr "hazard" "forbidden_slot")]) + + +(define_insn "*branch_equality_inverted" + [(set (pc) + (if_then_else + (match_operator 1 "equality_operator" + [(match_operand:GPR 2 "register_operand" "d") + (match_operand:GPR 3 "reg_or_0_operand" "dJ")]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + { return loongarch_output_equal_conditional_branch (insn, operands, true); } + [(set_attr "type" "branch") + (set_attr "compact_form" "maybe") + (set_attr "hazard" "forbidden_slot")]) + + +(define_expand "cbranch4" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand:GPR 1 "register_operand") + (match_operand:GPR 2 "nonmemory_operand")]) + (label_ref (match_operand 3 "")) + (pc)))] + "" +{ + loongarch_expand_conditional_branch (operands); + DONE; +}) + +(define_expand "cbranch4" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand:SCALARF 1 "register_operand") + (match_operand:SCALARF 2 "register_operand")]) + (label_ref (match_operand 3 "")) + (pc)))] + "" +{ + loongarch_expand_conditional_branch (operands); + DONE; +}) + +;; Used to implement built-in functions. +(define_expand "condjump" + [(set (pc) + (if_then_else (match_operand 0) + (label_ref (match_operand 1)) + (pc)))]) + + + +;; +;; .................... +;; +;; SETTING A REGISTER FROM A COMPARISON +;; +;; .................... + +;; Destination is always set in SI mode. 
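+
+;; Illustration only: a comparison used for its value, e.g.
+;;   int f (long a, long b) { return a < b; }
+;; goes through cstore4 below and is expected to end up as a single
+;;   slt  $r4,$r4,$r5
+;; with the 0/1 result already in SImode.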
+ +(define_expand "cstore4" + [(set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "loongarch_cstore_operator" + [(match_operand:GPR 2 "register_operand") + (match_operand:GPR 3 "nonmemory_operand")]))] + "" +{ + loongarch_expand_scc (operands); + DONE; +}) + +(define_insn "*seq_zero_" + [(set (match_operand:GPR2 0 "register_operand" "=d") + (eq:GPR2 (match_operand:GPR 1 "register_operand" "d") + (const_int 0)))] + "" + "sltui\t%0,%1,1" + [(set_attr "type" "slt") + (set_attr "mode" "")]) + + +(define_insn "*sne_zero_" + [(set (match_operand:GPR2 0 "register_operand" "=d") + (ne:GPR2 (match_operand:GPR 1 "register_operand" "d") + (const_int 0)))] + "" + "sltu\t%0,%.,%1" + [(set_attr "type" "slt") + (set_attr "mode" "")]) + +(define_insn "*sgt_" + [(set (match_operand:GPR2 0 "register_operand" "=d") + (any_gt:GPR2 (match_operand:GPR 1 "register_operand" "d") + (match_operand:GPR 2 "reg_or_0_operand" "dJ")))] + "" + "slt\t%0,%z2,%1" + [(set_attr "type" "slt") + (set_attr "mode" "")]) + + +(define_insn "*sge_" + [(set (match_operand:GPR2 0 "register_operand" "=d") + (any_ge:GPR2 (match_operand:GPR 1 "register_operand" "d") + (const_int 1)))] + "" + "slti\t%0,%.,%1" + [(set_attr "type" "slt") + (set_attr "mode" "")]) + +(define_insn "*slt_" + [(set (match_operand:GPR2 0 "register_operand" "=d") + (any_lt:GPR2 (match_operand:GPR 1 "register_operand" "d") + (match_operand:GPR 2 "arith_operand" "dI")))] + "" +{ + if (CONST_INT_P (operands[2])) + { + return "slti\t%0,%1,%2"; + } else + return "slt\t%0,%1,%2"; +} + [(set_attr "type" "slt") + (set_attr "mode" "")]) + + +(define_insn "*sle_" + [(set (match_operand:GPR2 0 "register_operand" "=d") + (any_le:GPR2 (match_operand:GPR 1 "register_operand" "d") + (match_operand:GPR 2 "sle_operand" "")))] + "" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) + 1); + return "slti\t%0,%1,%2"; +} + [(set_attr "type" "slt") + (set_attr "mode" "")]) + + +;; +;; .................... +;; +;; FLOATING POINT COMPARISONS +;; +;; .................... + +(define_insn "s__using_CCmode" + [(set (match_operand:CC 0 "register_operand" "=z") + (fcond:CC (match_operand:SCALARF 1 "register_operand" "f") + (match_operand:SCALARF 2 "register_operand" "f")))] + "" + "fcmp..\t%Z0%1,%2" + [(set_attr "type" "fcmp") + (set_attr "mode" "FPSW")]) + +(define_insn "s__using_CCmode" + [(set (match_operand:CC 0 "register_operand" "=z") + (swapped_fcond:CC (match_operand:SCALARF 1 "register_operand" "f") + (match_operand:SCALARF 2 "register_operand" "f")))] + "" + "fcmp..\t%Z0%2,%1" + [(set_attr "type" "fcmp") + (set_attr "mode" "FPSW")]) + +;; +;; .................... +;; +;; UNCONDITIONAL BRANCHES +;; +;; .................... + +;; Unconditional branches. 
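+
+;; Illustration only: a direct jump below is a single "b .L5"; the
+;; indirect and tablejump forms all end in "jirl $zero,<reg>,0", which
+;; the assembler also accepts under its "jr <reg>" alias.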
+ +(define_expand "jump" + [(set (pc) + (label_ref (match_operand 0)))]) + +(define_insn "*jump_absolute" + [(set (pc) + (label_ref (match_operand 0)))] + "TARGET_ABSOLUTE_JUMPS" +{ + return LARCH_ABSOLUTE_JUMP ("b\t%l0"); +} + [(set_attr "type" "branch") + (set_attr "compact_form" "maybe")]) + +(define_insn "*jump_pic" + [(set (pc) + (label_ref (match_operand 0)))] + "!TARGET_ABSOLUTE_JUMPS" +{ + return "b\t%0"; +} + [(set_attr "type" "branch") + (set_attr "compact_form" "maybe")]) + + + +(define_expand "indirect_jump" + [(set (pc) (match_operand 0 "register_operand"))] + "" +{ + operands[0] = force_reg (Pmode, operands[0]); + emit_jump_insn (PMODE_INSN (gen_indirect_jump, (operands[0]))); + DONE; +}) + +(define_insn "indirect_jump_" + [(set (pc) (match_operand:P 0 "register_operand" "d"))] + "" + { + return "jirl\t$zero,%0,0"; + } + [(set_attr "type" "jump") + (set_attr "mode" "none")]) + +(define_expand "tablejump" + [(set (pc) + (match_operand 0 "register_operand")) + (use (label_ref (match_operand 1 "")))] + "" +{ + if (flag_pic) + operands[0] = expand_simple_binop (Pmode, PLUS, operands[0], + gen_rtx_LABEL_REF (Pmode, operands[1]), + NULL_RTX, 0, OPTAB_DIRECT); + emit_jump_insn (PMODE_INSN (gen_tablejump, (operands[0], operands[1]))); + DONE; +}) + +(define_insn "tablejump_" + [(set (pc) + (match_operand:P 0 "register_operand" "d")) + (use (label_ref (match_operand 1 "" "")))] + "" + { + return "jirl\t$zero,%0,0"; + } + [(set_attr "type" "jump") + (set_attr "mode" "none")]) + + +;; +;; .................... +;; +;; Function prologue/epilogue +;; +;; .................... +;; + +(define_expand "prologue" + [(const_int 1)] + "" +{ + n_loongarch_expand_prologue (); + DONE; +}) + +;; Block any insns from being moved before this point, since the +;; profiling call to mcount can use various registers that aren't +;; saved or used to pass arguments. + +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] UNSPEC_BLOCKAGE)] + "" + "" + [(set_attr "type" "ghost") + (set_attr "mode" "none")]) + +(define_insn "probe_stack_range_" + [(set (match_operand:P 0 "register_operand" "=d") + (unspec_volatile:P [(match_operand:P 1 "register_operand" "0") + (match_operand:P 2 "register_operand" "d")] + UNSPEC_PROBE_STACK_RANGE))] + "" + { return loongarch_output_probe_stack_range (operands[0], operands[2]); } + [(set_attr "type" "unknown") + (set_attr "can_delay" "no") + (set_attr "mode" "")]) + +(define_expand "epilogue" + [(const_int 2)] + "" +{ + n_loongarch_expand_epilogue (false); + DONE; +}) + +(define_expand "sibcall_epilogue" + [(const_int 2)] + "" +{ + n_loongarch_expand_epilogue (true); + DONE; +}) + +;; Trivial return. Make it look like a normal return insn as that +;; allows jump optimizations to work better. + +(define_expand "return" + [(simple_return)] + "loongarch_can_use_return_insn ()" + { }) + +(define_expand "simple_return" + [(simple_return)] + "" + { }) + +(define_insn "*" + [(any_return)] + "" + { + operands[0] = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); + return "jirl\t$zero,%0,0"; + } + [(set_attr "type" "jump") + (set_attr "mode" "none")]) + +;; Normal return. + +(define_insn "_internal" + [(any_return) + (use (match_operand 0 "pmode_register_operand" ""))] + "" + { + return "jirl\t$zero,%0,0"; + } + [(set_attr "type" "jump") + (set_attr "mode" "none")]) + +;; Exception return. +(define_insn "loongarch_ertn" + [(return) + (unspec_volatile [(const_int 0)] UNSPEC_ERTN)] + "" + "ertn" + [(set_attr "type" "trap") + (set_attr "mode" "none")]) + +;; Disable interrupts. 
+(define_insn "loongarch_di" + [(unspec_volatile [(const_int 0)] UNSPEC_DI)] + "" + "di" + [(set_attr "type" "trap") + (set_attr "mode" "none")]) + +;; Execution hazard barrier. +(define_insn "loongarch_ehb" + [(unspec_volatile [(const_int 0)] UNSPEC_EHB)] + "" + "ehb" + [(set_attr "type" "trap") + (set_attr "mode" "none")]) + +;; Read GPR from previous shadow register set. +(define_insn "loongarch_rdpgpr_" + [(set (match_operand:P 0 "register_operand" "=d") + (unspec_volatile:P [(match_operand:P 1 "register_operand" "d")] + UNSPEC_RDPGPR))] + "" + "rdpgpr\t%0,%1" + [(set_attr "type" "move") + (set_attr "mode" "")]) + +;; This is used in compiling the unwind routines. +(define_expand "eh_return" + [(use (match_operand 0 "general_operand"))] + "" +{ + if (GET_MODE (operands[0]) != word_mode) + operands[0] = convert_to_mode (word_mode, operands[0], 0); + if (TARGET_64BIT) + emit_insn (gen_eh_set_lr_di (operands[0])); + else + emit_insn (gen_eh_set_lr_si (operands[0])); + DONE; +}) + +;; Clobber the return address on the stack. We can't expand this +;; until we know where it will be put in the stack frame. + +(define_insn "eh_set_lr_si" + [(unspec [(match_operand:SI 0 "register_operand" "d")] UNSPEC_EH_RETURN) + (clobber (match_scratch:SI 1 "=&d"))] + "! TARGET_64BIT" + "#") + +(define_insn "eh_set_lr_di" + [(unspec [(match_operand:DI 0 "register_operand" "d")] UNSPEC_EH_RETURN) + (clobber (match_scratch:DI 1 "=&d"))] + "TARGET_64BIT" + "#") + +(define_split + [(unspec [(match_operand 0 "register_operand")] UNSPEC_EH_RETURN) + (clobber (match_scratch 1))] + "reload_completed" + [(const_int 0)] +{ + loongarch_set_return_address (operands[0], operands[1]); + DONE; +}) + + + +;; +;; .................... +;; +;; FUNCTION CALLS +;; +;; .................... + + +;; Sibling calls. All these patterns use jump instructions. + +;; If TARGET_SIBCALLS, call_insn_operand will only accept constant +;; addresses if a direct jump is acceptable. Since the 'S' constraint +;; is defined in terms of call_insn_operand, the same is true of the +;; constraints. + +;; When we use an indirect jump, we need a register that will be +;; preserved by the epilogue. 
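+
+;; Illustration only: a tail call such as
+;;   int g (int);
+;;   int f (int x) { return g (x + 1); }
+;; is expected to end in a bare "b g" (or "b %plt(g)" when going through
+;; the PLT), reusing the caller's frame; the register alternative below
+;; handles indirect sibcalls through a register that survives the
+;; epilogue.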
+ +(define_expand "sibcall" + [(parallel [(call (match_operand 0 "") + (match_operand 1 "")) + (use (match_operand 2 "")) ;; next_arg_reg + (use (match_operand 3 ""))])] ;; struct_value_size_rtx + "TARGET_SIBCALLS" +{ + rtx target = loongarch_legitimize_call_address (XEXP (operands[0], 0)); + + emit_call_insn (gen_sibcall_internal (target, operands[1])); + DONE; +}) + +(define_insn "sibcall_internal" + [(call (mem:SI (match_operand 0 "call_insn_operand" "j,c,a,t,h")) + (match_operand 1 "" ""))] + "TARGET_SIBCALLS && SIBLING_CALL_P (insn)" +{ + switch (which_alternative) + { + case 0: + return "jr\t%0"; + case 1: + if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) + return "pcaddu18i\t$t0,(%%pcrel(%0+0x20000))>>18\n\tjirl\t$zero,$t0,%%pcrel(%0+4)-(%%pcrel(%0+4+0x20000)>>18<<18)"; + else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) + return "la.local\t$t0,$t1,%1\n\tjr\t$t0"; + else + return "b\t%0"; + case 2: + if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) + return "b\t%0"; + else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) + return "la.global\t$t0,$t1,%1\n\tjr\t$t0"; + else + return "la.global\t$t0,%0\n\tjr\t$t0"; + case 3: + if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) + return "la.global\t$t0,$t1,%1\n\tjr\t$t0"; + else + return "la.global\t$t0,%0\n\tjr\t$t0"; + case 4: + if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) + return "b\t%%plt(%0)"; + else if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) + return "pcaddu18i\t$t0,(%%plt(%0)+0x20000)>>18\n\tjirl\t$zero,$t0,%%plt(%0)+4-((%%plt(%0)+(4+0x20000))>>18<<18)"; + else + sorry ("cmodel extreme and tiny static not support plt."); + default: + gcc_unreachable (); + } +} + [(set_attr "jal" "indirect,direct,direct,direct,direct")]) + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0 "") + (call (match_operand 1 "") + (match_operand 2 ""))) + (use (match_operand 3 ""))])] ;; next_arg_reg + "TARGET_SIBCALLS" +{ + rtx target = loongarch_legitimize_call_address (XEXP (operands[1], 0)); + + /* Handle return values created by loongarch_return_fpr_pair. */ + if (GET_CODE (operands[0]) == PARALLEL && XVECLEN (operands[0], 0) == 2) + { + emit_call_insn (gen_sibcall_value_multiple_internal (XEXP (XVECEXP (operands[0], 0, 0), 0), + target, operands[2], XEXP (XVECEXP (operands[0], 0, 1), 0))); + } + else + { + /* Handle return values created by loongarch_return_fpr_single. 
*/ + if (GET_CODE (operands[0]) == PARALLEL && XVECLEN (operands[0], 0) == 1) + operands[0] = XEXP (XVECEXP (operands[0], 0, 0), 0); + + emit_call_insn (gen_sibcall_value_internal (operands[0], target, operands[2])); + } + DONE; +}) + +(define_insn "sibcall_value_internal" + [(set (match_operand 0 "register_operand" "") + (call (mem:SI (match_operand 1 "call_insn_operand" "j,c,a,t,h")) + (match_operand 2 "" "")))] + "TARGET_SIBCALLS && SIBLING_CALL_P (insn)" +{ + switch (which_alternative) + { + case 0: + return "jr\t%1"; + case 1: + if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) + return "pcaddu18i\t$t0,%%pcrel(%1+0x20000)>>18\n\tjirl\t$zero,$t0,%%pcrel(%1+4)-((%%pcrel(%1+4+0x20000))>>18<<18)"; + else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) + return "la.local\t$t0,$t1,%1\n\tjr\t$t0"; + else + return "b\t%1"; + case 2: + if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) + return "b\t%1"; + else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) + return "la.global\t$t0,$t1,%1\n\tjr\t$t0"; + else + return "la.global\t$t0,%1\n\tjr\t$t0"; + case 3: + if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) + return "la.global\t$t0,$t1,%1\n\tjr\t$t0"; + else + return "la.global\t$t0,%1\n\tjr\t$t0"; + case 4: + if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) + return " b\t%%plt(%1)"; + else if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) + return "pcaddu18i\t$t0,(%%plt(%1)+0x20000)>>18\n\tjirl\t$zero,$t0,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; + else + sorry ("loongarch cmodel extreme and tiny-static not support plt."); + default: + gcc_unreachable (); + } +} + [(set_attr "jal" "indirect,direct,direct,direct,direct")]) + +(define_insn "sibcall_value_multiple_internal" + [(set (match_operand 0 "register_operand" "") + (call (mem:SI (match_operand 1 "call_insn_operand" "j,c,a,t,h")) + (match_operand 2 "" ""))) + (set (match_operand 3 "register_operand" "") + (call (mem:SI (match_dup 1)) + (match_dup 2)))] + "TARGET_SIBCALLS && SIBLING_CALL_P (insn)" +{ + switch (which_alternative) + { + case 0: + return "jr\t%1"; + case 1: + if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) + return "pcaddu18i\t$t0,%%pcrel(%1+0x20000)>>18\n\tjirl\t$zero,$t0,%%pcrel(%1+4)-(%%pcrel(%1+4+0x20000)>>18<<18)"; + else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) + return "la.local\t$t0,$t1,%1\n\tjr\t$t0"; + else + return "b\t%1"; + case 2: + if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) + return "b\t%1"; + else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) + return "la.global\t$t0,$t1,%1\n\tjr\t$t0"; + else + return "la.global\t$t0,%1\n\tjr\t$t0"; + case 3: + if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) + return "la.global\t$t0,$t1,%1\n\tjr\t$t0"; + else + return "la.global\t$t0,%1\n\tjr\t$t0"; + case 4: + if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) + return "b\t%%plt(%1)"; + else if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) + return "pcaddu18i\t$t0,(%%plt(%1)+0x20000)>>18\n\tjirl\t$zero,$t0,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; + else + sorry ("loongarch cmodel extreme and tiny-static not support plt."); + default: + gcc_unreachable (); + } +} + [(set_attr "jal" "indirect,direct,direct,direct,direct")]) + +(define_expand "call" + [(parallel [(call (match_operand 0 "") + (match_operand 1 "")) + (use (match_operand 2 "")) ;; next_arg_reg + (use (match_operand 3 ""))])] ;; struct_value_size_rtx + "" +{ + rtx target = loongarch_legitimize_call_address (XEXP (operands[0], 0)); + + 
emit_call_insn (gen_call_internal (target, operands[1])); + DONE; +}) +;; In the last case, we can generate the individual instructions with +;; a define_split. There are several things to be wary of: +;; +;; - We can't expose the load of $gp before reload. If we did, +;; it might get removed as dead, but reload can introduce new +;; uses of $gp by rematerializing constants. +;; +;; - We shouldn't restore $gp after calls that never return. +;; It isn't valid to insert instructions between a noreturn +;; call and the following barrier. +;; +;; - The splitter deliberately changes the liveness of $gp. The unsplit +;; instruction preserves $gp and so have no effect on its liveness. +;; But once we generate the separate insns, it becomes obvious that +;; $gp is not live on entry to the call. +;; + +(define_insn "call_internal" + [(call (mem:SI (match_operand 0 "call_insn_operand" "e,c,a,t,h")) + (match_operand 1 "" "")) + (clobber (reg:SI RETURN_ADDR_REGNUM))] + "" +{ + switch (which_alternative) + { + case 0: + return "jirl\t$ra,%0,0"; + case 1: + if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) + return "pcaddu18i\t$ra,%%pcrel(%0+0x20000)>>18\n\tjirl\t$ra,$ra,%%pcrel(%0+4)-(%%pcrel(%0+4+0x20000)>>18<<18)"; + else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) + return "la.local\t$ra,$t0,%1\n\tjirl\t$ra,$ra,0"; + else + return "bl\t%0"; + case 2: + if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) + return "bl\t%0"; + else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) + return "la.global\t$ra,$t0,%1\n\tjirl\t$ra,$ra,0"; + else + return "la.global\t$ra,%0\n\tjirl\t$ra,$ra,0"; + case 3: + if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) + return "la.global\t$ra,$t0,%1\n\tjirl\t$ra,$ra,0"; + else + return "la.global\t$ra,%0\n\tjirl\t$ra,$ra,0"; + case 4: + if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) + return "pcaddu18i\t$ra,(%%plt(%0)+0x20000)>>18\n\tjirl\t$ra,$ra,%%plt(%0)+4-((%%plt(%0)+(4+0x20000))>>18<<18)"; + else if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) + return "bl\t%%plt(%0)"; + else + sorry ("cmodel extreme and tiny-static not support plt."); + default: + gcc_unreachable (); + } +} + [(set_attr "jal" "indirect,direct,direct,direct,direct") + (set_attr "insn_count" "1,2,3,3,2")]) + + +(define_expand "call_value" + [(parallel [(set (match_operand 0 "") + (call (match_operand 1 "") + (match_operand 2 ""))) + (use (match_operand 3 ""))])] ;; next_arg_reg + "" +{ + rtx target = loongarch_legitimize_call_address (XEXP (operands[1], 0)); + /* Handle return values created by loongarch_return_fpr_pair. */ + if (GET_CODE (operands[0]) == PARALLEL && XVECLEN (operands[0], 0) == 2) + emit_call_insn (gen_call_value_multiple_internal (XEXP (XVECEXP (operands[0], 0, 0), 0), + target, operands[2], XEXP (XVECEXP (operands[0], 0, 1), 0))); + else + { + /* Handle return values created by loongarch_return_fpr_single. */ + if (GET_CODE (operands[0]) == PARALLEL && XVECLEN (operands[0], 0) == 1) + operands[0] = XEXP (XVECEXP (operands[0], 0, 0), 0); + + emit_call_insn (gen_call_value_internal (operands[0], target, operands[2])); + } + DONE; +}) + +;; See comment for call_internal. 
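+
+;; Illustration only: under the default code model a direct call is one
+;;   bl  f          (or "bl %plt(f)" for PLT calls)
+;; with a +/-128 MiB reach; the large-model alternatives above are
+;; assumed to fall back to a pcaddu18i/jirl pair when that range is not
+;; enough.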
+(define_insn "call_value_internal" + [(set (match_operand 0 "register_operand" "") + (call (mem:SI (match_operand 1 "call_insn_operand" "e,c,a,t,h")) + (match_operand 2 "" ""))) + (clobber (reg:SI RETURN_ADDR_REGNUM))] + "" +{ + switch (which_alternative) + { + case 0: + return "jirl\t$ra,%1,0"; + case 1: + if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) + return "pcaddu18i\t$ra,%%pcrel(%1+0x20000)>>18\n\tjirl\t$ra,$ra,%%pcrel(%1+4)-(%%pcrel(%1+4+0x20000)>>18<<18)"; + else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) + return "la.local\t$ra,$t0,%1\n\tjirl\t$ra,$ra,0"; + else + return "bl\t%1"; + case 2: + if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) + return "bl\t%1"; + else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) + return "la.global\t$ra,$t0,%1\n\tjirl\t$ra,$ra,0"; + else + return "la.global\t$ra,%1\n\tjirl\t$ra,$ra,0"; + case 3: + if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) + return "la.global\t$ra,$t0,%1\n\tjirl\t$ra,$ra,0"; + else + return "la.global\t$ra,%1\n\tjirl\t$ra,$ra,0"; + case 4: + if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) + return "pcaddu18i\t$ra,(%%plt(%1)+0x20000)>>18\n\tjirl\t$ra,$ra,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; + else if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) + return "bl\t%%plt(%1)"; + else + sorry ("loongarch cmodel extreme and tiny-static not support plt."); + default: + gcc_unreachable (); + } +} + [(set_attr "jal" "indirect,direct,direct,direct,direct") + (set_attr "insn_count" "1,2,3,3,2")]) + +;; See comment for call_internal. +(define_insn "call_value_multiple_internal" + [(set (match_operand 0 "register_operand" "") + (call (mem:SI (match_operand 1 "call_insn_operand" "e,c,a,t,h")) + (match_operand 2 "" ""))) + (set (match_operand 3 "register_operand" "") + (call (mem:SI (match_dup 1)) + (match_dup 2))) + (clobber (reg:SI RETURN_ADDR_REGNUM))] + "" +{ + switch (which_alternative) + { + case 0: + return "jirl\t$ra,%1,0"; + case 1: + if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) + return "pcaddu18i\t$ra,%%pcrel(%1+0x20000)>>18\n\tjirl\t$ra,$ra,%%pcrel(%1+4)-(%%pcrel(%1+4+0x20000)>>18<<18)"; + else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) + return "la.local\t$ra,$t0,%1\n\tjirl\t$ra,$ra,0"; + else + return "bl\t%1"; + case 2: + if (loongarch_cmodel_var == LARCH_CMODEL_TINY_STATIC) + return "bl\t%1"; + else if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) + return "la.global\t$ra,$t0,%1\n\tjirl\t$ra,$ra,0 "; + else + return "la.global\t$ra,%1\n\tjirl\t$ra,$ra,0"; + case 3: + if (loongarch_cmodel_var == LARCH_CMODEL_EXTREME) + return "la.global\t$ra,$t0,%1\n\tjirl\t$ra,$ra,0"; + else + return "la.global\t$ra,%1\n\tjirl\t$ra,$ra,0"; + case 4: + if (loongarch_cmodel_var == LARCH_CMODEL_LARGE) + return "pcaddu18i\t$ra,(%%plt(%1)+0x20000)>>18\n\tjirl\t$ra,$ra,%%plt(%1)+4-((%%plt(%1)+(4+0x20000))>>18<<18)"; + else if (loongarch_cmodel_var == LARCH_CMODEL_NORMAL || loongarch_cmodel_var == LARCH_CMODEL_TINY) + return "bl\t%%plt(%1)"; + else + sorry ("loongarch cmodel extreme and tiny-static not support plt."); + default: + gcc_unreachable (); + } +} + [(set_attr "jal" "indirect,direct,direct,direct,direct") + (set_attr "insn_count" "1,2,3,3,2")]) + + +;; Call subroutine returning any type. 
+ +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "") + (const_int 0)) + (match_operand 1 "") + (match_operand 2 "")])] + "" +{ + int i; + + emit_call_insn (gen_call (operands[0], const0_rtx, NULL, const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + loongarch_emit_move (SET_DEST (set), SET_SRC (set)); + } + + emit_insn (gen_blockage ()); + DONE; +}) + +;; +;; .................... +;; +;; MISC. +;; +;; .................... +;; + + +(define_insn "*prefetch_indexed_" + [(prefetch (plus:P (match_operand:P 0 "register_operand" "d") + (match_operand:P 1 "register_operand" "d")) + (match_operand 2 "const_int_operand" "n") + (match_operand 3 "const_int_operand" "n"))] + "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT" +{ + operands[2] = loongarch_prefetch_cookie (operands[2], operands[3]); + return "prefx\t%2,%1(%0)"; +} + [(set_attr "type" "prefetchx")]) + +(define_insn "nop" + [(const_int 0)] + "" + "nop" + [(set_attr "type" "nop") + (set_attr "mode" "none")]) + +;; Like nop, but commented out when outside a .set noreorder block. +(define_insn "hazard_nop" + [(const_int 1)] + "" + { + return "#nop"; + } + [(set_attr "type" "nop")]) + +;; The `.insn' pseudo-op. +(define_insn "insn_pseudo" + [(unspec_volatile [(const_int 0)] UNSPEC_INSN_PSEUDO)] + "" + ".insn" + [(set_attr "mode" "none") + (set_attr "insn_count" "0")]) + +;; Conditional move instructions. + +(define_insn "*sel_using_" + [(set (match_operand:GPR 0 "register_operand" "=d,d") + (if_then_else:GPR + (equality_op:GPR2 (match_operand:GPR2 1 "register_operand" "d,d") + (const_int 0)) + (match_operand:GPR 2 "reg_or_0_operand" "d,J") + (match_operand:GPR 3 "reg_or_0_operand" "J,d")))] + "register_operand (operands[2], mode) + != register_operand (operands[3], mode)" + "@ + \t%0,%2,%1 + \t%0,%3,%1" + [(set_attr "type" "condmove") + (set_attr "mode" "")]) + +;; sel.fmt copies the 3rd argument when the 1st is non-zero and the 2nd +;; argument if the 1st is zero. This means operand 2 and 3 are +;; inverted in the instruction. + +;; FIXME: fsel +(define_insn "*sel" + [(set (match_operand:SCALARF 0 "register_operand" "=f") + (if_then_else:SCALARF + (ne:CC (match_operand:CC 1 "register_operand" "z") + (const_int 0)) + (match_operand:SCALARF 2 "reg_or_0_operand" "f") + (match_operand:SCALARF 3 "reg_or_0_operand" "f")))] + "" + "fsel\t%0,%3,%2,%1" + [(set_attr "type" "condmove") + (set_attr "mode" "")]) + +;; These are the main define_expand's used to make conditional moves. 
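+
+;; Illustration only: an integer conditional move such as
+;;   long f (long c, long a, long b) { return c ? a : b; }
+;; is assumed to be expanded by loongarch_expand_conditional_move into a
+;; masknez/maskeqz/or triple, e.g.
+;;   masknez  $r6,$r6,$r4
+;;   maskeqz  $r5,$r5,$r4
+;;   or       $r4,$r5,$r6
+;; while the sel pattern above handles the cases where one arm is zero.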
+ +(define_expand "movcc" + [(set (match_dup 4) (match_operand 1 "comparison_operator")) + (set (match_operand:GPR 0 "register_operand") + (if_then_else:GPR (match_dup 5) + (match_operand:GPR 2 "reg_or_0_operand") + (match_operand:GPR 3 "reg_or_0_operand")))] + "" +{ + if (!INTEGRAL_MODE_P (GET_MODE (XEXP (operands[1], 0)))) + FAIL; + + if (loongarch_expand_conditional_move (operands)) + DONE; + else + FAIL; +}) + +;; FIXME: fsel +(define_expand "movcc" + [(set (match_dup 4) (match_operand 1 "comparison_operator")) + (set (match_operand:SCALARF 0 "register_operand") + (if_then_else:SCALARF (match_dup 5) + (match_operand:SCALARF 2 "reg_or_0_operand") + (match_operand:SCALARF 3 "reg_or_0_operand")))] + "" +{ + if (!FLOAT_MODE_P (GET_MODE (XEXP (operands[1], 0)))) + FAIL; + + if (loongarch_expand_conditional_move (operands)) + DONE; + else + FAIL; +}) + +(define_split + [(match_operand 0 "small_data_pattern")] + "reload_completed" + [(match_dup 0)] + { operands[0] = loongarch_rewrite_small_data (operands[0]); }) + +;; Thread-Local Storage + +(define_insn "got_load_tls_gd" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec:P + [(match_operand:P 1 "symbolic_operand" "")] + UNSPEC_TLS_GD))] + "" + "la.tls.gd\t%0,%1" + [(set_attr "got" "load") + (set_attr "mode" "")]) + +(define_insn "got_load_tls_ld" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec:P + [(match_operand:P 1 "symbolic_operand" "")] + UNSPEC_TLS_LD))] + "" + "la.tls.ld\t%0,%1" + [(set_attr "got" "load") + (set_attr "mode" "")]) + +(define_insn "got_load_tls_le" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec:P + [(match_operand:P 1 "symbolic_operand" "")] + UNSPEC_TLS_LE))] + "" + "la.tls.le\t%0,%1" + [(set_attr "got" "load") + (set_attr "mode" "")]) + +(define_insn "got_load_tls_ie" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec:P + [(match_operand:P 1 "symbolic_operand" "")] + UNSPEC_TLS_IE))] + "" + "la.tls.ie\t%0,%1" + [(set_attr "got" "load") + (set_attr "mode" "")]) + +(define_insn "loongarch_movfcsr2gr" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec_volatile:SI [(match_operand 1 "const_uimm5_operand")] UNSPEC_MOVFCSR2GR))] + "TARGET_HARD_FLOAT" + "movfcsr2gr\t%0,$r%1") + +(define_insn "loongarch_movgr2fcsr" + [(unspec_volatile [(match_operand 0 "const_uimm5_operand") + (match_operand:SI 1 "register_operand" "d")] + UNSPEC_MOVGR2FCSR)] + "TARGET_HARD_FLOAT" + "movgr2fcsr\t$r%0,%1") + + +;; Match paired HI/SI/SF/DFmode load/stores. +(define_insn "*join2_load_store" + [(set (match_operand:JOIN_MODE 0 "nonimmediate_operand" "=d,f,m,m,d,ZC") + (match_operand:JOIN_MODE 1 "nonimmediate_operand" "m,m,d,f,ZC,d")) + (set (match_operand:JOIN_MODE 2 "nonimmediate_operand" "=d,f,m,m,d,ZC") + (match_operand:JOIN_MODE 3 "nonimmediate_operand" "m,m,d,f,ZC,d"))] + "ENABLE_LD_ST_PAIRS && reload_completed" + { + bool load_p = (which_alternative == 0 || which_alternative == 1); + /* Reg-renaming pass reuses base register if it is dead after bonded loads. + Hardware does not bond those loads, even when they are consecutive. + However, order of the loads need to be checked for correctness. 
*/ + if (!load_p || !reg_overlap_mentioned_p (operands[0], operands[1])) + { + output_asm_insn (loongarch_output_move (operands[0], operands[1]), + operands); + output_asm_insn (loongarch_output_move (operands[2], operands[3]), + &operands[2]); + } + else + { + output_asm_insn (loongarch_output_move (operands[2], operands[3]), + &operands[2]); + output_asm_insn (loongarch_output_move (operands[0], operands[1]), + operands); + } + return ""; + } + [(set_attr "move_type" "load,fpload,store,fpstore,load,store") + (set_attr "insn_count" "2,2,2,2,2,2")]) + +;; 2 HI/SI/SF/DF loads are joined. +;; P5600 does not support bonding of two LBs, hence QI mode is not included. +;; The loads must be non-volatile as they might be reordered at the time of asm +;; generation. +(define_peephole2 + [(set (match_operand:JOIN_MODE 0 "register_operand") + (match_operand:JOIN_MODE 1 "non_volatile_mem_operand")) + (set (match_operand:JOIN_MODE 2 "register_operand") + (match_operand:JOIN_MODE 3 "non_volatile_mem_operand"))] + "ENABLE_LD_ST_PAIRS + && loongarch_load_store_bonding_p (operands, mode, true)" + [(parallel [(set (match_dup 0) + (match_dup 1)) + (set (match_dup 2) + (match_dup 3))])] + "") + +;; 2 HI/SI/SF/DF stores are joined. +;; P5600 does not support bonding of two SBs, hence QI mode is not included. +(define_peephole2 + [(set (match_operand:JOIN_MODE 0 "memory_operand") + (match_operand:JOIN_MODE 1 "register_operand")) + (set (match_operand:JOIN_MODE 2 "memory_operand") + (match_operand:JOIN_MODE 3 "register_operand"))] + "ENABLE_LD_ST_PAIRS + && loongarch_load_store_bonding_p (operands, mode, false)" + [(parallel [(set (match_dup 0) + (match_dup 1)) + (set (match_dup 2) + (match_dup 3))])] + "") + +;; Match paired HImode loads. +(define_insn "*join2_loadhi" + [(set (match_operand:SI 0 "register_operand" "=r") + (any_extend:SI (match_operand:HI 1 "non_volatile_mem_operand" "m"))) + (set (match_operand:SI 2 "register_operand" "=r") + (any_extend:SI (match_operand:HI 3 "non_volatile_mem_operand" "m")))] + "ENABLE_LD_ST_PAIRS && reload_completed" + { + /* Reg-renaming pass reuses base register if it is dead after bonded loads. + Hardware does not bond those loads, even when they are consecutive. + However, order of the loads need to be checked for correctness. */ + if (!reg_overlap_mentioned_p (operands[0], operands[1])) + { + output_asm_insn ("ld.h\t%0,%1", operands); + output_asm_insn ("ld.h\t%2,%3", operands); + } + else + { + output_asm_insn ("ld.h\t%2,%3", operands); + output_asm_insn ("ld.h\t%0,%1", operands); + } + + return ""; + } + [(set_attr "move_type" "load") + (set_attr "insn_count" "2")]) + + +;; 2 HI loads are joined. +(define_peephole2 + [(set (match_operand:SI 0 "register_operand") + (any_extend:SI (match_operand:HI 1 "non_volatile_mem_operand"))) + (set (match_operand:SI 2 "register_operand") + (any_extend:SI (match_operand:HI 3 "non_volatile_mem_operand")))] + "ENABLE_LD_ST_PAIRS + && loongarch_load_store_bonding_p (operands, HImode, true)" + [(parallel [(set (match_dup 0) + (any_extend:SI (match_dup 1))) + (set (match_dup 2) + (any_extend:SI (match_dup 3)))])] + "") + + +;; Logical AND NOT. +(define_insn "loongson_gsandn" + [(set (match_operand:GPR 0 "register_operand" "=r") + (and:GPR + (not:GPR (match_operand:GPR 1 "register_operand" "r")) + (match_operand:GPR 2 "register_operand" "r")))] + "" + "andn\t%0,%2,%1" + [(set_attr "type" "logical")]) + +;; Logical AND NOT. 
+(define_insn "loongson_gsorn" + [(set (match_operand:GPR 0 "register_operand" "=r") + (ior:GPR + (not:GPR (match_operand:GPR 1 "register_operand" "r")) + (match_operand:GPR 2 "register_operand" "r")))] + "" + "orn\t%0,%2,%1" + [(set_attr "type" "logical")]) + +(define_insn "smax3" + [(set (match_operand:SCALARF 0 "register_operand" "=f") + (smax:SCALARF (match_operand:SCALARF 1 "register_operand" "f") + (match_operand:SCALARF 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "fmax.\t%0,%1,%2" + [(set_attr "type" "fmove") + (set_attr "mode" "")]) + +(define_insn "smin3" + [(set (match_operand:SCALARF 0 "register_operand" "=f") + (smin:SCALARF (match_operand:SCALARF 1 "register_operand" "f") + (match_operand:SCALARF 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT" + "fmin.\t%0,%1,%2" + [(set_attr "type" "fmove") + (set_attr "mode" "")]) + +(define_insn "smaxa3" + [(set (match_operand:SCALARF 0 "register_operand" "=f") + (if_then_else:SCALARF + (gt (abs:SCALARF (match_operand:SCALARF 1 "register_operand" "f")) + (abs:SCALARF (match_operand:SCALARF 2 "register_operand" "f"))) + (match_dup 1) + (match_dup 2)))] + "TARGET_HARD_FLOAT" + "fmaxa.\t%0,%1,%2" + [(set_attr "type" "fmove") + (set_attr "mode" "")]) + +(define_insn "smina3" + [(set (match_operand:SCALARF 0 "register_operand" "=f") + (if_then_else:SCALARF + (lt (abs:SCALARF (match_operand:SCALARF 1 "register_operand" "f")) + (abs:SCALARF (match_operand:SCALARF 2 "register_operand" "f"))) + (match_dup 1) + (match_dup 2)))] + "TARGET_HARD_FLOAT" + "fmina.\t%0,%1,%2" + [(set_attr "type" "fmove") + (set_attr "mode" "")]) + +(define_insn "frint_" + [(set (match_operand:SCALARF 0 "register_operand" "=f") + (unspec:SCALARF [(match_operand:SCALARF 1 "register_operand" "f")] + UNSPEC_FRINT))] + "" + "frint.\t%0,%1" + [(set_attr "type" "fcvt") + (set_attr "mode" "")]) + +(define_insn "fclass_" + [(set (match_operand:SCALARF 0 "register_operand" "=f") + (unspec:SCALARF [(match_operand:SCALARF 1 "register_operand" "f")] + UNSPEC_FCLASS))] + "" + "fclass.\t%0,%1" + [(set_attr "type" "unknown") + (set_attr "mode" "")]) + +(define_insn "bytepick_w" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec:SI [(match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d") + (match_operand:SI 3 "const_0_to_3_operand" "n")] + UNSPEC_BYTEPICK_W))] + "" + "bytepick.w\t%0,%1,%2,%z3" + [(set_attr "type" "dspalu") + (set_attr "mode" "SI")]) + +(define_insn "bytepick_d" + [(set (match_operand:DI 0 "register_operand" "=d") + (unspec:DI [(match_operand:DI 1 "register_operand" "d") + (match_operand:DI 2 "register_operand" "d") + (match_operand:DI 3 "const_0_to_7_operand" "n")] + UNSPEC_BYTEPICK_D))] + "" + "bytepick.d\t%0,%1,%2,%z3" + [(set_attr "type" "dspalu") + (set_attr "mode" "DI")]) + +(define_insn "bitrev_4b" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec:SI [(match_operand:SI 1 "register_operand" "d")] + UNSPEC_BITREV_4B))] + "" + "bitrev.4b\t%0,%1" + [(set_attr "type" "unknown") + (set_attr "mode" "SI")]) + +(define_insn "bitrev_8b" + [(set (match_operand:DI 0 "register_operand" "=d") + (unspec:DI [(match_operand:DI 1 "register_operand" "d")] + UNSPEC_BITREV_8B))] + "" + "bitrev.8b\t%0,%1" + [(set_attr "type" "unknown") + (set_attr "mode" "DI")]) + + + +(define_expand "lu32i_d" + [(set (match_operand:DI 0 "register_operand") + (ior:DI + (zero_extend:DI + (subreg:SI (match_dup 0) 0)) + (match_operand:DI 1 "const_lu32i_operand")))] + "TARGET_64BIT" +{}) + +(define_insn "*lu32i_d" + [(set (match_operand:DI 
0 "register_operand" "=d") + (ior:DI + (zero_extend:DI + (subreg:SI (match_operand:DI 1 "register_operand" "0") 0)) + (match_operand:DI 2 "const_lu32i_operand" "u")))] + "TARGET_64BIT" + "lu32i.d\t%0,%X2>>32" + [(set_attr "type" "arith") + (set_attr "mode" "DI")]) + +(define_insn "lu52i_d" + [(set (match_operand:DI 0 "register_operand" "=d") + (unspec:DI [(match_operand:DI 1 "register_operand" "d") + (match_operand:DI 2 "const_lu52i_operand" "v")] + UNSPEC_LU52I_D))] + "TARGET_64BIT" + "lu52i.d\t%0,%1,%X2>>52" + [(set_attr "type" "arith") + (set_attr "mode" "DI")]) + +;;(define_insn "*lu32i_d" +;; [(set (match_operand:DI 0 "register_operand" "+d") +;; (ior:DI +;; (zero_extend:DI +;; (subreg:SI (match_dup 0) 0)) +;; (match_operand:DI 1 "const_lu32i_operand" "u")))] +;; "TARGET_64BIT" +;; "lu32i.d\t%0,%1>>32" +;; [(set_attr "type" "arith") +;; (set_attr "mode" "DI")]) + +(define_mode_iterator QHSD [QI HI SI DI]) + +(define_insn "crc_w__w" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec:SI [(match_operand:QHSD 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d")] + UNSPEC_CRC))] + "" + "crc.w..w\t%0,%1,%2" + [(set_attr "type" "unknown") + (set_attr "mode" "")]) + +(define_insn "crcc_w__w" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec:SI [(match_operand:QHSD 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d")] + UNSPEC_CRCC))] + "" + "crcc.w..w\t%0,%1,%2" + [(set_attr "type" "unknown") + (set_attr "mode" "")]) + +;; Synchronization instructions. + +(include "sync.md") + +; LoongArch csr +(include "lvz.md") + +(define_c_enum "unspec" [ + UNSPEC_ADDRESS_FIRST +]) + +;; .................... +;; +;; N_LARCH-PORT +;; +;; .................... + +(define_c_enum "unspecv" [ + ;; Register save and restore. + UNSPECV_GPR_SAVE + UNSPECV_GPR_RESTORE + + ;; Floating-point unspecs. + ;;UNSPECV_FRFLAGS + ;;UNSPECV_FSFLAGS + + ;; Blockage and synchronization. + ;;UNSPECV_BLOCKAGE + ;;UNSPECV_FENCE + ;;UNSPECV_FENCE_I +]) + + +;; Is copying of this instruction disallowed? +(define_attr "cannot_copy" "no,yes" (const_string "no")) + +(define_insn "stack_tie" + [(set (mem:BLK (scratch)) + (unspec:BLK [(match_operand:X 0 "register_operand" "r") + (match_operand:X 1 "register_operand" "r")] + UNSPEC_TIE))] + "" + "" + [(set_attr "length" "0")] +) + +(define_insn "gpr_save" + [(unspec_volatile [(match_operand 0 "const_int_operand")] UNSPECV_GPR_SAVE) + (clobber (reg:SI T0_REGNUM)) + (clobber (reg:SI T1_REGNUM))] + "" + { return n_loongarch_output_gpr_save (INTVAL (operands[0])); }) + +(define_insn "gpr_restore" + [(unspec_volatile [(match_operand 0 "const_int_operand")] UNSPECV_GPR_RESTORE)] + "" + "tail\t__n_loongarch_restore_%0") + +(define_insn "gpr_restore_return" + [(return) + (use (match_operand 0 "pmode_register_operand" "")) + (const_int 0)] + "" + "") + diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt new file mode 100644 index 0000000000000000000000000000000000000000..23342570f9d49105244deafdf318ce3ee153b054 --- /dev/null +++ b/gcc/config/loongarch/loongarch.opt @@ -0,0 +1,223 @@ + +; +; Copyright (C) 2005-2018 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. 
+; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +HeaderInclude +config/loongarch/loongarch-opts.h + +EB +Driver + +EL +Driver + +mabi= +Target RejectNegative Joined Enum(loongarch_abi) Var(loongarch_abi) Init(LARCH_ABI_DEFAULT) +-mabi=ABI Generate code that conforms to the given ABI. + +Enum +Name(loongarch_abi) Type(int) +Known Loongarch ABIs (for use with the -mabi= option): + +EnumValue +Enum(loongarch_abi) String(lp32) Value(ABILP32) + +EnumValue +Enum(loongarch_abi) String(lpx32) Value(ABILPX32) + +EnumValue +Enum(loongarch_abi) String(lp64) Value(ABILP64) + +march= +Target RejectNegative Joined Var(loongarch_arch_option) ToLower Enum(loongarch_arch_opt_value) +-march=ISA Generate code for the given ISA. + +mbranch-cost= +Target RejectNegative Joined UInteger Var(loongarch_branch_cost) +-mbranch-cost=COST Set the cost of branches to roughly COST instructions. + +mcheck-zero-division +Target Report Mask(CHECK_ZERO_DIV) +Trap on integer divide by zero. + +mdouble-float +Target Report RejectNegative InverseMask(SINGLE_FLOAT, DOUBLE_FLOAT) +Allow hardware floating-point instructions to cover both 32-bit and 64-bit operations. + +mdebug +Target Var(TARGET_DEBUG_MODE) Undocumented + +mdebugd +Target Var(TARGET_DEBUG_D_MODE) Undocumented + +membedded-data +Target Report Var(TARGET_EMBEDDED_DATA) +Use ROM instead of RAM. + +mextern-sdata +Target Report Var(TARGET_EXTERN_SDATA) Init(1) +Use -G for data that is not defined by the current object. + +mfp-exceptions +Target Report Var(TARGET_FP_EXCEPTIONS) Init(1) +FP exceptions are enabled. + +mfp32 +Target Report RejectNegative InverseMask(FLOAT64) +Use 32-bit floating-point registers. + +mfp64 +Target Report RejectNegative Mask(FLOAT64) +Use 64-bit floating-point registers. + +mflush-func= +Target RejectNegative Joined Var(loongarch_cache_flush_func) Init(CACHE_FLUSH_FUNC) +-mflush-func=FUNC Use FUNC to flush the cache before calling stack trampolines. + +mgp32 +Target Report RejectNegative InverseMask(64BIT) +Use 32-bit general registers. + +mgp64 +Target Report RejectNegative Mask(64BIT) +Use 64-bit general registers. + +mgpopt +Target Report Var(TARGET_GPOPT) Init(1) +Use GP-relative addressing to access small data. + +mhard-float +Target Report RejectNegative InverseMask(SOFT_FLOAT_ABI, HARD_FLOAT_ABI) +Allow the use of hardware floating-point ABI and instructions. + +loongarch +Target RejectNegative Joined ToLower Enum(loongarch_loongarch_opt_value) Var(loongarch_isa_option) +-loongarchN Generate code for ISA level N. + +mlocal-sdata +Target Report Var(TARGET_LOCAL_SDATA) Init(1) +Use -G for object-local data. + +mlong-calls +Target Report Var(TARGET_LONG_CALLS) +Use indirect calls. + +mlong32 +Target Report RejectNegative InverseMask(LONG64, LONG32) +Use a 32-bit long type. + +mlong64 +Target Report RejectNegative Mask(LONG64) +Use a 64-bit long type. + +mmemcpy +Target Report Mask(MEMCPY) +Don't optimize block moves. + +mno-float +Target Report RejectNegative Var(TARGET_NO_FLOAT) Condition(TARGET_SUPPORTS_NO_FLOAT) +Prevent the use of all floating-point operations. + +mno-flush-func +Target RejectNegative +Do not use a cache-flushing function before calling stack trampolines. 
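The Mask() and Var() records above are consumed through macros and variables that GCC's option machinery generates into options.h: Mask(CHECK_ZERO_DIV) yields a MASK_CHECK_ZERO_DIV bit and a TARGET_CHECK_ZERO_DIV test on target_flags, while Var(loongarch_branch_cost) becomes a global the backend reads directly. A minimal standalone sketch of that mapping, with the generated pieces mocked locally so it compiles outside GCC (nothing below is part of the patch itself):

/* Mock of what GCC's option machinery derives from the .opt records
   above; for illustration only, the real definitions are generated.  */
#include <stdio.h>

static int target_flags;                /* GCC keeps this per target.      */
#define MASK_CHECK_ZERO_DIV (1 << 0)    /* from "Mask(CHECK_ZERO_DIV)"     */
#define MASK_64BIT          (1 << 1)    /* from "Mask(64BIT)" (-mgp64)     */
#define TARGET_CHECK_ZERO_DIV ((target_flags & MASK_CHECK_ZERO_DIV) != 0)
#define TARGET_64BIT          ((target_flags & MASK_64BIT) != 0)

static int loongarch_branch_cost;       /* from "Var(loongarch_branch_cost)" */

int
main (void)
{
  target_flags |= MASK_CHECK_ZERO_DIV | MASK_64BIT;  /* -mcheck-zero-division -mgp64 */
  loongarch_branch_cost = 4;                         /* -mbranch-cost=4 */
  printf ("div check %d, 64-bit GPRs %d, branch cost %d\n",
          TARGET_CHECK_ZERO_DIV, TARGET_64BIT, loongarch_branch_cost);
  return 0;
}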
+ +mrelax-pic-calls +Target Report Mask(RELAX_PIC_CALLS) +Try to allow the linker to turn PIC calls into direct calls. + +mshared +Target Report Var(TARGET_SHARED) Init(1) +When generating -mabicalls code, make the code suitable for use in shared libraries. + +msingle-float +Target Report RejectNegative Mask(SINGLE_FLOAT) +Restrict the use of hardware floating-point instructions to 32-bit operations. + +msoft-float +Target Report RejectNegative Mask(SOFT_FLOAT_ABI) +Prevent the use of all hardware floating-point instructions. + +msym32 +Target Report Var(TARGET_SYM32) +Assume all symbols have 32-bit values. + +mlra +Target Report Var(loongarch_lra_flag) Init(1) Save +Use LRA instead of reload. + +mtune= +Target RejectNegative Joined Var(loongarch_tune_option) ToLower Enum(loongarch_arch_opt_value) +-mtune=PROCESSOR Optimize the output for PROCESSOR. + +muninit-const-in-rodata +Target Report Var(TARGET_UNINIT_CONST_IN_RODATA) +Put uninitialized constants in ROM (needs -membedded-data). + +mxgot +Target Report Var(TARGET_XGOT) +Lift restrictions on GOT size. + +mframe-header-opt +Target Report Var(flag_frame_header_optimization) Optimization +Optimize frame header. + +noasmopt +Driver + +mload-store-pairs +Target Report Var(TARGET_LOAD_STORE_PAIRS) Init(1) +Enable load/store bonding. + +mlvz +Target Report Var(TARGET_LVZ) +Use LoongArch Privileged state (LVZ) instructions. + +mmax-inline-memcpy-size= +Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024) +-mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. + +mfix-loongson3-llsc +Target Report Var(FIX_LOONGSON3_LLSC) +Work around an Loongson3 llsc errata. + +; The code model option names for -mcmodel. + +Enum +Name(cmodel) Type(enum loongarch_code_model) +The code model option names for -mcmodel: + +EnumValue +Enum(cmodel) String(normal) Value(LARCH_CMODEL_NORMAL) + +EnumValue +Enum(cmodel) String(tiny) Value(LARCH_CMODEL_TINY) + +EnumValue +Enum(cmodel) String(tiny-static) Value(LARCH_CMODEL_TINY_STATIC) + +EnumValue +Enum(cmodel) String(large) Value(LARCH_CMODEL_LARGE) + +EnumValue +Enum(cmodel) String(extreme) Value(LARCH_CMODEL_EXTREME) + +mcmodel= +Target RejectNegative Joined Enum(cmodel) Var(loongarch_cmodel_var) Init(LARCH_CMODEL_NORMAL) Save +Specify the code model. diff --git a/gcc/config/loongarch/lvz.md b/gcc/config/loongarch/lvz.md new file mode 100644 index 0000000000000000000000000000000000000000..e3000eeeee45fa48d6435aa18b5f9395f6a646bb --- /dev/null +++ b/gcc/config/loongarch/lvz.md @@ -0,0 +1,60 @@ +;; Machine Description for LoongArch vz instructions. +;; Copyright (C) 1989-2014 Free Software Foundation, Inc. +;; Contributed by xuchenghua@loongson.cn + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_c_enum "unspec" [ + ;; lvz + UNSPEC_LVZ_GCSRXCHG + UNSPEC_LVZ_GCSRRD + UNSPEC_LVZ_GCSRWR + +]) + +(define_insn "lvz_
gcsrxchg" + [(set (match_operand:GPR 0 "register_operand" "=d") + (unspec_volatile:GPR + [(match_operand:GPR 1 "register_operand" "0") + (match_operand:GPR 2 "register_operand" "q") + (match_operand 3 "const_uimm14_operand")] + UNSPEC_LVZ_GCSRXCHG))] + "ISA_HAS_LVZ" + "gcsrxchg\t%0,%2,%3" + [(set_attr "type" "load") + (set_attr "mode" "")]) + +(define_insn "lvz_
gcsrrd" + [(set (match_operand:GPR 0 "register_operand" "=d") + (unspec_volatile:GPR [(match_operand 1 "const_uimm14_operand")] + UNSPEC_LVZ_GCSRRD))] + "ISA_HAS_LVZ" + "gcsrrd\t%0,%1" + [(set_attr "type" "load") + (set_attr "mode" "")]) + +(define_insn "lvz_
gcsrwr" + [(set (match_operand:GPR 0 "register_operand" "=d") + (unspec_volatile:GPR + [(match_operand:GPR 1 "register_operand" "0") + (match_operand 2 "const_uimm14_operand")] + UNSPEC_LVZ_GCSRWR))] + "ISA_HAS_LVZ" + "gcsrwr\t%0,%2" + [(set_attr "type" "store") + (set_attr "mode" "")]) + diff --git a/gcc/config/loongarch/lvzintrin.h b/gcc/config/loongarch/lvzintrin.h new file mode 100644 index 0000000000000000000000000000000000000000..0e0a89ed2dcc75ad6701b0ff28fdb5c45729b535 --- /dev/null +++ b/gcc/config/loongarch/lvzintrin.h @@ -0,0 +1,87 @@ +/* Intrinsics for LoongArch vz operations. + + Copyright (C) 2019 Free Software Foundation, Inc. + Contributed by xuchenghua@loongson.cn. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _GCC_LOONGARCH_LVZ_H +#define _GCC_LOONGARCH_LVZ_H + +#define __lvz_gcsrrd __builtin_lvz_gcsrrd +#define __lvz_gcsrwr __builtin_lvz_gcsrwr +#define __lvz_gcsrxchg __builtin_lvz_gcsrxchg +#define __lvz_dgcsrrd __builtin_lvz_dgcsrrd +#define __lvz_dgcsrwr __builtin_lvz_dgcsrwr +#define __lvz_dgcsrxchg __builtin_lvz_dgcsrxchg + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__builtin_lvz_gtlbsrch (void) +{ + __asm__ volatile ("gtlbsrch\n\t"); +} +#define __lvz_gtlbsrch __builtin_lvz_gtlbsrch + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__builtin_lvz_gtlbrd (void) +{ + __asm__ volatile ("gtlbrd\n\t"); +} +#define __lvz_gtlbrd __builtin_lvz_gtlbrd + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__builtin_lvz_gtlbwr (void) +{ + __asm__ volatile ("gtlbwr\n\t"); +} +#define __lvz_gtlbwr __builtin_lvz_gtlbwr + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__builtin_lvz_gtlbfill (void) +{ + __asm__ volatile ("gtlbfill\n\t"); +} +#define __lvz_gtlbfill __builtin_lvz_gtlbfill + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__builtin_lvz_gtlbclr (void) +{ + __asm__ volatile ("gtlbclr\n\t"); +} +#define __lvz_gtlbclr __builtin_lvz_gtlbclr + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__builtin_lvz_gtlbflush (void) +{ + __asm__ volatile ("gtlbflush\n\t"); +} +#define __lvz_gtlbflush __builtin_lvz_gtlbflush + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__builtin_lvz_hvcl (void) +{ + __asm__ volatile ("hvcl\n\t"); +} +#define __lvz_hvcl __builtin_lvz_hvcl + + +#endif /*_GCC_LOONGARCH_LVZ_H */ diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md new file mode 100644 index 
0000000000000000000000000000000000000000..13f5d6a91ebacc620c2e1f604d308f61bf34fc98 --- /dev/null +++ b/gcc/config/loongarch/predicates.md @@ -0,0 +1,561 @@ +;; Predicate definitions for LARCH. +;; Copyright (C) 2004-2018 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_predicate "const_uns_arith_operand" + (and (match_code "const_int") + (match_test "SMALL_OPERAND_UNSIGNED (INTVAL (op))"))) + +(define_predicate "uns_arith_operand" + (ior (match_operand 0 "const_uns_arith_operand") + (match_operand 0 "register_operand"))) + +(define_predicate "const_lu32i_operand" + (and (match_code "const_int") + (match_test "LU32I_OPERAND (INTVAL (op))"))) + +(define_predicate "const_lu52i_operand" + (and (match_code "const_int") + (match_test "LU52I_OPERAND (INTVAL (op))"))) + +(define_predicate "const_arith_operand" + (and (match_code "const_int") + (match_test "IMM12_OPERAND (INTVAL (op))"))) + +(define_predicate "const_imm16_operand" + (and (match_code "const_int") + (match_test "IMM16_OPERAND (INTVAL (op))"))) + +(define_predicate "arith_operand" + (ior (match_operand 0 "const_arith_operand") + (match_operand 0 "register_operand"))) + +(define_predicate "const_immlsa_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 1, 4)"))) + +(define_predicate "const_uimm3_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) + +(define_predicate "const_uimm4_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 15)"))) + +(define_predicate "const_uimm5_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 31)"))) + +(define_predicate "const_uimm6_operand" + (and (match_code "const_int") + (match_test "UIMM6_OPERAND (INTVAL (op))"))) + +(define_predicate "const_uimm7_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 127)"))) + +(define_predicate "const_uimm8_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 255)"))) + +(define_predicate "const_uimm14_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 16383)"))) + +(define_predicate "const_uimm15_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 32767)"))) + +(define_predicate "const_imm5_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -16, 15)"))) + +(define_predicate "const_imm10_operand" + (and (match_code "const_int") + (match_test "IMM10_OPERAND (INTVAL (op))"))) + +(define_predicate "const_imm12_operand" + (and (match_code "const_int") + (match_test "IMM12_OPERAND (INTVAL (op))"))) + +(define_predicate "reg_imm10_operand" + (ior (match_operand 0 "const_imm10_operand") + (match_operand 0 "register_operand"))) + +(define_predicate "aq8b_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 0)"))) + 
+(define_predicate "aq8h_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 1)"))) + +(define_predicate "aq8w_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 2)"))) + +(define_predicate "aq8d_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 3)"))) + +(define_predicate "aq10b_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 0)"))) + +(define_predicate "aq10h_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 1)"))) + +(define_predicate "aq10w_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 2)"))) + +(define_predicate "aq10d_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 3)"))) + +(define_predicate "aq12b_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 12, 0)"))) + +(define_predicate "aq12h_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 11, 1)"))) + +(define_predicate "aq12w_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 2)"))) + +(define_predicate "aq12d_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 9, 3)"))) + +(define_predicate "sle_operand" + (and (match_code "const_int") + (match_test "SMALL_OPERAND (INTVAL (op) + 1)"))) + +(define_predicate "sleu_operand" + (and (match_operand 0 "sle_operand") + (match_test "INTVAL (op) + 1 != 0"))) + +(define_predicate "const_0_operand" + (and (match_code "const_int,const_double,const_vector") + (match_test "op == CONST0_RTX (GET_MODE (op))"))) + +(define_predicate "const_m1_operand" + (and (match_code "const_int,const_double,const_vector") + (match_test "op == CONSTM1_RTX (GET_MODE (op))"))) + +(define_predicate "reg_or_m1_operand" + (ior (match_operand 0 "const_m1_operand") + (match_operand 0 "register_operand"))) + +(define_predicate "reg_or_0_operand" + (ior (match_operand 0 "const_0_operand") + (match_operand 0 "register_operand"))) + +(define_predicate "const_1_operand" + (and (match_code "const_int,const_double,const_vector") + (match_test "op == CONST1_RTX (GET_MODE (op))"))) + +(define_predicate "reg_or_1_operand" + (ior (match_operand 0 "const_1_operand") + (match_operand 0 "register_operand"))) + +;; These are used in vec_merge, hence accept bitmask as const_int. +(define_predicate "const_exp_2_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (exact_log2 (INTVAL (op)), 0, 1)"))) + +(define_predicate "const_exp_4_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (exact_log2 (INTVAL (op)), 0, 3)"))) + +(define_predicate "const_exp_8_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (exact_log2 (INTVAL (op)), 0, 7)"))) + +(define_predicate "const_exp_16_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (exact_log2 (INTVAL (op)), 0, 15)"))) + +(define_predicate "const_exp_32_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (exact_log2 (INTVAL (op)), 0, 31)"))) + +;; This is used for indexing into vectors, and hence only accepts const_int. 
+(define_predicate "const_0_or_1_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 1)"))) + +(define_predicate "const_2_or_3_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 2, 3)"))) + +(define_predicate "const_0_to_3_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 3)"))) + +(define_predicate "const_0_to_7_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) + +(define_predicate "const_4_to_7_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 4, 7)"))) + +(define_predicate "const_8_to_15_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) + +(define_predicate "const_16_to_31_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) + +(define_predicate "qi_mask_operand" + (and (match_code "const_int") + (match_test "UINTVAL (op) == 0xff"))) + +(define_predicate "hi_mask_operand" + (and (match_code "const_int") + (match_test "UINTVAL (op) == 0xffff"))) + +(define_predicate "shift_mask_operand" + (and (match_code "const_int") + (ior (match_test "UINTVAL (op) == 0x3fffffffc") + (match_test "UINTVAL (op) == 0x1fffffffe") + (match_test "UINTVAL (op) == 0x7fffffff8") + (match_test "UINTVAL (op) == 0xffffffff0")))) + + + +(define_predicate "si_mask_operand" + (and (match_code "const_int") + (match_test "UINTVAL (op) == 0xffffffff"))) + +(define_predicate "and_load_operand" + (ior (match_operand 0 "qi_mask_operand") + (match_operand 0 "hi_mask_operand") + (match_operand 0 "si_mask_operand"))) + +(define_predicate "low_bitmask_operand" + (and (match_code "const_int") + (match_test "low_bitmask_len (mode, INTVAL (op)) > 12"))) + +(define_predicate "and_reg_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const_uns_arith_operand") + (match_operand 0 "low_bitmask_operand") + (match_operand 0 "si_mask_operand"))) + +(define_predicate "and_operand" + (ior (match_operand 0 "and_load_operand") + (match_operand 0 "and_reg_operand"))) + +(define_predicate "d_operand" + (and (match_code "reg") + (match_test "GP_REG_P (REGNO (op))"))) + +(define_predicate "db4_operand" + (and (match_code "const_int") + (match_test "loongarch_unsigned_immediate_p (INTVAL (op) + 1, 4, 0)"))) + +(define_predicate "db7_operand" + (and (match_code "const_int") + (match_test "loongarch_unsigned_immediate_p (INTVAL (op) + 1, 7, 0)"))) + +(define_predicate "db8_operand" + (and (match_code "const_int") + (match_test "loongarch_unsigned_immediate_p (INTVAL (op) + 1, 8, 0)"))) + +(define_predicate "ib3_operand" + (and (match_code "const_int") + (match_test "loongarch_unsigned_immediate_p (INTVAL (op) - 1, 3, 0)"))) + +(define_predicate "sb4_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 4, 0)"))) + +(define_predicate "sb5_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 5, 0)"))) + +(define_predicate "sb8_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 0)"))) + +(define_predicate "sd8_operand" + (and (match_code "const_int") + (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 3)"))) + +(define_predicate "ub4_operand" + (and (match_code "const_int") + (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 4, 0)"))) + +(define_predicate "ub8_operand" + (and (match_code "const_int") + (match_test 
"loongarch_unsigned_immediate_p (INTVAL (op), 8, 0)"))) + +(define_predicate "uh4_operand" + (and (match_code "const_int") + (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 4, 1)"))) + +(define_predicate "uw4_operand" + (and (match_code "const_int") + (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 4, 2)"))) + +(define_predicate "uw5_operand" + (and (match_code "const_int") + (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 5, 2)"))) + +(define_predicate "uw6_operand" + (and (match_code "const_int") + (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 6, 2)"))) + +(define_predicate "uw8_operand" + (and (match_code "const_int") + (match_test "loongarch_unsigned_immediate_p (INTVAL (op), 8, 2)"))) + +(define_predicate "addiur2_operand" + (and (match_code "const_int") + (ior (match_test "INTVAL (op) == -1") + (match_test "INTVAL (op) == 1") + (match_test "INTVAL (op) == 4") + (match_test "INTVAL (op) == 8") + (match_test "INTVAL (op) == 12") + (match_test "INTVAL (op) == 16") + (match_test "INTVAL (op) == 20") + (match_test "INTVAL (op) == 24")))) + +(define_predicate "addiusp_operand" + (and (match_code "const_int") + (ior (match_test "(IN_RANGE (INTVAL (op), 2, 257))") + (match_test "(IN_RANGE (INTVAL (op), -258, -3))")))) + +(define_predicate "andi16_operand" + (and (match_code "const_int") + (ior (match_test "IN_RANGE (INTVAL (op), 1, 4)") + (match_test "IN_RANGE (INTVAL (op), 7, 8)") + (match_test "IN_RANGE (INTVAL (op), 15, 16)") + (match_test "IN_RANGE (INTVAL (op), 31, 32)") + (match_test "IN_RANGE (INTVAL (op), 63, 64)") + (match_test "INTVAL (op) == 255") + (match_test "INTVAL (op) == 32768") + (match_test "INTVAL (op) == 65535")))) + +(define_predicate "movep_src_register" + (and (match_code "reg") + (ior (match_test ("IN_RANGE (REGNO (op), 2, 3)")) + (match_test ("IN_RANGE (REGNO (op), 16, 20)"))))) + +(define_predicate "movep_src_operand" + (ior (match_operand 0 "const_0_operand") + (match_operand 0 "movep_src_register"))) + +(define_predicate "fcc_reload_operand" + (and (match_code "reg,subreg") + (match_test "ST_REG_P (true_regnum (op))"))) + +(define_predicate "muldiv_target_operand" + (match_operand 0 "register_operand")) + +(define_predicate "const_call_insn_operand" + (match_code "const,symbol_ref,label_ref") +{ + enum loongarch_symbol_type symbol_type; + + if (!loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_CALL, &symbol_type)) + return false; + + switch (symbol_type) + { + case SYMBOL_GOT_DISP: + /* Without explicit relocs, there is no special syntax for + loading the address of a call destination into a register. + Using "la $25,foo; jal $25" would prevent the lazy binding + of "foo", so keep the address of global symbols with the + jal macro. 
*/ + return 1; + + default: + return false; + } +}) + +(define_predicate "call_insn_operand" + (ior (match_operand 0 "const_call_insn_operand") + (match_operand 0 "register_operand"))) + +(define_predicate "is_const_call_local_symbol" + (and (match_operand 0 "const_call_insn_operand") + (ior (match_test "loongarch_global_symbol_p (op) == 0") + (match_test "loongarch_symbol_binds_local_p (op) != 0")) + (match_test "CONSTANT_P (op)"))) + +(define_predicate "is_const_call_weak_symbol" + (and (match_operand 0 "const_call_insn_operand") + (not (match_operand 0 "is_const_call_local_symbol")) + (match_test "loongarch_weak_symbol_p (op) != 0") + (match_test "CONSTANT_P (op)"))) + +(define_predicate "is_const_call_plt_symbol" + (and (match_operand 0 "const_call_insn_operand") + (match_test "flag_plt != 0") + (match_test "loongarch_global_symbol_noweak_p (op) != 0") + (match_test "CONSTANT_P (op)"))) + +(define_predicate "is_const_call_global_noplt_symbol" + (and (match_operand 0 "const_call_insn_operand") + (match_test "flag_plt == 0") + (match_test "loongarch_global_symbol_noweak_p (op) != 0") + (match_test "CONSTANT_P (op)"))) + +;; A legitimate CONST_INT operand that takes more than one instruction +;; to load. +(define_predicate "splittable_const_int_operand" + (match_code "const_int") +{ + + /* Don't handle multi-word moves this way; we don't want to introduce + the individual word-mode moves until after reload. */ + if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) + return false; + + /* Otherwise check whether the constant can be loaded in a single + instruction. */ +// return !LUI_INT (op) && !SMALL_INT (op) && !SMALL_INT_UNSIGNED (op); + return !LUI_INT (op) && !SMALL_INT (op) && !SMALL_INT_UNSIGNED (op) + && !LU52I_INT (op); +}) + +(define_predicate "move_operand" + (match_operand 0 "general_operand") +{ + enum loongarch_symbol_type symbol_type; + + /* The thinking here is as follows: + + (1) The move expanders should split complex load sequences into + individual instructions. Those individual instructions can + then be optimized by all rtl passes. + + (2) The target of pre-reload load sequences should not be used + to store temporary results. If the target register is only + assigned one value, reload can rematerialize that value + on demand, rather than spill it to the stack. + + (3) If we allowed pre-reload passes like combine and cse to recreate + complex load sequences, we would want to be able to split the + sequences before reload as well, so that the pre-reload scheduler + can see the individual instructions. This falls foul of (2); + the splitter would be forced to reuse the target register for + intermediate results. + + (4) We want to define complex load splitters for combine. These + splitters can request a temporary scratch register, which avoids + the problem in (2). They allow things like: + + (set (reg T1) (high SYM)) + (set (reg T2) (low (reg T1) SYM)) + (set (reg X) (plus (reg T2) (const_int OFFSET))) + + to be combined into: + + (set (reg T3) (high SYM+OFFSET)) + (set (reg X) (lo_sum (reg T3) SYM+OFFSET)) + + if T2 is only used this once. 
*/ + switch (GET_CODE (op)) + { + case CONST_INT: + return !splittable_const_int_operand (op, mode); + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + return (loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &symbol_type)); + default: + return true; + } +}) + +(define_predicate "consttable_operand" + (match_test "CONSTANT_P (op)")) + +(define_predicate "symbolic_operand" + (match_code "const,symbol_ref,label_ref") +{ + enum loongarch_symbol_type type; + return loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &type); +}) + +(define_predicate "force_to_mem_operand" + (match_code "const,symbol_ref,label_ref") +{ + enum loongarch_symbol_type symbol_type; + return (loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &symbol_type) + && loongarch_use_pcrel_pool_p[(int) symbol_type]); +}) + +(define_predicate "got_disp_operand" + (match_code "const,symbol_ref,label_ref") +{ + enum loongarch_symbol_type type; + return (loongarch_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &type) + && type == SYMBOL_GOT_DISP); +}) + +(define_predicate "symbol_ref_operand" + (match_code "symbol_ref")) + +(define_predicate "stack_operand" + (and (match_code "mem") + (match_test "loongarch_stack_address_p (XEXP (op, 0), GET_MODE (op))"))) + + + +(define_predicate "equality_operator" + (match_code "eq,ne")) + +(define_predicate "extend_operator" + (match_code "zero_extend,sign_extend")) + +(define_predicate "trap_comparison_operator" + (match_code "eq,ne,lt,ltu,ge,geu")) + +(define_predicate "order_operator" + (match_code "lt,ltu,le,leu,ge,geu,gt,gtu")) + +;; For NE, cstore uses sltu instructions in which the first operand is $0. + +(define_predicate "loongarch_cstore_operator" + (ior (match_code "eq,gt,gtu,ge,geu,lt,ltu,le,leu") + (match_code "ne"))) + +(define_predicate "small_data_pattern" + (and (match_code "set,parallel,unspec,unspec_volatile,prefetch") + (match_test "loongarch_small_data_pattern_p (op)"))) + +(define_predicate "mem_noofs_operand" + (and (match_code "mem") + (match_code "reg" "0"))) + +;; Return 1 if the operand is in non-volatile memory. +(define_predicate "non_volatile_mem_operand" + (and (match_operand 0 "memory_operand") + (not (match_test "MEM_VOLATILE_P (op)")))) diff --git a/gcc/config/loongarch/rtems.h b/gcc/config/loongarch/rtems.h new file mode 100644 index 0000000000000000000000000000000000000000..bbb70b040b83d3790193bcad14f14e84f6dbc0f2 --- /dev/null +++ b/gcc/config/loongarch/rtems.h @@ -0,0 +1,39 @@ +/* Definitions for rtems targeting a LARCH using ELF. + Copyright (C) 1996-2018 Free Software Foundation, Inc. + Contributed by Joel Sherrill (joel@OARcorp.com). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + +/* Specify predefined symbols in preprocessor. */ + +#define TARGET_OS_CPP_BUILTINS() \ +do { \ + builtin_define ("__rtems__"); \ + builtin_define ("__USE_INIT_FINI__"); \ + builtin_assert ("system=rtems"); \ +} while (0) + +/* No sdata. + * The RTEMS BSPs expect -G0 + */ +#undef LARCH_DEFAULT_GVALUE +#define LARCH_DEFAULT_GVALUE 0 diff --git a/gcc/config/loongarch/sde.opt b/gcc/config/loongarch/sde.opt new file mode 100644 index 0000000000000000000000000000000000000000..321217d51eed090945db3c640990c278f0d283fa --- /dev/null +++ b/gcc/config/loongarch/sde.opt @@ -0,0 +1,28 @@ +; LARCH SDE options. +; +; Copyright (C) 2010-2018 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; -mcode-xonly is a traditional alias for -mcode-readable=pcrel and +; -mno-data-in-code is a traditional alias for -mcode-readable=no. + +mno-data-in-code +Target RejectNegative Alias(mcode-readable=, no) + +mcode-xonly +Target RejectNegative Alias(mcode-readable=, pcrel) diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md new file mode 100644 index 0000000000000000000000000000000000000000..b5d68c145d602e49169cfaf03cd79d3770952ca3 --- /dev/null +++ b/gcc/config/loongarch/sync.md @@ -0,0 +1,555 @@ +;; Machine description for LARCH atomic operations. +;; Copyright (C) 2011-2018 Free Software Foundation, Inc. +;; Contributed by Andrew Waterman (andrew@sifive.com). +;; Based on LARCH target for GNU compiler. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_c_enum "unspec" [ + UNSPEC_COMPARE_AND_SWAP + UNSPEC_COMPARE_AND_SWAP_ADD + UNSPEC_COMPARE_AND_SWAP_SUB + UNSPEC_COMPARE_AND_SWAP_AND + UNSPEC_COMPARE_AND_SWAP_XOR + UNSPEC_COMPARE_AND_SWAP_OR + UNSPEC_COMPARE_AND_SWAP_NAND + UNSPEC_SYNC_OLD_OP + UNSPEC_SYNC_EXCHANGE + UNSPEC_ATOMIC_STORE + UNSPEC_MEMORY_BARRIER +]) + +(define_code_iterator any_atomic [plus ior xor and]) +(define_code_attr atomic_optab + [(plus "add") (ior "or") (xor "xor") (and "and")]) + +;; This attribute gives the format suffix for atomic memory operations. +(define_mode_attr amo [(SI "w") (DI "d")]) + +;; expands to the name of the atomic operand that implements a particular code. +(define_code_attr amop [(ior "or") + (xor "xor") + (and "and") + (plus "add")]) +;; Memory barriers. 
+ +(define_expand "mem_thread_fence" + [(match_operand:SI 0 "const_int_operand" "")] ;; model + "" +{ + if (INTVAL (operands[0]) != MEMMODEL_RELAXED) + { + rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (mem) = 1; + emit_insn (gen_mem_thread_fence_1 (mem, operands[0])); + } + DONE; +}) + +;; Until the LARCH memory model (hence its mapping from C++) is finalized, +;; conservatively emit a full FENCE. +(define_insn "mem_thread_fence_1" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER)) + (match_operand:SI 1 "const_int_operand" "")] ;; model + "" + "dbar\t0") + +;; Atomic memory operations. + +;; Implement atomic stores with amoswap. Fall back to fences for atomic loads. +(define_insn "atomic_store" + [(set (match_operand:GPR 0 "memory_operand" "+ZB") + (unspec_volatile:GPR + [(match_operand:GPR 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "const_int_operand")] ;; model + UNSPEC_ATOMIC_STORE))] + "" + "amswap%A2.\t$zero,%z1,%0" + [(set (attr "length") (const_int 8))]) + +(define_insn "atomic_" + [(set (match_operand:GPR 0 "memory_operand" "+ZB") + (unspec_volatile:GPR + [(any_atomic:GPR (match_dup 0) + (match_operand:GPR 1 "reg_or_0_operand" "rJ")) + (match_operand:SI 2 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" + "am%A2.\t$zero,%z1,%0" + [(set (attr "length") (const_int 8))]) + +(define_insn "atomic_fetch_" + [(set (match_operand:GPR 0 "register_operand" "=&r") + (match_operand:GPR 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:GPR + [(any_atomic:GPR (match_dup 1) + (match_operand:GPR 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" + "am%A3.\t%0,%z2,%1" + [(set (attr "length") (const_int 8))]) + +(define_insn "atomic_exchange" + [(set (match_operand:GPR 0 "register_operand" "=&r") + (unspec_volatile:GPR + [(match_operand:GPR 1 "memory_operand" "+ZB") + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_EXCHANGE)) + (set (match_dup 1) + (match_operand:GPR 2 "register_operand" "r"))] + "" + "amswap%A3.\t%0,%z2,%1" + [(set (attr "length") (const_int 8))]) + +(define_insn "atomic_cas_value_strong" + [(set (match_operand:GPR 0 "register_operand" "=&r") + (match_operand:GPR 1 "memory_operand" "+ZC")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") + (match_operand:GPR 3 "reg_or_0_operand" "rJ") + (match_operand:SI 4 "const_int_operand") ;; mod_s + (match_operand:SI 5 "const_int_operand")] ;; mod_f + UNSPEC_COMPARE_AND_SWAP)) + (clobber (match_scratch:GPR 6 "=&r"))] + "" +{ + if (FIX_LOONGSON3_LLSC) + return "%G5\n\t1:\n\tll.\t%0,%1\n\tbne\t%0,%z2,2f\n\tor%i3\t%6,$zero,%3\n\tsc.\t%6,%1\n\tbeq\t$zero,%6,1b\n\t2:\n\tdbar\t0"; + else + return "%G5\n\t1:\n\tll.\t%0,%1\n\tbne\t%0,%z2,2f\n\tor%i3\t%6,$zero,%3\n\tsc.\t%6,%1\n\tbeq\t$zero,%6,1b\n\t2:"; + +} + [(set (attr "length") (const_int 20))]) + +(define_expand "atomic_compare_and_swap" + [(match_operand:SI 0 "register_operand" "") ;; bool output + (match_operand:GPR 1 "register_operand" "") ;; val output + (match_operand:GPR 2 "memory_operand" "") ;; memory + (match_operand:GPR 3 "reg_or_0_operand" "") ;; expected value + (match_operand:GPR 4 "reg_or_0_operand" "") ;; desired value + (match_operand:SI 5 "const_int_operand" "") ;; is_weak + (match_operand:SI 6 "const_int_operand" "") ;; mod_s + (match_operand:SI 7 "const_int_operand" "")] ;; mod_f + "" +{ + emit_insn (gen_atomic_cas_value_strong (operands[1], operands[2], + 
operands[3], operands[4], + operands[6], operands[7])); + + rtx compare = operands[1]; + if (operands[3] != const0_rtx) + { + rtx difference = gen_rtx_MINUS (mode, operands[1], operands[3]); + compare = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (compare, difference)); + } + + if (word_mode != mode) + { + rtx reg = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (reg, gen_rtx_SIGN_EXTEND (word_mode, compare))); + compare = reg; + } + + emit_insn (gen_rtx_SET (operands[0], gen_rtx_EQ (SImode, compare, const0_rtx))); + DONE; +}) + +(define_expand "atomic_test_and_set" + [(match_operand:QI 0 "register_operand" "") ;; bool output + (match_operand:QI 1 "memory_operand" "+ZB") ;; memory + (match_operand:SI 2 "const_int_operand" "")] ;; model + "" +{ + /* We have no QImode atomics, so use the address LSBs to form a mask, + then use an aligned SImode atomic. */ + rtx result = operands[0]; + rtx mem = operands[1]; + rtx model = operands[2]; + rtx addr = force_reg (Pmode, XEXP (mem, 0)); + rtx tmp_reg = gen_reg_rtx (Pmode); + rtx zero_reg = gen_rtx_REG (Pmode, 0); + + rtx aligned_addr = gen_reg_rtx (Pmode); + emit_move_insn (tmp_reg, gen_rtx_PLUS (Pmode, zero_reg, GEN_INT (-4))); + emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, tmp_reg)); + + rtx aligned_mem = change_address (mem, SImode, aligned_addr); + set_mem_alias_set (aligned_mem, 0); + + rtx offset = gen_reg_rtx (SImode); + emit_move_insn (offset, gen_rtx_AND (SImode, gen_lowpart (SImode, addr), + GEN_INT (3))); + + rtx tmp = gen_reg_rtx (SImode); + emit_move_insn (tmp, GEN_INT (1)); + + rtx shmt = gen_reg_rtx (SImode); + emit_move_insn (shmt, gen_rtx_ASHIFT (SImode, offset, GEN_INT (3))); + + rtx word = gen_reg_rtx (SImode); + emit_move_insn (word, gen_rtx_ASHIFT (SImode, tmp, shmt)); + + tmp = gen_reg_rtx (SImode); + emit_insn (gen_atomic_fetch_orsi (tmp, aligned_mem, word, model)); + + emit_move_insn (gen_lowpart (SImode, result), + gen_rtx_LSHIFTRT (SImode, tmp, shmt)); + DONE; +}) + + + +(define_insn "atomic_cas_value_cmp_and_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") + (match_operand:GPR 1 "memory_operand" "+ZC")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") + (match_operand:GPR 3 "reg_or_0_operand" "rJ") + (match_operand:GPR 4 "reg_or_0_operand" "rJ") + (match_operand:GPR 5 "reg_or_0_operand" "rJ") + (match_operand:SI 6 "const_int_operand")] ;; model + UNSPEC_COMPARE_AND_SWAP)) + (clobber (match_scratch:GPR 7 "=&r"))] + "" +{ + if (FIX_LOONGSON3_LLSC) + return "%G6\n\t1:\n\tll.\t%0,%1\n\tand\t%7,%0,%2\n\tbne\t%7,%z4,2f\n\tand\t%7,%0,%z3\n\tor%i5\t%7,%7,%5\n\tsc.\t%7,%1\n\tbeq\t$zero,%7,1b\n\t2:\n\tdbar\t0"; + else + return "%G6\n\t1:\n\tll.\t%0,%1\n\tand\t%7,%0,%2\n\tbne\t%7,%z4,2f\n\tand\t%7,%0,%z3\n\tor%i5\t%7,%7,%5\n\tsc.\t%7,%1\n\tbeq\t$zero,%7,1b\n\t2:"; +} + [(set (attr "length") (const_int 20))]) + + +(define_expand "atomic_compare_and_swap" + [(match_operand:SI 0 "register_operand" "") ;; bool output + (match_operand:SHORT 1 "register_operand" "") ;; val output + (match_operand:SHORT 2 "memory_operand" "") ;; memory + (match_operand:SHORT 3 "reg_or_0_operand" "") ;; expected value + (match_operand:SHORT 4 "reg_or_0_operand" "") ;; desired value + (match_operand:SI 5 "const_int_operand" "") ;; is_weak + (match_operand:SI 6 "const_int_operand" "") ;; mod_s + (match_operand:SI 7 "const_int_operand" "")] ;; mod_f + "" +{ + union loongarch_gen_fn_ptrs generator; + generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si; + loongarch_expand_atomic_qihi (generator, + 
operands[1], + operands[2], + operands[3], + operands[4], + operands[7]); + + rtx compare = operands[1]; + if (operands[3] != const0_rtx) + { + machine_mode mode = GET_MODE (operands[3]); + rtx op1 = convert_modes (SImode, mode, operands[1], true); + rtx op3 = convert_modes (SImode, mode, operands[3], true); + rtx difference = gen_rtx_MINUS (SImode, op1, op3); + compare = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (compare, difference)); + } + + if (word_mode != mode) + { + rtx reg = gen_reg_rtx (word_mode); + emit_insn (gen_rtx_SET (reg, gen_rtx_SIGN_EXTEND (word_mode, compare))); + compare = reg; + } + + emit_insn (gen_rtx_SET (operands[0], gen_rtx_EQ (SImode, compare, const0_rtx))); + DONE; +}) + + + + +(define_insn "atomic_cas_value_add_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res + (match_operand:GPR 1 "memory_operand" "+ZC")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask + (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask + (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val + (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val + (match_operand:SI 6 "const_int_operand")] ;; model + UNSPEC_COMPARE_AND_SWAP_ADD)) + (clobber (match_scratch:GPR 7 "=&r")) + (clobber (match_scratch:GPR 8 "=&r"))] + "" + "%G6\n\t1:\n\tll.\t%0,%1\n\tand\t%7,%0,%3\n\tadd.w\t%8,%0,%z5\n\tand\t%8,%8,%z2\n\tor%i8\t%7,%7,%8\n\tsc.\t%7,%1\n\tbeq\t$zero,%7,1b" + [(set (attr "length") (const_int 20))]) + + + +(define_insn "atomic_cas_value_sub_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res + (match_operand:GPR 1 "memory_operand" "+ZC")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask + (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask + (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val + (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val + (match_operand:SI 6 "const_int_operand")] ;; model + UNSPEC_COMPARE_AND_SWAP_SUB)) + (clobber (match_scratch:GPR 7 "=&r")) + (clobber (match_scratch:GPR 8 "=&r"))] + "" + "%G6\n\t1:\n\tll.\t%0,%1\n\tand\t%7,%0,%3\n\tsub.w\t%8,%0,%z5\n\tand\t%8,%8,%z2\n\tor%i8\t%7,%7,%8\n\tsc.\t%7,%1\n\tbeq\t$zero,%7,1b" + [(set (attr "length") (const_int 20))]) + + + +(define_insn "atomic_cas_value_and_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res + (match_operand:GPR 1 "memory_operand" "+ZC")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask + (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask + (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val + (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val + (match_operand:SI 6 "const_int_operand")] ;; model + UNSPEC_COMPARE_AND_SWAP_AND)) + (clobber (match_scratch:GPR 7 "=&r")) + (clobber (match_scratch:GPR 8 "=&r"))] + "" + "%G6\n\t1:\n\tll.\t%0,%1\n\tand\t%7,%0,%3\n\tand\t%8,%0,%z5\n\tand\t%8,%8,%z2\n\tor%i8\t%7,%7,%8\n\tsc.\t%7,%1\n\tbeq\t$zero,%7,1b" + [(set (attr "length") (const_int 20))]) + +(define_insn "atomic_cas_value_xor_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res + (match_operand:GPR 1 "memory_operand" "+ZC")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask + (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask + (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val + (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val + (match_operand:SI 6 "const_int_operand")] ;; 
model + UNSPEC_COMPARE_AND_SWAP_XOR)) + (clobber (match_scratch:GPR 7 "=&r")) + (clobber (match_scratch:GPR 8 "=&r"))] + "" + "%G6\n\t1:\n\tll.\t%0,%1\n\tand\t%7,%0,%3\n\txor\t%8,%0,%z5\n\tand\t%8,%8,%z2\n\tor%i8\t%7,%7,%8\n\tsc.\t%7,%1\n\tbeq\t$zero,%7,1b" + [(set (attr "length") (const_int 20))]) + +(define_insn "atomic_cas_value_or_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res + (match_operand:GPR 1 "memory_operand" "+ZC")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask + (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask + (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val + (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val + (match_operand:SI 6 "const_int_operand")] ;; model + UNSPEC_COMPARE_AND_SWAP_OR)) + (clobber (match_scratch:GPR 7 "=&r")) + (clobber (match_scratch:GPR 8 "=&r"))] + "" + "%G6\n\t1:\n\tll.\t%0,%1\n\tand\t%7,%0,%3\n\tor\t%8,%0,%z5\n\tand\t%8,%8,%z2\n\tor%i8\t%7,%7,%8\n\tsc.\t%7,%1\n\tbeq\t$zero,%7,1b" + [(set (attr "length") (const_int 20))]) + +(define_insn "atomic_cas_value_nand_7_" + [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res + (match_operand:GPR 1 "memory_operand" "+ZC")) + (set (match_dup 1) + (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask + (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask + (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val + (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val + (match_operand:SI 6 "const_int_operand")] ;; model + UNSPEC_COMPARE_AND_SWAP_NAND)) + (clobber (match_scratch:GPR 7 "=&r")) + (clobber (match_scratch:GPR 8 "=&r"))] + "" + "%G6\n\t1:\n\tll.\t%0,%1\n\tand\t%7,%0,%3\n\tand\t%8,%0,%z5\n\txor\t%8,%8,%z2\n\tor%i8\t%7,%7,%8\n\tsc.\t%7,%1\n\tbeq\t$zero,%7,1b" + [(set (attr "length") (const_int 20))]) + +;;(define_insn "atomic_cas_value_llsc_6_" +;; [(set (match_operand:GPR 0 "register_operand" "=&r") +;; (match_operand:GPR 1 "memory_operand" "+ZB")) +;; (set (match_dup 1) +;; (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") +;; (match_operand:GPR 3 "reg_or_0_operand" "rJ") +;; (match_operand:GPR 4 "reg_or_0_operand" "rJ") +;; (match_operand:SI 5 "const_int_operand")] ;; model +;; UNSPEC_COMPARE_AND_SWAP)) +;; (clobber (match_scratch:GPR 6 "=&r"))] +;; "" +;; "%G5\n\t1:\n\tll.\t%0,%1\n\tand\t%6,%0,%2\n\tbne\t%6,%z4,2f\n\tand\t%6,%0,%z3\n\tsc.\t%6,%1\n\tbeq\t$zero,%6,1b\n\t2:" +;; [(set (attr "length") (const_int 20))]) +;; + + +(define_expand "atomic_exchange" + [(set (match_operand:SHORT 0 "register_operand") + (unspec_volatile:SHORT + [(match_operand:SHORT 1 "memory_operand") + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_EXCHANGE)) + (set (match_dup 1) + (match_operand:SHORT 2 "register_operand"))] + "" +{ + union loongarch_gen_fn_ptrs generator; + generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si; + loongarch_expand_atomic_qihi (generator, + operands[0], + operands[1], + operands[1], + operands[2], + operands[3]); + DONE; +}) + + +(define_expand "atomic_fetch_add" + [(set (match_operand:SHORT 0 "register_operand" "=&r") + (match_operand:SHORT 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:SHORT + [(plus:SHORT (match_dup 1) + (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" +{ + union loongarch_gen_fn_ptrs generator; + generator.fn_7 = gen_atomic_cas_value_add_7_si; + loongarch_expand_atomic_qihi (generator, + 
operands[0], + operands[1], + operands[1], + operands[2], + operands[3]); + DONE; +}) + +(define_expand "atomic_fetch_sub" + [(set (match_operand:SHORT 0 "register_operand" "=&r") + (match_operand:SHORT 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:SHORT + [(minus:SHORT (match_dup 1) + (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" +{ + union loongarch_gen_fn_ptrs generator; + generator.fn_7 = gen_atomic_cas_value_sub_7_si; + loongarch_expand_atomic_qihi (generator, + operands[0], + operands[1], + operands[1], + operands[2], + operands[3]); + DONE; +}) + +(define_expand "atomic_fetch_and" + [(set (match_operand:SHORT 0 "register_operand" "=&r") + (match_operand:SHORT 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:SHORT + [(and:SHORT (match_dup 1) + (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" +{ + union loongarch_gen_fn_ptrs generator; + generator.fn_7 = gen_atomic_cas_value_and_7_si; + loongarch_expand_atomic_qihi (generator, + operands[0], + operands[1], + operands[1], + operands[2], + operands[3]); + DONE; +}) + +(define_expand "atomic_fetch_xor" + [(set (match_operand:SHORT 0 "register_operand" "=&r") + (match_operand:SHORT 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:SHORT + [(xor:SHORT (match_dup 1) + (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" +{ + union loongarch_gen_fn_ptrs generator; + generator.fn_7 = gen_atomic_cas_value_xor_7_si; + loongarch_expand_atomic_qihi (generator, + operands[0], + operands[1], + operands[1], + operands[2], + operands[3]); + DONE; +}) + +(define_expand "atomic_fetch_or" + [(set (match_operand:SHORT 0 "register_operand" "=&r") + (match_operand:SHORT 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:SHORT + [(ior:SHORT (match_dup 1) + (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" +{ + union loongarch_gen_fn_ptrs generator; + generator.fn_7 = gen_atomic_cas_value_or_7_si; + loongarch_expand_atomic_qihi (generator, + operands[0], + operands[1], + operands[1], + operands[2], + operands[3]); + DONE; +}) + +(define_expand "atomic_fetch_nand" + [(set (match_operand:SHORT 0 "register_operand" "=&r") + (match_operand:SHORT 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:SHORT + [(not:SHORT (and:SHORT (match_dup 1) + (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))) + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_SYNC_OLD_OP))] + "" +{ + union loongarch_gen_fn_ptrs generator; + generator.fn_7 = gen_atomic_cas_value_nand_7_si; + loongarch_expand_atomic_qihi (generator, + operands[0], + operands[1], + operands[1], + operands[2], + operands[3]); + DONE; +}) + diff --git a/gcc/config/loongarch/t-linux b/gcc/config/loongarch/t-linux new file mode 100644 index 0000000000000000000000000000000000000000..58f27f89dadfee85e29d065bf6b3794059f39bf2 --- /dev/null +++ b/gcc/config/loongarch/t-linux @@ -0,0 +1,23 @@ +# Copyright (C) 2003-2018 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. 
+# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +MULTILIB_OSDIRNAMES := ../lib64$(call if_multiarch,:loongarch64-linux-gnu) +MULTIARCH_DIRNAME := $(call if_multiarch,loongarch64-linux-gnu) + +# haven't supported lp32 yet +MULTILIB_EXCEPTIONS = mabi=lp32 diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch new file mode 100644 index 0000000000000000000000000000000000000000..298b657f00a1b44b13a4de7e3ab17bd879852e85 --- /dev/null +++ b/gcc/config/loongarch/t-loongarch @@ -0,0 +1,46 @@ +# Copyright (C) 2002-2018 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +$(srcdir)/config/loongarch/loongarch-tables.opt: $(srcdir)/config/loongarch/genopt.sh \ + $(srcdir)/config/loongarch/loongarch-cpus.def + $(SHELL) $(srcdir)/config/loongarch/genopt.sh $(srcdir)/config/loongarch > \ + $(srcdir)/config/loongarch/loongarch-tables.opt + +frame-header-opt.o: $(srcdir)/config/loongarch/frame-header-opt.c + $(COMPILE) $< + $(POSTCOMPILE) + +loongarch-d.o: $(srcdir)/config/loongarch/loongarch-d.c + $(COMPILE) $< + $(POSTCOMPILE) + +loongarch-c.o: $(srcdir)/config/loongarch/loongarch-c.c $(CONFIG_H) $(SYSTEM_H) \ + coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) $(TARGET_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/loongarch/loongarch-c.c + +loongarch-builtins.o: $(srcdir)/config/loongarch/loongarch-builtins.c $(CONFIG_H) \ + $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) $(RECOG_H) langhooks.h \ + $(DIAGNOSTIC_CORE_H) $(OPTABS_H) $(srcdir)/config/loongarch/loongarch-ftypes.def \ + $(srcdir)/config/loongarch/loongarch-modes.def + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/loongarch/loongarch-builtins.c + +comma=, +MULTILIB_OPTIONS = $(subst $(comma),/, $(patsubst %, mabi=%, $(subst $(comma),$(comma)mabi=,$(TM_MULTILIB_CONFIG)))) +MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG)) diff --git a/gcc/config/loongarch/x-native b/gcc/config/loongarch/x-native new file mode 100644 index 0000000000000000000000000000000000000000..827d21f1ab2dd04962fb9186249ffec537a7cf78 --- /dev/null +++ b/gcc/config/loongarch/x-native @@ -0,0 +1,3 @@ +driver-native.o : $(srcdir)/config/loongarch/driver-native.c \ + $(CONFIG_H) $(SYSTEM_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< diff --git a/gcc/configure b/gcc/configure index d4f97834fdc7f8dfbfc2d0a8f10e6beca767e926..3ff4290754355209933673227da29b45342fe387 100755 --- a/gcc/configure +++ b/gcc/configure @@ -7658,6 +7658,9 @@ else mips*-*-*) enable_fixed_point=yes ;; + loongarch*-*-*) + 
enable_fixed_point=yes + ;; *) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: fixed-point is not supported for this target, ignored" >&5 $as_echo "$as_me: WARNING: fixed-point is not supported for this target, ignored" >&2;} @@ -24864,6 +24867,23 @@ x: mips*-*-*) conftest_s=' .section .tdata,"awT",@progbits +x: + .word 2 + .text + addiu $4, $28, %tlsgd(x) + addiu $4, $28, %tlsldm(x) + lui $4, %dtprel_hi(x) + addiu $4, $4, %dtprel_lo(x) + lw $4, %gottprel(x)($28) + lui $4, %tprel_hi(x) + addiu $4, $4, %tprel_lo(x)' + tls_first_major=2 + tls_first_minor=16 + tls_as_opt='-32 --fatal-warnings' + ;; + loongarch*-*-*) + conftest_s=' + .section .tdata,"awT",@progbits x: .word 2 .text @@ -28105,6 +28125,12 @@ $as_echo "#define HAVE_AS_RISCV_ATTRIBUTE 1" >>confdefs.h fi + ;; + loongarch*-*-*) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for .dtprelword support" >&5 +$as_echo_n "checking assembler for .dtprelword support... " >&6; } + +$as_echo "#define HAVE_AS_DTPRELWORD 1" >>confdefs.h ;; s390*-*-*) { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for .gnu_attribute support" >&5 @@ -28289,7 +28315,7 @@ esac # Linux on IA64 might be able to use the Intel assembler. case "$target" in - mips*-*-* | *-*-hpux* ) + mips*-*-* | loongarch*-*-* | *-*-hpux* ) if test x$gas_flag = xyes \ || test x"$host" != x"$build" \ || test ! -x "$gcc_cv_as" \ @@ -28310,7 +28336,7 @@ esac # version to the per-target configury. case "$cpu_type" in aarch64 | alpha | arc | arm | avr | bfin | cris | csky | i386 | m32c | m68k \ - | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc \ + | microblaze | mips | loongarch | nds32 | nios2 | pa | riscv | rs6000 | score | sparc \ | tilegx | tilepro | visium | xstormy16 | xtensa) insn="nop" ;; diff --git a/gcc/configure.ac b/gcc/configure.ac index 44154f69f0a2279fb17846fb23c729653622bdcf..7967d189b172afe80978a85cf34b6f2de9c00e7c 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -868,6 +868,9 @@ AC_ARG_ENABLE(fixed-point, mips*-*-*) enable_fixed_point=yes ;; + loongarch*-*-*) + enable_fixed_point=yes + ;; *) AC_MSG_WARN([fixed-point is not supported for this target, ignored]) enable_fixed_point=no @@ -3625,6 +3628,23 @@ x: mips*-*-*) conftest_s=' .section .tdata,"awT",@progbits +x: + .word 2 + .text + addiu $4, $28, %tlsgd(x) + addiu $4, $28, %tlsldm(x) + lui $4, %dtprel_hi(x) + addiu $4, $4, %dtprel_lo(x) + lw $4, %gottprel(x)($28) + lui $4, %tprel_hi(x) + addiu $4, $4, %tprel_lo(x)' + tls_first_major=2 + tls_first_minor=16 + tls_as_opt='-32 --fatal-warnings' + ;; + loongarch*-*-*) + conftest_s=' + .section .tdata,"awT",@progbits x: .word 2 .text @@ -5088,6 +5108,160 @@ configured with --enable-newlib-nano-formatted-io.]) [AC_DEFINE(HAVE_AS_RISCV_ATTRIBUTE, 1, [Define if your assembler supports .attribute.])]) ;; + loongarch*-*-*) + gcc_GAS_CHECK_FEATURE([explicit relocation support], + gcc_cv_as_loongarch_explicit_relocs, [2,14,0],, +[ lw $4,%gp_rel(foo)($4)],, + [if test x$target_cpu_default = x + then target_cpu_default=MASK_EXPLICIT_RELOCS + else target_cpu_default="($target_cpu_default)|MASK_EXPLICIT_RELOCS" + fi]) + + gcc_GAS_CHECK_FEATURE([-mno-shared support], + gcc_cv_as_loongarch_no_shared, [2,16,0], [-mno-shared], [nop],, + [AC_DEFINE(HAVE_AS_NO_SHARED, 1, + [Define if the assembler understands -mno-shared.])]) + + gcc_GAS_CHECK_FEATURE([.gnu_attribute support], + gcc_cv_as_loongarch_gnu_attribute, [2,18,0],, + [.gnu_attribute 4,1],, + [AC_DEFINE(HAVE_AS_GNU_ATTRIBUTE, 1, + [Define if your assembler supports 
.gnu_attribute.])]) + + gcc_GAS_CHECK_FEATURE([.module support], + gcc_cv_as_loongarch_dot_module,,[-32], + [.module mips2 + .module fp=xx],, + [AC_DEFINE(HAVE_AS_DOT_MODULE, 1, + [Define if your assembler supports .module.])]) + if test x$gcc_cv_as_loongarch_dot_module = xno \ + && test x$with_fp_32 != x; then + AC_MSG_ERROR( + [Requesting --with-fp-32= requires assembler support for .module.]) + fi + + gcc_GAS_CHECK_FEATURE([.micromips support], + gcc_cv_as_micromips_support,,[--fatal-warnings], + [.set micromips],, + [AC_DEFINE(HAVE_GAS_MICROMIPS, 1, + [Define if your assembler supports the .set micromips directive])]) + + gcc_GAS_CHECK_FEATURE([.dtprelword support], + gcc_cv_as_loongarch_dtprelword, [2,18,0],, + [.section .tdata,"awT",@progbits +x: + .word 2 + .text + .dtprelword x+0x8000],, + [AC_DEFINE(HAVE_AS_DTPRELWORD, 1, + [Define if your assembler supports .dtprelword.])]) + + gcc_GAS_CHECK_FEATURE([DSPR1 mult with four accumulators support], + gcc_cv_as_loongarch_dspr1_mult,,, +[ .set mips32r2 + .set nodspr2 + .set dsp + madd $ac3,$4,$5 + maddu $ac3,$4,$5 + msub $ac3,$4,$5 + msubu $ac3,$4,$5 + mult $ac3,$4,$5 + multu $ac3,$4,$5],, + [AC_DEFINE(HAVE_AS_DSPR1_MULT, 1, + [Define if your assembler supports DSPR1 mult.])]) + + AC_MSG_CHECKING(assembler and linker for explicit JALR relocation) + gcc_cv_as_ld_jalr_reloc=no + if test $gcc_cv_as_loongarch_explicit_relocs = yes; then + if test $in_tree_ld = yes ; then + if test "$gcc_cv_gld_major_version" -eq 2 -a "$gcc_cv_gld_minor_version" -ge 20 -o "$gcc_cv_gld_major_version" -gt 2 \ + && test $in_tree_ld_is_elf = yes; then + gcc_cv_as_ld_jalr_reloc=yes + fi + elif test x$gcc_cv_as != x -a x$gcc_cv_ld != x -a x$gcc_cv_objdump != x; then + echo ' .ent x' > conftest.s + echo 'x: lw $2,%got_disp(y)($3)' >> conftest.s + echo ' lw $25,%call16(y)($28)' >> conftest.s + echo ' .reloc 1f,R_LOONGISA_JALR,y' >> conftest.s + echo '1: jalr $25' >> conftest.s + echo ' .reloc 1f,R_LOONGISA_JALR,x' >> conftest.s + echo '1: jalr $25' >> conftest.s + echo ' .end x' >> conftest.s + if $gcc_cv_as -o conftest.o conftest.s >/dev/null 2>&AS_MESSAGE_LOG_FD \ + && $gcc_cv_ld -shared -o conftest.so conftest.o >/dev/null 2>&AS_MESSAGE_LOG_FD; then + if $gcc_cv_objdump -d conftest.so | grep jalr >/dev/null 2>&1 \ + && $gcc_cv_objdump -d conftest.so | grep "bal.*" >/dev/null 2>&1; then + gcc_cv_as_ld_jalr_reloc=yes + fi + fi + rm -f conftest.* + fi + fi + if test $gcc_cv_as_ld_jalr_reloc = yes; then + if test x$target_cpu_default = x; then + target_cpu_default=MASK_RELAX_PIC_CALLS + else + target_cpu_default="($target_cpu_default)|MASK_RELAX_PIC_CALLS" + fi + fi + AC_MSG_RESULT($gcc_cv_as_ld_jalr_reloc) + + AC_CACHE_CHECK([linker for .eh_frame personality relaxation], + [gcc_cv_ld_loongarch_personality_relaxation], + [gcc_cv_ld_loongarch_personality_relaxation=no + if test $in_tree_ld = yes ; then + if test "$gcc_cv_gld_major_version" -eq 2 \ + -a "$gcc_cv_gld_minor_version" -ge 21 \ + -o "$gcc_cv_gld_major_version" -gt 2; then + gcc_cv_ld_loongarch_personality_relaxation=yes + fi + elif test x$gcc_cv_as != x \ + -a x$gcc_cv_ld != x \ + -a x$gcc_cv_readelf != x ; then + cat > conftest.s < /dev/null 2>&1 \ + && $gcc_cv_ld -o conftest conftest.o -shared > /dev/null 2>&1; then + if $gcc_cv_readelf -d conftest 2>&1 \ + | grep TEXTREL > /dev/null 2>&1; then + : + elif $gcc_cv_readelf --relocs conftest 2>&1 \ + | grep 'R_LOONGISA_REL32 *$' > /dev/null 2>&1; then + : + else + gcc_cv_ld_loongarch_personality_relaxation=yes + fi + fi + fi + rm -f conftest.s conftest.o 
conftest]) + if test x$gcc_cv_ld_loongarch_personality_relaxation = xyes; then + AC_DEFINE(HAVE_LD_PERSONALITY_RELAXATION, 1, + [Define if your linker can relax absolute .eh_frame personality +pointers into PC-relative form.]) + fi + + gcc_GAS_CHECK_FEATURE([-mnan= support], + gcc_cv_as_loongarch_nan,, + [-mnan=2008],,, + [AC_DEFINE(HAVE_AS_NAN, 1, + [Define if the assembler understands -mnan=.])]) + if test x$gcc_cv_as_loongarch_nan = xno \ + && test x$with_nan != x; then + AC_MSG_ERROR( + [Requesting --with-nan= requires assembler support for -mnan=]) + fi + ;; s390*-*-*) gcc_GAS_CHECK_FEATURE([.gnu_attribute support], gcc_cv_as_s390_gnu_attribute, [2,18,0],, @@ -5125,7 +5299,7 @@ esac # Linux on IA64 might be able to use the Intel assembler. case "$target" in - mips*-*-* | *-*-hpux* ) + mips*-*-* | loongarch*-*-* | *-*-hpux* ) if test x$gas_flag = xyes \ || test x"$host" != x"$build" \ || test ! -x "$gcc_cv_as" \ diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi index 7d98ec4190aec574fcd77a924cc8f29af11de065..8d076bcdb8b878c45d26187cf0fd17aff2a16068 100644 --- a/gcc/doc/install.texi +++ b/gcc/doc/install.texi @@ -1113,8 +1113,9 @@ sysv, aix. @itemx --without-multilib-list Specify what multilibs to build. @var{list} is a comma separated list of values, possibly consisting of a single value. Currently only implemented -for aarch64*-*-*, arm*-*-*, riscv*-*-*, sh*-*-* and x86-64-*-linux*. The -accepted values and meaning for each target is given below. +for aarch64*-*-*, arm*-*-*, loongarch64-*-*, riscv*-*-*, sh*-*-* and +x86-64-*-linux*. The accepted values and meaning for each target is given +below. @table @code @item aarch64*-*-* @@ -1201,6 +1202,14 @@ profile. The union of these options is considered when specifying both @code{-mfloat-abi=hard} @end multitable +@item loongarch*-*-* +@var{list} is a comma-separated list of the following ABI identifiers: +@code{lp64d[/base]} @code{lp64f[/base]} @code{lp64d[/base]}, where the +@code{/base} suffix may be omitted, to enable their respective run-time +libraries. If @var{list} is empty, @code{default} +or @option{--with-multilib-list} is not specified, then the default ABI +as specified by @option{--with-abi} or implied by @option{--target} is selected. + @item riscv*-*-* @var{list} is a single ABI name. The target architecture must be either @code{rv32gc} or @code{rv64gc}. This will build a single multilib for the @@ -4300,6 +4309,34 @@ This configuration is intended for embedded systems. Lattice Mico32 processor. This configuration is intended for embedded systems running uClinux. +@html +
+<hr />
+@end html +@anchor{loongarch} +@heading LoongArch +LoongArch processor. +The following LoongArch targets are available: +@table @code +@item loongarch64-linux-gnu* +LoongArch processor running GNU/Linux. This target triplet may be coupled +with a small set of possible suffixes to identify their default ABI type: +@table @code +@item f64 +Uses @code{lp64d/base} ABI by default. +@item f32 +Uses @code{lp64f/base} ABI by default. +@item sf +Uses @code{lp64s/base} ABI by default. +@end table + +@item loongarch64-linux-gnu +Same as @code{loongarch64-linux-gnuf64}, but may be used with +@option{--with-abi=*} to configure the default ABI type. +@end table + +More information about LoongArch can be found at +@uref{https://github.com/loongson/LoongArch-Documentation}. + @html
@end html diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 4b0fd2ffb3811400b28786751724c7bc0b343252..ea6a1d96c70b7c88366cd6a1f75487cd296141fd 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -941,6 +941,16 @@ Objective-C and Objective-C++ Dialects}. @gccoptlist{-mbarrel-shift-enabled -mdivide-enabled -mmultiply-enabled @gol -msign-extend-enabled -muser-enabled} +@emph{LoongArch Options} +@gccoptlist{-march=@var{cpu-type} -mtune=@var{cpu-type} -mabi=@var{base-abi-type} @gol +-mfpu=@var{fpu-type} -msoft-float -msingle-float -mdouble-float @gol +-mbranch-cost=@var{n} -mcheck-zero-division -mno-check-zero-division @gol +-mcond-move-int -mno-cond-move-int @gol +-mcond-move-float -mno-cond-move-float @gol +-memcpy -mno-memcpy -mstrict-align @gol +-mmax-inline-memcpy-size=@var{n} @gol +-mlra -mcmodel=@var{code-model}} + @emph{M32R/D Options} @gccoptlist{-m32r2 -m32rx -m32r @gol -mdebug @gol @@ -16751,6 +16761,7 @@ platform. * HPPA Options:: * IA-64 Options:: * LM32 Options:: +* LoongArch Options:: * M32C Options:: * M32R/D Options:: * M680x0 Options:: @@ -22138,6 +22149,195 @@ Enable user-defined instructions. @end table +@node LoongArch Options +@subsection LoongArch Options +@cindex LoongArch Options + +These command-line options are defined for LoongArch targets: + +@table @gcctabopt +@item -march=@var{cpu-type} +@opindex -march +Generate instructions for the machine type @var{cpu-type}. In contrast to +@option{-mtune=@var{cpu-type}}, which merely tunes the generated code +for the specified @var{cpu-type}, @option{-march=@var{cpu-type}} allows GCC +to generate code that may not run at all on processors other than the one +indicated. Specifying @option{-march=@var{cpu-type}} implies +@option{-mtune=@var{cpu-type}}, except where noted otherwise. + +The choices for @var{cpu-type} are: + +@table @samp +@item native +This selects the CPU to generate code for at compilation time by determining +the processor type of the compiling machine. Using @option{-march=native} +enables all instruction subsets supported by the local machine (hence +the result might not run on different machines). Using @option{-mtune=native} +produces code optimized for the local machine under the constraints +of the selected instruction set. +@item loongarch64 +A generic CPU with 64-bit extensions. +@item la464 +LoongArch LA464 CPU with LBT, LSX, LASX, LVZ. +@end table + + +@item -mtune=@var{cpu-type} +@opindex mtune +Optimize the output for the given processor, specified by microarchitecture +name. + +@item -mabi=@var{base-abi-type} +@opindex mabi +Generate code for the specified calling convention. @gol +Set base ABI to one of: @gol +@table @samp +@item lp64d +Uses 64-bit general purpose registers and 32/64-bit floating-point +registers for parameter passing. Data model is LP64, where int +is 32 bits, while long int and pointers are 64 bits. +@item lp64f +Uses 64-bit general purpose registers and 32-bit floating-point +registers for parameter passing. Data model is LP64, where int +is 32 bits, while long int and pointers are 64 bits. +@item lp64s +Uses 64-bit general purpose registers and no floating-point +registers for parameter passing. Data model is LP64, where int +is 32 bits, while long int and pointers are 64 bits. +@end table + + +@item -mfpu=@var{fpu-type} +@opindex mfpu +Generating code for the specified FPU type: @gol +@table @samp +@item 64 +Allow the use of hardware floating-point instructions for 32-bit +and 64-bit operations. 
+@item 32 +Allow the use of hardware floating-point instructions for 32-bit +operations. +@item none +@item 0 +Prevent the use of hardware floating-point instructions. +@end table + + +@item -msoft-float +@opindex msoft-float +Force @option{-mfpu=none} and prevents the use of floating-point +registers for parameter passing. This option may change the target +ABI. + +@item -msingle-float +@opindex -msingle-float +Force @option{-mfpu=32} and allow the use of 32-bit floating-point +registers for parameter passing. This option may change the target +ABI. + +@item -mdouble-float +@opindex -mdouble-float +Force @option{-mfpu=64} and allow the use of 32/64-bit floating-point +registers for parameter passing. This option may change the target +ABI. + + +@item -mbranch-cost=@var{n} +@opindex -mbranch-cost +Set the cost of branches to roughly n instructions. + +@item -mcheck-zero-division +@itemx -mno-check-zero-divison +@opindex -mcheck-zero-division +Trap (do not trap) on integer division by zero. The default is '-mcheck-zero- +division'. + + +@item -mcond-move-int +@itemx -mno-cond-move-int +@opindex -mcond-move-int +Conditional moves for floating point are enabled (disabled). The default is +'-mcond-move-float'. + +@item -mmemcpy +@itemx -mno-memcpy +@opindex -mmemcpy +Force (do not force) the use of memcpy for non-trivial block moves. The default +is '-mno-memcpy', which allows GCC to inline most constant-sized copies. + + +@item -mlra +@opindex -mlra +Use the new LRA register allocator. By default, the LRA is used. + +@item -mstrict-align +@itemx -mno-strict-align +@opindex -mstrict-align +Avoid or allow generating memory accesses that may not be aligned on a natural +object boundary as described in the architecture specification. + +@item -msmall-data-limit=@var{number} +@opindex -msmall-data-limit +Put global and static data smaller than @code{number} bytes into a special section (on some targets). +Default value is 0. + +@item -mmax-inline-memcpy-size=@var{n} +@opindex -mmax-inline-memcpy-size +Set the max size n of memcpy to inline, default @code{n} is 1024. + +@item -mcmodel=@var{code-model} +Default code model is normal. +Set the code model to one of: +@table @samp +@item tiny-static +@itemize @bullet +@item +local symbol and global strong symbol: The data section must be within +/-2MiB addressing space. +The text section must be within +/-128MiB addressing space. +@item +global weak symbol: The got table must be within +/-2GiB addressing space. +@end itemize + +@item tiny +@itemize @bullet +@item +local symbol: The data section must be within +/-2MiB addressing space. +The text section must be within +/-128MiB +addressing space. +@item +global symbol: The got table must be within +/-2GiB addressing space. +@end itemize + +@item normal +@itemize @bullet +@item +local symbol: The data section must be within +/-2GiB addressing space. +The text section must be within +/-128MiB addressing space. +@item +global symbol: The got table must be within +/-2GiB addressing space. +@end itemize + +@item large +@itemize @bullet +@item +local symbol: The data section must be within +/-2GiB addressing space. +The text section must be within +/-128GiB addressing space. +@item +global symbol: The got table must be within +/-2GiB addressing space. +@end itemize + +@item extreme(Not implemented yet) +@itemize @bullet +@item +local symbol: The data and text section must be within +/-8EiB addressing space. +@item +global symbol: The data got table must be within +/-8EiB addressing space. 
+@end itemize +@end table +@end table + + + @node M32C Options @subsection M32C Options @cindex M32C options diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 2c67c818da5ae48769e579307fb812efc2842e22..85803837dd781bbcf22e52a67c0c60a637b6bab1 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -2739,6 +2739,61 @@ Memory addressed using the small base register ($sb). $r1h @end table +@item LoongArch---@file{config/loongarch/constraints.md} +@table @code +@item a +A constant call global and noplt address. +@item c +A constant call local address. +@item e +A register that is used as function call. +@item f +A floating-point register (if available). +@item h +A constant call plt address. +@item j +A rester that is used as sibing call. +@item l +A signed 16-bit constant. +@item m +A memory operand whose address is formed by a base register and offset +that is suitable for use in instructions with the same addressing mode +as @code{st.w} and @code{ld.w}. +@item q +A general-purpose register except for $r0 and $r1 for csr instructions. +@item t +A constant call weak address. +@item u +A signed 52bit constant and low 32-bit is zero (for logic instructions). +@item v +A nsigned 64-bit constant and low 44-bit is zero (for logic instructions). +@item z +A floating-point condition code register. +@item G +Floating-point zero. +@item I +A signed 12-bit constant (for arithmetic instructions). +@item J +Integer zero. +@item K +An unsigned 12-bit constant (for logic instructions). +@item Q +A 12-bit constant used for logical operations. +@item W +A memory address based on a member of @code{BASE_REG_CLASS}. This is +true for allreferences. +@item Yd +A constant @code{move_operand} that can be safely loaded using +@code{la}. +@item ZB +An address that is held in a general-purpose register. +The offset is zero. +@item ZC +A memory operand whose address is formed by a base register and offset +that is suitable for use in instructions with the same addressing mode +as @code{ll.w} and @code{sc.w}. +@end table + @item MicroBlaze---@file{config/microblaze/constraints.md} @table @code @item d diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C index 2e0ef685f36fa0482b800a0078200d015fe35d1c..424979a604b88c6edb48803761f957dc1b5a4ca5 100644 --- a/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C +++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C @@ -1,6 +1,6 @@ // PR c++/49673: check that test_data goes into .rodata // { dg-do compile { target c++11 } } -// { dg-additional-options -G0 { target { { alpha*-*-* frv*-*-* ia64-*-* lm32*-*-* m32r*-*-* microblaze*-*-* mips*-*-* nios2-*-* powerpc*-*-* rs6000*-*-* } && { ! { *-*-darwin* *-*-aix* alpha*-*-*vms* } } } } } +// { dg-additional-options -G0 { target { { alpha*-*-* frv*-*-* ia64-*-* lm32*-*-* m32r*-*-* microblaze*-*-* mips*-*-* loongarch*-*-* nios2-*-* powerpc*-*-* rs6000*-*-* } && { ! { *-*-darwin* *-*-aix* alpha*-*-*vms* } } } } } // { dg-final { scan-assembler "\\.rdata" { target mips*-*-* } } } // { dg-final { scan-assembler "rodata" { target { { *-*-linux-gnu *-*-gnu* *-*-elf } && { ! { mips*-*-* riscv*-*-* } } } } } } diff --git a/gcc/testsuite/g++.old-deja/g++.abi/ptrmem.C b/gcc/testsuite/g++.old-deja/g++.abi/ptrmem.C index bda7960d8a210591b97283917d57df9bcd68b5d4..f69000e90811c097e19dadd86a21f8b4cb7a2bd6 100644 --- a/gcc/testsuite/g++.old-deja/g++.abi/ptrmem.C +++ b/gcc/testsuite/g++.old-deja/g++.abi/ptrmem.C @@ -7,7 +7,7 @@ function. However, some platforms use all bits to encode a function pointer. 
Such platforms use the lowest bit of the delta, that is shifted left by one bit. */ -#if defined __MN10300__ || defined __SH5__ || defined __arm__ || defined __thumb__ || defined __mips__ || defined __aarch64__ || defined __PRU__ +#if defined __MN10300__ || defined __SH5__ || defined __arm__ || defined __thumb__ || defined __mips__ || defined __aarch64__ || defined __PRU__ || defined __loongarch__ #define ADJUST_PTRFN(func, virt) ((void (*)())(func)) #define ADJUST_DELTA(delta, virt) (((delta) << 1) + !!(virt)) #else diff --git a/gcc/testsuite/g++.old-deja/g++.pt/ptrmem6.C b/gcc/testsuite/g++.old-deja/g++.pt/ptrmem6.C index 9f4bbe43f89fda192ff7a97095ab2053e89e8afb..8f8f7017ab761c3fd316b7b9b86afdb5a56fbef7 100644 --- a/gcc/testsuite/g++.old-deja/g++.pt/ptrmem6.C +++ b/gcc/testsuite/g++.old-deja/g++.pt/ptrmem6.C @@ -25,7 +25,7 @@ int main() { h<&B::j>(); // { dg-error "" } g<(void (A::*)()) &A::f>(); // { dg-error "" "" { xfail c++11 } } h<(int A::*) &A::i>(); // { dg-error "" "" { xfail c++11 } } - g<(void (A::*)()) &B::f>(); // { dg-error "" "" { xfail { c++11 && { aarch64*-*-* arm*-*-* mips*-*-* } } } } + g<(void (A::*)()) &B::f>(); // { dg-error "" "" { xfail { c++11 && { aarch64*-*-* arm*-*-* mips*-*-* loongarch*-*-* } } } } h<(int A::*) &B::j>(); // { dg-error "" } g<(void (A::*)()) 0>(); // { dg-error "" "" { target { ! c++11 } } } h<(int A::*) 0>(); // { dg-error "" "" { target { ! c++11 } } } diff --git a/gcc/testsuite/gcc.dg/20020312-2.c b/gcc/testsuite/gcc.dg/20020312-2.c index 52c33d09b90a94e52c498fa78a96cbd37952366e..92bc150df0fc8f7871e05f6e9930eec6404fc895 100644 --- a/gcc/testsuite/gcc.dg/20020312-2.c +++ b/gcc/testsuite/gcc.dg/20020312-2.c @@ -37,6 +37,8 @@ extern void abort (void); /* PIC register is r1, but is used even without -fpic. */ #elif defined(__lm32__) /* No pic register. */ +#elif defined(__loongarch__) +/* No pic register. */ #elif defined(__M32R__) /* No pic register. */ #elif defined(__m68k__) diff --git a/gcc/testsuite/gcc.dg/loop-8.c b/gcc/testsuite/gcc.dg/loop-8.c index 1eefccc1a3b1e67b014d54b8d75e973b339a39d2..0f2458d9d74672f04eb8d53c52a2b9f33c0ce04d 100644 --- a/gcc/testsuite/gcc.dg/loop-8.c +++ b/gcc/testsuite/gcc.dg/loop-8.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */ -/* { dg-skip-if "unexpected IV" { "hppa*-*-* mips*-*-* visium-*-* powerpc*-*-* riscv*-*-*" } } */ +/* { dg-skip-if "unexpected IV" { "hppa*-*-* mips*-*-* visium-*-* powerpc*-*-* riscv*-*-* loongarch*-*-*"} } */ /* Load immediate on condition is available from z13 on and prevents moving the load out of the loop, so always run this test with -march=zEC12 that does not have load immediate on condition. 
*/ diff --git a/gcc/testsuite/gcc.dg/stack-usage-1.c b/gcc/testsuite/gcc.dg/stack-usage-1.c index be1254a7348d8b9fedec182c186653746e70c4fa..93d221283e3fba5f91944e0f988a056ab4901bca 100644 --- a/gcc/testsuite/gcc.dg/stack-usage-1.c +++ b/gcc/testsuite/gcc.dg/stack-usage-1.c @@ -103,6 +103,9 @@ #define SIZE 252 #elif defined (__csky__) # define SIZE 252 +#elif defined (__loongarch__) +# define SIZE 256 - 16 +#else #else # define SIZE 256 #endif diff --git a/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c b/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c index 31585a0ae69477e00302dd719746930c64f39bcd..dd9c3aea6956338824700571f5de0242c587dd2b 100644 --- a/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c +++ b/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c @@ -9,7 +9,7 @@ /* arm_hf_eabi: Variadic funcs use Base AAPCS. Normal funcs use VFP variant. avr: Variadic funcs don't pass arguments in registers, while normal funcs do. */ -/* { dg-skip-if "Variadic funcs use different argument passing from normal funcs" { arm_hf_eabi || { avr-*-* riscv*-*-* or1k*-*-* msp430-*-* amdgcn-*-* pru-*-* } } } */ +/* { dg-skip-if "Variadic funcs use different argument passing from normal funcs" { arm_hf_eabi || { avr-*-* riscv*-*-* or1k*-*-* msp430-*-* amdgcn-*-* pru-*-* loongarch*-*-*} } } */ /* { dg-skip-if "Variadic funcs have all args on stack. Normal funcs have args in registers." { nds32*-*-* } { v850*-*-* } } */ /* { dg-require-effective-target untyped_assembly } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c index 6b6255b9713b061fc26c48ddf162959e814b2e6a..224dd4f72ef82724430eb37966f5dca246184f10 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c @@ -5,7 +5,7 @@ When the condition is true, we distribute "(int) (a + b)" as "(int) a + (int) b", otherwise we keep the original. */ -/* { dg-do compile { target { ! mips64 } } } */ +/* { dg-do compile { target { ! mips64 } && { ! loongarch64 } } } */ /* { dg-options "-O -fno-tree-forwprop -fno-tree-ccp -fwrapv -fdump-tree-fre1-details" } */ /* From PR14844. */ diff --git a/gcc/testsuite/gcc.target/loongarch/loongarch.exp b/gcc/testsuite/gcc.target/loongarch/loongarch.exp new file mode 100644 index 0000000000000000000000000000000000000000..9f374a9bc73cf917a8b5d4dd4811e3b29ef06539 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/loongarch.exp @@ -0,0 +1,40 @@ +# Copyright (C) 2021 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# GCC testsuite that uses the `dg.exp' driver. + +# Exit immediately if this isn't a LoongArch target. +if ![istarget loongarch*-*-*] then { + return +} + +# Load support procs. +load_lib gcc-dg.exp + +# If a testcase doesn't have special options, use these. +global DEFAULT_CFLAGS +if ![info exists DEFAULT_CFLAGS] then { + set DEFAULT_CFLAGS " " +} + +# Initialize `dg'. 
+dg-init + +# Main loop. +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \ + "" $DEFAULT_CFLAGS +# All done. +dg-finish diff --git a/gcc/testsuite/gcc.target/loongarch/tst-asm-const.c b/gcc/testsuite/gcc.target/loongarch/tst-asm-const.c new file mode 100644 index 0000000000000000000000000000000000000000..2e04b99e301a1fdb22fb690c8655783a1666abf6 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/tst-asm-const.c @@ -0,0 +1,16 @@ +/* Test asm const. */ +/* { dg-do compile } */ +/* { dg-final { scan-assembler-times "foo:.*\\.long 1061109567.*\\.long 52" 1 } } */ +int foo () +{ + __asm__ volatile ( + "foo:" + "\n\t" + ".long %a0\n\t" + ".long %a1\n\t" + : + :"i"(0x3f3f3f3f), "i"(52) + : + ); +} + diff --git a/gcc/testsuite/go.test/go-test.exp b/gcc/testsuite/go.test/go-test.exp index 51f9b381d677039d69da38ef1e9a3df9b8fb1517..f6a94e33cab3bf426fc6b3430111073b4065e7a2 100644 --- a/gcc/testsuite/go.test/go-test.exp +++ b/gcc/testsuite/go.test/go-test.exp @@ -251,6 +251,9 @@ proc go-set-goarch { } { "riscv64-*-*" { set goarch "riscv64" } + "loongarch64-*-*" { + set goarch "loongarch64" + } "s390*-*-*" { if [check_effective_target_ilp32] { set goarch "s390" diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index bd62a0d9e799fc703935d24c565ec1beb823a1d5..9468420b5d279e40730bc9219c20ccba2f3b4a02 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -287,6 +287,10 @@ proc check_weak_available { } { return 1 } + if { [ string first "loongarch" $target_cpu ] >= 0 } { + return 1 + } + # All AIX targets should support it if { [istarget *-*-aix*] } { @@ -707,6 +711,7 @@ proc check_profiling_available { test_what } { || [istarget m68k-*-elf] || [istarget m68k-*-uclinux*] || [istarget mips*-*-elf*] + || [istarget loongarch*-*-elf*] || [istarget mmix-*-*] || [istarget mn10300-*-elf*] || [istarget moxie-*-elf*] @@ -1277,6 +1282,14 @@ proc check_effective_target_hard_float { } { }] } + if { [istarget loongarch*-*-*] } { + return [check_no_compiler_messages hard_float assembly { + #if (defined __loongarch_soft_float) + #error __loongarch_soft_float + #endif + }] + } + # This proc is actually checking the availabilty of FPU # support for doubles, so on the RX we must fail if the # 64-bit double multilib has been selected. 
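The check_effective_target_hard_float addition above keys off the __loongarch_soft_float predefine, and the libgcc files later in this patch guard on __loongarch_hard_float; user code can test the same macros to learn which float ABI a translation unit was built for. A minimal sketch (only these two macro spellings are taken from the patch; everything else is illustrative):

/* Compile-time float ABI check, mirroring the preprocessor test used by
   the hard_float effective-target check above.  */
const char *
float_abi (void)
{
#if defined (__loongarch_soft_float)
  return "soft-float";		/* e.g. -mabi=lp64s or -msoft-float */
#else
  return "hard-float";		/* lp64f or lp64d configurations */
#endif
}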
@@ -6380,6 +6393,7 @@ proc check_effective_target_vect_float { } { expr { [istarget i?86-*-*] || [istarget x86_64-*-*] || [istarget powerpc*-*-*] || [istarget mips-sde-elf] + || [istarget loongarch-sde-elf] || [istarget mipsisa64*-*-*] || [istarget ia64-*-*] || [istarget aarch64*-*-*] @@ -7700,6 +7714,7 @@ proc check_effective_target_sync_char_short { } { || [istarget crisv32-*-*] || [istarget cris-*-*] || ([istarget sparc*-*-*] && [check_effective_target_sparc_v9]) || ([istarget arc*-*-*] && [check_effective_target_arc_atomic]) + || [istarget loongarch*-*-*] || [check_effective_target_mips_llsc] }}] } @@ -9626,6 +9641,7 @@ proc check_effective_target_branch_cost {} { || [istarget frv*-*-*] || [istarget i?86-*-*] || [istarget x86_64-*-*] || [istarget mips*-*-*] + || [istarget loongarch*-*-*] || [istarget s390*-*-*] || [istarget riscv*-*-*] || [istarget sh*-*-*] } { diff --git a/libgcc/config.host b/libgcc/config.host index c529cc40f0c8d536524e2539483e6b148ded4413..a14a3af47225b4a20615e618412609b553015f59 100644 --- a/libgcc/config.host +++ b/libgcc/config.host @@ -166,6 +166,23 @@ mips*-*-*) fi tmake_file="${tmake_file} t-softfp" ;; +loongarch*-*-*) + # All MIPS targets provide a full set of FP routines. + cpu_type=loongarch + tmake_file="loongarch/t-loongarch" + if test "${libgcc_cv_loongarch_hard_float}" = yes; then + tmake_file="${tmake_file} t-hardfp-sfdf t-hardfp" + else + tmake_file="${tmake_file} t-softfp-sfdf" + fi + if test "${ac_cv_sizeof_long_double}" = 16; then + tmake_file="${tmake_file} loongarch/t-softfp-tf" + fi + if test "${host_address}" = 64; then + tmake_file="${tmake_file} loongarch/t-loongarch64" + fi + tmake_file="${tmake_file} t-softfp" + ;; nds32*-*) cpu_type=nds32 ;; @@ -998,6 +1015,16 @@ mips*-*-linux*) # Linux MIPS, either endian. esac md_unwind_header=mips/linux-unwind.h ;; +loongarch*-*-linux*) # Linux MIPS, either endian. + extra_parts="$extra_parts crtfastmath.o" + tmake_file="${tmake_file} t-crtfm" + case ${host} in + *) + tmake_file="${tmake_file} t-slibgcc-libgcc" + ;; + esac + md_unwind_header=loongarch/linux-unwind.h + ;; mips*-sde-elf*) tmake_file="$tmake_file mips/t-crtstuff mips/t-mips16" case "${with_newlib}" in @@ -1011,6 +1038,19 @@ mips*-sde-elf*) esac extra_parts="$extra_parts crti.o crtn.o" ;; +loongarch*-sde-elf*) + tmake_file="$tmake_file loongarch/t-crtstuff" + case "${with_newlib}" in + yes) + # newlib / libgloss. + ;; + *) + # MIPS toolkit libraries. + tmake_file="$tmake_file loongarch/t-sdemtk" + ;; + esac + extra_parts="$extra_parts crti.o crtn.o" + ;; mipsisa32-*-elf* | mipsisa32el-*-elf* | \ mipsisa32r2-*-elf* | mipsisa32r2el-*-elf* | \ mipsisa32r6-*-elf* | mipsisa32r6el-*-elf* | \ diff --git a/libgcc/config/loongarch/crtfastmath.c b/libgcc/config/loongarch/crtfastmath.c new file mode 100644 index 0000000000000000000000000000000000000000..052295ea93e2ee34bfc01d8c9a1110e1d761447b --- /dev/null +++ b/libgcc/config/loongarch/crtfastmath.c @@ -0,0 +1,53 @@ +/* Copyright (C) 2010-2018 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. 
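The config.host fragment above pulls in loongarch/t-softfp-tf whenever long double is 16 bytes, so 128-bit long double arithmetic is handled by the soft-fp routines configured by sfp-machine.h further down, even on hard-float multilibs. A sketch of the kind of source that ends up in those routines (the __addtf3 name is the generic libgcc entry point for TFmode addition, not something introduced by this patch):

/* long double is the IEEE binary128 type on loongarch64-linux-gnu, so
   this addition is expected to compile to a call to the libgcc soft-fp
   routine __addtf3 rather than to hardware FP instructions.  */
long double
accumulate (long double acc, long double x)
{
  return acc + x;
}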
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License + and a copy of the GCC Runtime Library Exception along with this + program; see the files COPYING3 and COPYING.RUNTIME respectively. + If not, see . */ + +#ifdef __loongarch_hard_float + +/* Flush denormalized numbers to zero. */ +#define _FPU_FLUSH_TZ 0x1000000 + +/* Rounding control. */ +#define _FPU_RC_NEAREST 0x0 /* RECOMMENDED */ +#define _FPU_RC_ZERO 0x1 +#define _FPU_RC_UP 0x2 +#define _FPU_RC_DOWN 0x3 + +/* Enable interrupts for IEEE exceptions. */ +#define _FPU_IEEE 0x00000F80 + +/* Macros for accessing the hardware control word. */ +#define _FPU_GETCW(cw) __asm__ ("movgr2fcsr %0,$r1" : "=r" (cw)) +#define _FPU_SETCW(cw) __asm__ ("movfcsr2gr %0,$r1" : : "r" (cw)) + +static void __attribute__((constructor)) +set_fast_math (void) +{ + unsigned int fcr; + + /* Flush to zero, round to nearest, IEEE exceptions disabled. */ + fcr = _FPU_FLUSH_TZ | _FPU_RC_NEAREST; + + _FPU_SETCW(fcr); +} + +#endif /* __loongarch_hard_float */ diff --git a/libgcc/config/loongarch/crti.S b/libgcc/config/loongarch/crti.S new file mode 100644 index 0000000000000000000000000000000000000000..dcd05afea02b7a4cbbc990f6c7629efb46ed133b --- /dev/null +++ b/libgcc/config/loongarch/crti.S @@ -0,0 +1,43 @@ +/* Copyright (C) 2001-2018 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* 4 slots for argument spill area. 1 for cpreturn, 1 for stack. + Return spill offset of 40 and 20. Aligned to 16 bytes for n32. */ + + .section .init,"ax",@progbits + .globl _init + .type _init,@function +_init: + addi.d $r3,$r3,-48 + st.d $r1,$r3,40 + addi.d $r3,$r3,48 + jirl $r0,$r1,0 + + .section .fini,"ax",@progbits + .globl _fini + .type _fini,@function +_fini: + addi.d $r3,$r3,-48 + st.d $r1,$r3,40 + addi.d $r3,$r3,48 + jirl $r0,$r1,0 diff --git a/libgcc/config/loongarch/crtn.S b/libgcc/config/loongarch/crtn.S new file mode 100644 index 0000000000000000000000000000000000000000..91d9d5e7fc54ccd4d56d6326d5bb9d7b0ba3638c --- /dev/null +++ b/libgcc/config/loongarch/crtn.S @@ -0,0 +1,39 @@ +/* Copyright (C) 2001-2018 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. 
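The crtfastmath.c constructor above sets the FCSR flush-to-zero bit whenever crtfastmath.o is linked into a program (the object is built via the t-crtfm fragment added to config.host earlier; the link spec that adds it for -ffast-math lives on the gcc side and is assumed here). A small illustration of the observable effect, assuming the FTZ bit behaves as it does on other targets:

#include <float.h>
#include <stdio.h>

int
main (void)
{
  volatile double d = DBL_MIN;	/* smallest normal double */

  /* Built normally this prints a tiny subnormal value; built and linked
     with -ffast-math, the flush-to-zero mode installed by set_fast_math ()
     above is expected to make it print 0.  */
  printf ("%g\n", d / 16.0);
  return 0;
}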
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* 4 slots for argument spill area. 1 for cpreturn, 1 for stack. + Return spill offset of 40 and 20. Aligned to 16 bytes for n32. */ + + + .section .init,"ax",@progbits +init: + ld.d $r1,$r3,40 + addi.d $r3,$r3,48 + jirl $r0,$r1,0 + + .section .fini,"ax",@progbits +fini: + ld.d $r1,$r3,40 + addi.d $r3,$r3,48 + jirl $r0,$r1,0 + diff --git a/libgcc/config/loongarch/gthr-loongnixsde.h b/libgcc/config/loongarch/gthr-loongnixsde.h new file mode 100644 index 0000000000000000000000000000000000000000..f62b57318c44a8124959b98a232c0f7e317f0e0d --- /dev/null +++ b/libgcc/config/loongarch/gthr-loongnixsde.h @@ -0,0 +1,237 @@ +/* LARCH SDE threads compatibility routines for libgcc2 and libobjc. */ +/* Compile this one with gcc. */ +/* Copyright (C) 2006-2018 Free Software Foundation, Inc. + Contributed by Nigel Stephens + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#ifndef GCC_GTHR_LARCHSDE_H +#define GCC_GTHR_LARCHSDE_H + +/* LARCH SDE threading API specific definitions. + Easy, since the interface is pretty much one-to-one. 
*/ + +#define __GTHREADS 1 + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef __sdethread_key_t __gthread_key_t; +typedef __sdethread_once_t __gthread_once_t; +typedef __sdethread_mutex_t __gthread_mutex_t; + +typedef struct { + long depth; + __sdethread_t owner; + __sdethread_mutex_t actual; +} __gthread_recursive_mutex_t; + +#define __GTHREAD_MUTEX_INIT __SDETHREAD_MUTEX_INITIALIZER("gthr") +#define __GTHREAD_ONCE_INIT __SDETHREAD_ONCE_INIT +static inline int +__gthread_recursive_mutex_init_function(__gthread_recursive_mutex_t *__mutex); +#define __GTHREAD_RECURSIVE_MUTEX_INIT_FUNCTION __gthread_recursive_mutex_init_function + +#if SUPPORTS_WEAK && GTHREAD_USE_WEAK +# define __gthrw(name) \ + static __typeof(name) __gthrw_ ## name __attribute__ ((__weakref__(#name))); +# define __gthrw_(name) __gthrw_ ## name +#else +# define __gthrw(name) +# define __gthrw_(name) name +#endif + +__gthrw(__sdethread_once) +__gthrw(__sdethread_key_create) +__gthrw(__sdethread_key_delete) +__gthrw(__sdethread_getspecific) +__gthrw(__sdethread_setspecific) + +__gthrw(__sdethread_self) + +__gthrw(__sdethread_mutex_lock) +__gthrw(__sdethread_mutex_trylock) +__gthrw(__sdethread_mutex_unlock) + +__gthrw(__sdethread_mutex_init) + +__gthrw(__sdethread_threading) + +#if SUPPORTS_WEAK && GTHREAD_USE_WEAK + +static inline int +__gthread_active_p (void) +{ + return !!(void *)&__sdethread_threading; +} + +#else /* not SUPPORTS_WEAK */ + +static inline int +__gthread_active_p (void) +{ + return 1; +} + +#endif /* SUPPORTS_WEAK */ + +static inline int +__gthread_once (__gthread_once_t *__once, void (*__func) (void)) +{ + if (__gthread_active_p ()) + return __gthrw_(__sdethread_once) (__once, __func); + else + return -1; +} + +static inline int +__gthread_key_create (__gthread_key_t *__key, void (*__dtor) (void *)) +{ + return __gthrw_(__sdethread_key_create) (__key, __dtor); +} + +static inline int +__gthread_key_delete (__gthread_key_t __key) +{ + return __gthrw_(__sdethread_key_delete) (__key); +} + +static inline void * +__gthread_getspecific (__gthread_key_t __key) +{ + return __gthrw_(__sdethread_getspecific) (__key); +} + +static inline int +__gthread_setspecific (__gthread_key_t __key, const void *__ptr) +{ + return __gthrw_(__sdethread_setspecific) (__key, __ptr); +} + +static inline int +__gthread_mutex_destroy (__gthread_mutex_t * UNUSED(__mutex)) +{ + return 0; +} + +static inline int +__gthread_mutex_lock (__gthread_mutex_t *__mutex) +{ + if (__gthread_active_p ()) + return __gthrw_(__sdethread_mutex_lock) (__mutex); + else + return 0; +} + +static inline int +__gthread_mutex_trylock (__gthread_mutex_t *__mutex) +{ + if (__gthread_active_p ()) + return __gthrw_(__sdethread_mutex_trylock) (__mutex); + else + return 0; +} + +static inline int +__gthread_mutex_unlock (__gthread_mutex_t *__mutex) +{ + if (__gthread_active_p ()) + return __gthrw_(__sdethread_mutex_unlock) (__mutex); + else + return 0; +} + +static inline int +__gthread_recursive_mutex_init_function (__gthread_recursive_mutex_t *__mutex) +{ + __mutex->depth = 0; + __mutex->owner = __gthrw_(__sdethread_self) (); + return __gthrw_(__sdethread_mutex_init) (&__mutex->actual, NULL); +} + +static inline int +__gthread_recursive_mutex_lock (__gthread_recursive_mutex_t *__mutex) +{ + if (__gthread_active_p ()) + { + __sdethread_t __me = __gthrw_(__sdethread_self) (); + + if (__mutex->owner != __me) + { + __gthrw_(__sdethread_mutex_lock) (&__mutex->actual); + __mutex->owner = __me; + } + + __mutex->depth++; + } + return 0; +} + 
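The SDE shim above is consumed by libgcc and libobjc only through the generic __gthread_* entry points, so nothing LoongArch-specific leaks into that code. A minimal usage sketch of the wrappers defined above; the variable and function names are illustrative only, and it assumes this header has been installed as the target's gthr-default.h:

/* A lock protecting a counter, in the style of libgcc internals.  When
   __gthread_active_p () reports a single-threaded program, the lock and
   unlock calls below return without touching the SDE mutex.  */
static __gthread_mutex_t id_lock = __GTHREAD_MUTEX_INIT;
static int next_id;

int
allocate_id (void)
{
  int id;
  __gthread_mutex_lock (&id_lock);
  id = next_id++;
  __gthread_mutex_unlock (&id_lock);
  return id;
}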
+static inline int +__gthread_recursive_mutex_trylock (__gthread_recursive_mutex_t *__mutex) +{ + if (__gthread_active_p ()) + { + __sdethread_t __me = __gthrw_(__sdethread_self) (); + + if (__mutex->owner != __me) + { + if (__gthrw_(__sdethread_mutex_trylock) (&__mutex->actual)) + return 1; + __mutex->owner = __me; + } + + __mutex->depth++; + } + return 0; +} + +static inline int +__gthread_recursive_mutex_unlock (__gthread_recursive_mutex_t *__mutex) +{ + if (__gthread_active_p ()) + { + if (--__mutex->depth == 0) + { + __mutex->owner = (__sdethread_t) 0; + __gthrw_(__sdethread_mutex_unlock) (&__mutex->actual); + } + } + return 0; +} + +static inline int +__gthread_recursive_mutex_destroy (__gthread_recursive_mutex_t + * UNUSED(__mutex)) +{ + return 0; +} + +#ifdef __cplusplus +} +#endif + +#endif /* ! GCC_GTHR_LARCHSDE_H */ diff --git a/libgcc/config/loongarch/linux-unwind.h b/libgcc/config/loongarch/linux-unwind.h new file mode 100644 index 0000000000000000000000000000000000000000..b4c9fda2b7532ce75bdc48cad424184f83de1b09 --- /dev/null +++ b/libgcc/config/loongarch/linux-unwind.h @@ -0,0 +1,91 @@ +/* DWARF2 EH unwinding support for LARCH Linux. + Copyright (C) 2004-2018 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#ifndef inhibit_libc +/* Do code reading to identify a signal frame, and set the frame + state data appropriately. See unwind-dw2.c for the structs. */ + +#include +#include + +/* The third parameter to the signal handler points to something with + * this structure defined in asm/ucontext.h, but the name clashes with + * struct ucontext from sys/ucontext.h so this private copy is used. */ +typedef struct _sig_ucontext { + unsigned long uc_flags; + struct _sig_ucontext *uc_link; + stack_t uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; +} _sig_ucontext_t; + +#define MD_FALLBACK_FRAME_STATE_FOR loongarch_fallback_frame_state + +static _Unwind_Reason_Code +loongarch_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + u_int32_t *pc = (u_int32_t *) context->ra; + struct sigcontext *sc; + _Unwind_Ptr new_cfa; + int i; + + /* 03822c0b dli a7, 0x8b (sigreturn) */ + /* 002b0000 syscall 0 */ + if (pc[1] != 0x002b0000) + return _URC_END_OF_STACK; + if (pc[0] == 0x03822c0b) + { + struct rt_sigframe { + u_int32_t ass[4]; /* Argument save space for o32. 
*/ + u_int32_t trampoline[2]; + siginfo_t info; + _sig_ucontext_t uc; + } *rt_ = context->cfa; + sc = &rt_->uc.uc_mcontext; + } + else + return _URC_END_OF_STACK; + + new_cfa = (_Unwind_Ptr) sc; + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = __LIBGCC_STACK_POINTER_REGNUM__; + fs->regs.cfa_offset = new_cfa - (_Unwind_Ptr) context->cfa; + + for (i = 0; i < 32; i++) { + fs->regs.reg[i].how = REG_SAVED_OFFSET; + fs->regs.reg[i].loc.offset + = (_Unwind_Ptr)&(sc->sc_regs[i]) - new_cfa; + } + + fs->signal_frame = 1; + fs->regs.reg[__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__].how + = REG_SAVED_VAL_OFFSET; + fs->regs.reg[__LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__].loc.offset + = (_Unwind_Ptr)(sc->sc_pc) - new_cfa; + fs->retaddr_column = __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__; + + return _URC_NO_REASON; +} +#endif diff --git a/libgcc/config/loongarch/sfp-machine.h b/libgcc/config/loongarch/sfp-machine.h new file mode 100644 index 0000000000000000000000000000000000000000..f7800a003053f0e42c540004434c822af9de1f2c --- /dev/null +++ b/libgcc/config/loongarch/sfp-machine.h @@ -0,0 +1,148 @@ +/* softfp machine description for LARCH. + Copyright (C) 2009-2018 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
*/ + +#ifdef __loongarch64 +#define _FP_W_TYPE_SIZE 64 +#define _FP_W_TYPE unsigned long long +#define _FP_WS_TYPE signed long long +#define _FP_I_TYPE long long + +typedef int TItype __attribute__ ((mode (TI))); +typedef unsigned int UTItype __attribute__ ((mode (TI))); +#define TI_BITS (__CHAR_BIT__ * (int) sizeof (TItype)) + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(S,R,X,Y) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y) + +# define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +# define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1) +# define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1 +#else +#define _FP_W_TYPE_SIZE 32 +#define _FP_W_TYPE unsigned int +#define _FP_WS_TYPE signed int +#define _FP_I_TYPE int + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(S,R,X,Y) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) + +# define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +# define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 +# define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 +#endif + +/* The type of the result of a floating point comparison. This must + match __libgcc_cmp_return__ in GCC for the target. */ +typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__))); +#define CMPtype __gcc_CMPtype + +#define _FP_NANSIGN_S 0 +#define _FP_NANSIGN_D 0 +#define _FP_NANSIGN_Q 0 + +#define _FP_KEEPNANFRACP 1 +# define _FP_QNANNEGATEDP 0 + +/* NaN payloads should be preserved for NAN2008. 
*/ +# define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do \ + { \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc (R, X); \ + R##_c = FP_CLS_NAN; \ + } \ + while (0) + +#ifdef __loongarch_hard_float +#define FP_EX_INVALID 0x100000 +#define FP_EX_DIVZERO 0x080000 +#define FP_EX_OVERFLOW 0x040000 +#define FP_EX_UNDERFLOW 0x020000 +#define FP_EX_INEXACT 0x010000 +#define FP_EX_ALL \ + (FP_EX_INVALID | FP_EX_DIVZERO | FP_EX_OVERFLOW | FP_EX_UNDERFLOW \ + | FP_EX_INEXACT) + +#define FP_EX_ENABLE_SHIFT 16 +#define FP_EX_CAUSE_SHIFT 8 + +#define FP_RND_NEAREST 0x000 +#define FP_RND_ZERO 0x100 +#define FP_RND_PINF 0x200 +#define FP_RND_MINF 0x300 +#define FP_RND_MASK 0x300 + +#define _FP_DECL_EX \ + unsigned long int _fcsr __attribute__ ((unused)) = FP_RND_NEAREST + +#define FP_INIT_ROUNDMODE \ + do { \ + _fcsr = __builtin_loongarch_movfcsr2gr (0); \ + } while (0) + +#define FP_ROUNDMODE (_fcsr & FP_RND_MASK) + +#define FP_TRAPPING_EXCEPTIONS ((_fcsr << FP_EX_ENABLE_SHIFT) & FP_EX_ALL) + +#define FP_HANDLE_EXCEPTIONS \ + do { \ + _fcsr &= ~(FP_EX_ALL << FP_EX_CAUSE_SHIFT); \ + _fcsr |= _fex | (_fex << FP_EX_CAUSE_SHIFT); \ + __builtin_loongarch_movgr2fcsr (0, _fcsr); \ + } while (0) + +#else +#define FP_EX_INVALID (1 << 4) +#define FP_EX_DIVZERO (1 << 3) +#define FP_EX_OVERFLOW (1 << 2) +#define FP_EX_UNDERFLOW (1 << 1) +#define FP_EX_INEXACT (1 << 0) +#endif + +#define _FP_TININESS_AFTER_ROUNDING 1 + +#define __LITTLE_ENDIAN 1234 + +# define __BYTE_ORDER __LITTLE_ENDIAN + +/* Define ALIASNAME as a strong alias for NAME. */ +# define strong_alias(name, aliasname) _strong_alias(name, aliasname) +# define _strong_alias(name, aliasname) \ + extern __typeof (name) aliasname __attribute__ ((alias (#name))); diff --git a/libgcc/config/loongarch/t-crtstuff b/libgcc/config/loongarch/t-crtstuff new file mode 100644 index 0000000000000000000000000000000000000000..d8ab43be2aee20cde830c29ec9397599136edad9 --- /dev/null +++ b/libgcc/config/loongarch/t-crtstuff @@ -0,0 +1,2 @@ +# Don't let CTOR_LIST end up in sdata section. +CRTSTUFF_T_CFLAGS = -G 0 diff --git a/libgcc/config/loongarch/t-elf b/libgcc/config/loongarch/t-elf new file mode 100644 index 0000000000000000000000000000000000000000..651f10a530387ec2ae2c94f27694cf39440430d7 --- /dev/null +++ b/libgcc/config/loongarch/t-elf @@ -0,0 +1,3 @@ +# We must build libgcc2.a with -G 0, in case the user wants to link +# without the $gp register. +HOST_LIBGCC2_CFLAGS += -G 0 diff --git a/libgcc/config/loongarch/t-loongarch b/libgcc/config/loongarch/t-loongarch new file mode 100644 index 0000000000000000000000000000000000000000..9508cb2fcae982a239b9fc1220e8443f605701b7 --- /dev/null +++ b/libgcc/config/loongarch/t-loongarch @@ -0,0 +1,9 @@ +LIB2_SIDITI_CONV_FUNCS = yes + +softfp_float_modes := +softfp_int_modes := si di +softfp_extensions := +softfp_truncations := +softfp_exclude_libgcc2 := n + +LIB2ADD_ST += $(srcdir)/config/loongarch/lib2funcs.c diff --git a/libgcc/config/loongarch/t-loongarch64 b/libgcc/config/loongarch/t-loongarch64 new file mode 100644 index 0000000000000000000000000000000000000000..a1e3513e2884ffa856cc2d41670f150a27e87553 --- /dev/null +++ b/libgcc/config/loongarch/t-loongarch64 @@ -0,0 +1 @@ +softfp_int_modes += ti diff --git a/libgcc/config/loongarch/t-sdemtk b/libgcc/config/loongarch/t-sdemtk new file mode 100644 index 0000000000000000000000000000000000000000..a06e828b545a633f18f35867b0cffbce0f3315f9 --- /dev/null +++ b/libgcc/config/loongarch/t-sdemtk @@ -0,0 +1,3 @@ +# Don't build FPBIT and DPBIT; we'll be using the SDE soft-float library. 
+FPBIT = +DPBIT = diff --git a/libgcc/config/loongarch/t-softfp-tf b/libgcc/config/loongarch/t-softfp-tf new file mode 100644 index 0000000000000000000000000000000000000000..306677b12554597d9bb96c6aaa0b3391762cdf38 --- /dev/null +++ b/libgcc/config/loongarch/t-softfp-tf @@ -0,0 +1,3 @@ +softfp_float_modes += tf +softfp_extensions += sftf dftf +softfp_truncations += tfsf tfdf diff --git a/libgcc/configure b/libgcc/configure index 26bf75789e0d999fd5edbdc6e93d356474b298be..b8937e878702a638eab6bca36bca60467988373d 100644 --- a/libgcc/configure +++ b/libgcc/configure @@ -2412,6 +2412,9 @@ case "${host}" in # sets the default TLS model and affects inlining. PICFLAG=-fPIC ;; + # loongarch*-*-*) + # PICFLAG=-fpic + # ;; mips-sgi-irix6*) # PIC is the default. ;; @@ -5062,7 +5065,7 @@ $as_echo "$libgcc_cv_cfi" >&6; } # word size rather than the address size. cat > conftest.c < conftest.c < -#endif +/* #endif */ #ifdef HAVE_STDLIB_H #include #endif