diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h index e924250a4ad909cc182a9a3b03b2307bec6737c2..e8f8efe3be8551a19eaaa34f2847ce4652ecc157 100644 --- a/gcc/config/i386/avx512dqintrin.h +++ b/gcc/config/i386/avx512dqintrin.h @@ -2800,11 +2800,11 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), \ (int) (C), (__mmask8) (-1))) \ -#define _mm_mask_fpclass_ss_mask(X, C, U) \ +#define _mm_mask_fpclass_ss_mask(U, X, C) \ ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X), \ (int) (C), (__mmask8) (U))) -#define _mm_mask_fpclass_sd_mask(X, C, U) \ +#define _mm_mask_fpclass_sd_mask(U, X, C) \ ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), \ (int) (C), (__mmask8) (U))) @@ -2839,8 +2839,9 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) (__mmask8)(U))) #define _mm_reduce_round_sd(A, B, C, R) \ - ((__m128d) __builtin_ia32_reducesd_round ((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C), (__mmask8)(U), (int)(R))) + ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ + (__mmask8)(-1), (int)(R))) #define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \ ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \ @@ -2867,8 +2868,9 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) (__mmask8)(U))) #define _mm_reduce_round_ss(A, B, C, R) \ - ((__m128) __builtin_ia32_reducess_round ((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(C), (__mmask8)(U), (int)(R))) + ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ + (__mmask8)(-1), (int)(R))) #define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \ ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \ @@ -2876,7 +2878,7 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) (__mmask8)(U), (int)(R))) #define _mm_maskz_reduce_round_ss(U, A, B, C, R) \ - ((__m128) __builtin_ia32_reducesd_mask_round ((__v4sf)(__m128)(A), \ + ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ (__mmask8)(U), (int)(R))) diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h index b16ccfcb7f17c01ef95208856848d6d857f20c20..6330e57ebb85bfc2b067fb9357858d073858becc 100644 --- a/gcc/config/i386/avx512fp16intrin.h +++ b/gcc/config/i386/avx512fp16intrin.h @@ -2321,11 +2321,11 @@ _mm512_fpclass_ph_mask (__m512h __A, const int __imm) #else #define _mm512_mask_fpclass_ph_mask(u, x, c) \ ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \ - (int) (c),(__mmask8)(u))) + (int) (c),(__mmask32)(u))) #define _mm512_fpclass_ph_mask(x, c) \ ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \ - (int) (c),(__mmask8)-1)) + (int) (c),(__mmask32)-1)) #endif /* __OPIMTIZE__ */ /* Intrinsics vgetexpph, vgetexpsh. */ diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h index 192d54e743f72525cc3df8aab18856190dd1d2c9..c918ed520c5cc9ac34c37fe036a2435e3ea583ad 100644 --- a/gcc/config/i386/avx512vlbwintrin.h +++ b/gcc/config/i386/avx512vlbwintrin.h @@ -1839,7 +1839,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B) #define _mm256_mask_alignr_epi8(W, U, X, Y, N) \ ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X), \ (__v4di)(__m256i)(Y), (int)((N) * 8), \ - (__v4di)(__m256i)(X), (__mmask32)(U))) + (__v4di)(__m256i)(W), (__mmask32)(U))) #define _mm256_mask_srli_epi16(W, U, A, B) \ ((__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi)(__m256i)(A), \ @@ -1922,7 +1922,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B) #define _mm_mask_alignr_epi8(W, U, X, Y, N) \ ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X), \ (__v2di)(__m128i)(Y), (int)((N) * 8), \ - (__v2di)(__m128i)(X), (__mmask16)(U))) + (__v2di)(__m128i)(W), (__mmask16)(U))) #define _mm_maskz_alignr_epi8(U, X, Y, N) \ ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X), \ diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h index 26b286eae6b79ee8bd4e475423be8cf3eebd0383..c6f3f35a009e1448bbaa1b4035797fc3621fe307 100644 --- a/gcc/config/i386/avx512vlintrin.h +++ b/gcc/config/i386/avx512vlintrin.h @@ -13609,7 +13609,7 @@ _mm256_permutex_pd (__m256d __X, const int __M) #define _mm_mask_alignr_epi64(W, U, X, Y, C) \ ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \ - (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(X), (__mmask8)-1)) + (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(W), (__mmask8)(U))) #define _mm_maskz_alignr_epi64(U, X, Y, C) \ ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \ diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index 7361687632fae86fa078d83a9b07b4d1ea053fd1..e4b66340589f70ec17edc42bbbb9b75c052f6727 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -187,7 +187,7 @@ (and (match_operand 0 "memory_operand") (match_test "constant_address_p (XEXP (op, 0))"))) -(define_memory_constraint "Bk" +(define_special_memory_constraint "Bk" "@internal TLS address that allows insn using non-integer registers." (and (match_operand 0 "memory_operand") (not (match_test "ix86_gpr_tls_address_pattern_p (op)")))) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index d3ab21eeac35321d54e6c4e94b392d3721364093..d1713b70e07036db9ace97f2ab7e552cd393eaad 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2751,18 +2751,18 @@ BDESC (0, OPTION_MASK_ISA2_AVX5124VNNIW, CODE_FOR_avx5124vnniw_vp4dpwssds_mask, BDESC (0, OPTION_MASK_ISA2_RDPID, CODE_FOR_rdpid, "__builtin_ia32_rdpid", IX86_BUILTIN_RDPID, UNKNOWN, (int) UNSIGNED_FTYPE_VOID) /* VAES. */ -BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v16qi, "__builtin_ia32_vaesdec_v16qi", IX86_BUILTIN_VAESDEC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) -BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v32qi, "__builtin_ia32_vaesdec_v32qi", IX86_BUILTIN_VAESDEC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) -BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v64qi, "__builtin_ia32_vaesdec_v64qi", IX86_BUILTIN_VAESDEC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) -BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v16qi, "__builtin_ia32_vaesdeclast_v16qi", IX86_BUILTIN_VAESDECLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) -BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v32qi, "__builtin_ia32_vaesdeclast_v32qi", IX86_BUILTIN_VAESDECLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) -BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v64qi, "__builtin_ia32_vaesdeclast_v64qi", IX86_BUILTIN_VAESDECLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) -BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v16qi, "__builtin_ia32_vaesenc_v16qi", IX86_BUILTIN_VAESENC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) -BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v32qi, "__builtin_ia32_vaesenc_v32qi", IX86_BUILTIN_VAESENC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) -BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v64qi, "__builtin_ia32_vaesenc_v64qi", IX86_BUILTIN_VAESENC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) -BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v16qi, "__builtin_ia32_vaesenclast_v16qi", IX86_BUILTIN_VAESENCLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) -BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) -BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v16qi, "__builtin_ia32_vaesdec_v16qi", IX86_BUILTIN_VAESDEC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) +BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v32qi, "__builtin_ia32_vaesdec_v32qi", IX86_BUILTIN_VAESDEC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v64qi, "__builtin_ia32_vaesdec_v64qi", IX86_BUILTIN_VAESDEC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v16qi, "__builtin_ia32_vaesdeclast_v16qi", IX86_BUILTIN_VAESDECLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) +BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v32qi, "__builtin_ia32_vaesdeclast_v32qi", IX86_BUILTIN_VAESDECLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v64qi, "__builtin_ia32_vaesdeclast_v64qi", IX86_BUILTIN_VAESDECLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v16qi, "__builtin_ia32_vaesenc_v16qi", IX86_BUILTIN_VAESENC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) +BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v32qi, "__builtin_ia32_vaesenc_v32qi", IX86_BUILTIN_VAESENC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v64qi, "__builtin_ia32_vaesenc_v64qi", IX86_BUILTIN_VAESENC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v16qi, "__builtin_ia32_vaesenclast_v16qi", IX86_BUILTIN_VAESENCLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) +BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) /* BF16 */ BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32hi, "__builtin_ia32_cvtne2ps2bf16_v32hi", IX86_BUILTIN_CVTNE2PS2HI16_V32HI, UNKNOWN, (int) V32HI_FTYPE_V16SF_V16SF) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index bc2e6198007188c83386023fddaf0c3aa5e9420d..52e3274992819376d1d7767aaebd18db1c4146b7 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -12730,6 +12730,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, op1 = expand_normal (arg1); op2 = expand_normal (arg2); + if (GET_MODE (op1) != Pmode) + op1 = convert_to_mode (Pmode, op1, 1); + if (!address_operand (op2, VOIDmode)) { op2 = convert_memory_address (Pmode, op2); @@ -12765,6 +12768,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, emit_label (ok_label); emit_insn (gen_rtx_SET (target, pat)); + if (GET_MODE (op0) != Pmode) + op0 = convert_to_mode (Pmode, op0, 1); + for (i = 0; i < 8; i++) { op = gen_rtx_MEM (V2DImode, @@ -12789,6 +12795,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, if (!REG_P (op0)) op0 = copy_to_mode_reg (SImode, op0); + if (GET_MODE (op2) != Pmode) + op2 = convert_to_mode (Pmode, op2, 1); + op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0)); emit_move_insn (op, op1); @@ -12826,6 +12835,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, if (!REG_P (op0)) op0 = copy_to_mode_reg (SImode, op0); + if (GET_MODE (op3) != Pmode) + op3 = convert_to_mode (Pmode, op3, 1); + /* Force to use xmm0, xmm1 for keylow, keyhi*/ op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0)); emit_move_insn (op, op1); diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 991661fe4a23139cb909bbfd5c7d557e5623cad2..061a15843181b28ca08ff4d67802da7d8bd03f9a 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -2802,6 +2802,9 @@ ix86_option_override_internal (bool main_args_p, { if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)) opts->x_ix86_move_max = PVW_AVX512; + /* Align with vectorizer to avoid potential STLF issue. */ + else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) + opts->x_ix86_move_max = PVW_AVX256; else opts->x_ix86_move_max = PVW_AVX128; } @@ -2823,6 +2826,9 @@ ix86_option_override_internal (bool main_args_p, { if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)) opts->x_ix86_store_max = PVW_AVX512; + /* Align with vectorizer to avoid potential STLF issue. */ + else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) + opts->x_ix86_store_max = PVW_AVX256; else opts->x_ix86_store_max = PVW_AVX128; } diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index ade965927ac1b585dd9ad9173f45d8c15b909274..e2743e0bd5cb582315fc0294c4d26a059e4e26bf 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -14359,9 +14359,19 @@ ix86_dirflag_mode_needed (rtx_insn *insn) static bool ix86_check_avx_upper_register (const_rtx exp) { - return (SSE_REG_P (exp) - && !EXT_REX_SSE_REG_P (exp) - && GET_MODE_BITSIZE (GET_MODE (exp)) > 128); + /* construct_container may return a parallel with expr_list + which contains the real reg and mode */ + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, exp, NONCONST) + { + const_rtx x = *iter; + if (SSE_REG_P (x) + && !EXT_REX_SSE_REG_P (x) + && GET_MODE_BITSIZE (GET_MODE (x)) > 128) + return true; + } + + return false; } /* Check if a 256bit or 512bit AVX register is referenced in stores. */ @@ -14369,7 +14379,9 @@ ix86_check_avx_upper_register (const_rtx exp) static void ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data) { - if (ix86_check_avx_upper_register (dest)) + if (SSE_REG_P (dest) + && !EXT_REX_SSE_REG_P (dest) + && GET_MODE_BITSIZE (GET_MODE (dest)) > 128) { bool *used = (bool *) data; *used = true; @@ -14427,14 +14439,14 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) return AVX_U128_CLEAN; } - subrtx_iterator::array_type array; - rtx set = single_set (insn); if (set) { rtx dest = SET_DEST (set); rtx src = SET_SRC (set); - if (ix86_check_avx_upper_register (dest)) + if (SSE_REG_P (dest) + && !EXT_REX_SSE_REG_P (dest) + && GET_MODE_BITSIZE (GET_MODE (dest)) > 128) { /* This is an YMM/ZMM load. Return AVX_U128_DIRTY if the source isn't zero. */ @@ -14445,9 +14457,8 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) } else { - FOR_EACH_SUBRTX (iter, array, src, NONCONST) - if (ix86_check_avx_upper_register (*iter)) - return AVX_U128_DIRTY; + if (ix86_check_avx_upper_register (src)) + return AVX_U128_DIRTY; } /* This isn't YMM/ZMM load/store. */ @@ -14458,9 +14469,8 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) Hardware changes state only when a 256bit register is written to, but we need to prevent the compiler from moving optimal insertion point above eventual read from 256bit or 512 bit register. */ - FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) - if (ix86_check_avx_upper_register (*iter)) - return AVX_U128_DIRTY; + if (ix86_check_avx_upper_register (PATTERN (insn))) + return AVX_U128_DIRTY; return AVX_U128_ANY; } diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 23b858ab21c46e7b27136c821ebc8ea1ba960cfb..c6a8e3011453323fd3300872a76b1beca8528011 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3724,32 +3724,19 @@ ;; Since vpcmpd implicitly clear the upper bits of dest, transform ;; vpcmpd + zero_extend to vpcmpd since the instruction -(define_insn_and_split "*_cmp3_zero_extend" - [(set (match_operand:SWI248x 0 "register_operand") +(define_insn "*_cmp3_zero_extend" + [(set (match_operand:SWI248x 0 "register_operand" "=k") (zero_extend:SWI248x (unspec: - [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand") - (match_operand:V48H_AVX512VL 2 "nonimmediate_operand") + [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand" "v") + (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm") (match_operand:SI 3 "const_0_to_7_operand" "n")] UNSPEC_PCMP)))] "TARGET_AVX512F && (!VALID_MASK_AVX512BW_MODE (mode) || TARGET_AVX512BW) - && ix86_pre_reload_split () && (GET_MODE_NUNITS (mode) < GET_MODE_PRECISION (mode))" - "#" - "&& 1" - [(set (match_dup 0) - (unspec: - [(match_dup 1) - (match_dup 2) - (match_dup 3)] - UNSPEC_PCMP))] -{ - operands[1] = force_reg (mode, operands[1]); - operands[0] = lowpart_subreg (mode, - operands[0], mode); -} + "vcmp\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssecmp") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") @@ -3777,21 +3764,22 @@ "#" "&& 1" [(set (match_dup 0) - (unspec: - [(match_dup 1) - (match_dup 2) - (match_dup 3)] - UNSPEC_PCMP)) - (set (match_dup 4) (match_dup 0))] + (zero_extend:SWI248x + (unspec: + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_PCMP))) + (set (match_dup 4) (match_dup 5))] { - operands[1] = force_reg (mode, operands[1]); - operands[0] = lowpart_subreg (mode, + operands[5] = lowpart_subreg (mode, operands[0], mode); -} - [(set_attr "type" "ssecmp") - (set_attr "length_immediate" "1") - (set_attr "prefix" "evex") - (set_attr "mode" "")]) + if (SUBREG_P (operands[5])) + { + SUBREG_PROMOTED_VAR_P (operands[5]) = 1; + SUBREG_PROMOTED_SET (operands[5], 1); + } +}) (define_insn_and_split "*_cmp3" [(set (match_operand: 0 "register_operand") @@ -3826,31 +3814,18 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) -(define_insn_and_split "*_cmp3_zero_extend" - [(set (match_operand:SWI248x 0 "register_operand") +(define_insn "*_cmp3_zero_extend" + [(set (match_operand:SWI248x 0 "register_operand" "=k") (zero_extend:SWI248x (unspec: - [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand") - (match_operand:VI12_AVX512VL 2 "nonimmediate_operand") - (match_operand:SI 3 "const_0_to_7_operand")] + [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v") + (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "const_0_to_7_operand" "n")] UNSPEC_PCMP)))] "TARGET_AVX512BW - && ix86_pre_reload_split () - && (GET_MODE_NUNITS (mode) - < GET_MODE_PRECISION (mode))" - "#" - "&& 1" - [(set (match_dup 0) - (unspec: - [(match_dup 1) - (match_dup 2) - (match_dup 3)] - UNSPEC_PCMP))] -{ - operands[1] = force_reg (mode, operands[1]); - operands[0] = lowpart_subreg (mode, - operands[0], mode); -} + && (GET_MODE_NUNITS (mode) + < GET_MODE_PRECISION (mode))" + "vpcmp\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssecmp") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") @@ -3877,16 +3852,21 @@ "#" "&& 1" [(set (match_dup 0) - (unspec: - [(match_dup 1) - (match_dup 2) - (match_dup 3)] - UNSPEC_PCMP)) - (set (match_dup 4) (match_dup 0))] + (zero_extend:SWI248x + (unspec: + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_PCMP))) + (set (match_dup 4) (match_dup 5))] { - operands[1] = force_reg (mode, operands[1]); - operands[0] = lowpart_subreg (mode, + operands[5] = lowpart_subreg (mode, operands[0], mode); + if (SUBREG_P (operands[5])) + { + SUBREG_PROMOTED_VAR_P (operands[5]) = 1; + SUBREG_PROMOTED_SET (operands[5], 1); + } } [(set_attr "type" "ssecmp") (set_attr "length_immediate" "1") @@ -3945,31 +3925,18 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) -(define_insn_and_split "*_ucmp3_zero_extend" - [(set (match_operand:SWI248x 0 "register_operand") +(define_insn "*_ucmp3_zero_extend" + [(set (match_operand:SWI248x 0 "register_operand" "=k") (zero_extend:SWI248x (unspec: - [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand") - (match_operand:VI12_AVX512VL 2 "nonimmediate_operand") - (match_operand:SI 3 "const_0_to_7_operand")] + [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v") + (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "const_0_to_7_operand" "n")] UNSPEC_UNSIGNED_PCMP)))] "TARGET_AVX512BW - && ix86_pre_reload_split () && (GET_MODE_NUNITS (mode) < GET_MODE_PRECISION (mode))" - "#" - "&& 1" - [(set (match_dup 0) - (unspec: - [(match_dup 1) - (match_dup 2) - (match_dup 3)] - UNSPEC_UNSIGNED_PCMP))] -{ - operands[1] = force_reg (mode, operands[1]); - operands[0] = lowpart_subreg (mode, - operands[0], mode); -} + "vpcmpu\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssecmp") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") @@ -3997,16 +3964,21 @@ "#" "&& 1" [(set (match_dup 0) - (unspec: - [(match_dup 1) - (match_dup 2) - (match_dup 3)] - UNSPEC_UNSIGNED_PCMP)) - (set (match_dup 4) (match_dup 0))] -{ - operands[1] = force_reg (mode, operands[1]); - operands[0] = lowpart_subreg (mode, + (zero_extend:SWI248x + (unspec: + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_UNSIGNED_PCMP))) + (set (match_dup 4) (match_dup 5))] +{ + operands[5] = lowpart_subreg (mode, operands[0], mode); + if (SUBREG_P (operands[5])) + { + SUBREG_PROMOTED_VAR_P (operands[5]) = 1; + SUBREG_PROMOTED_SET (operands[5], 1); + } } [(set_attr "type" "ssecmp") (set_attr "length_immediate" "1") @@ -4043,32 +4015,19 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) -(define_insn_and_split "*_ucmp3_zero_extend" - [(set (match_operand:SWI248x 0 "register_operand") +(define_insn "*_ucmp3_zero_extend" + [(set (match_operand:SWI248x 0 "register_operand" "=k") (zero_extend:SWI248x (unspec: - [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand") - (match_operand:VI48_AVX512VL 2 "nonimmediate_operand") - (match_operand:SI 3 "const_0_to_7_operand")] + [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "v") + (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "const_0_to_7_operand" "n")] UNSPEC_UNSIGNED_PCMP)))] "TARGET_AVX512F && (!VALID_MASK_AVX512BW_MODE (mode) || TARGET_AVX512BW) - && ix86_pre_reload_split () && (GET_MODE_NUNITS (mode) < GET_MODE_PRECISION (mode))" - "#" - "&& 1" - [(set (match_dup 0) - (unspec: - [(match_dup 1) - (match_dup 2) - (match_dup 3)] - UNSPEC_UNSIGNED_PCMP))] -{ - operands[1] = force_reg (mode, operands[1]); - operands[0] = lowpart_subreg (mode, - operands[0], mode); -} + "vpcmpu\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssecmp") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") @@ -4096,16 +4055,21 @@ "#" "&& 1" [(set (match_dup 0) - (unspec: - [(match_dup 1) - (match_dup 2) - (match_dup 3)] - UNSPEC_UNSIGNED_PCMP)) - (set (match_dup 4) (match_dup 0))] -{ - operands[1] = force_reg (mode, operands[1]); - operands[0] = lowpart_subreg (mode, + (zero_extend:SWI248x + (unspec: + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_UNSIGNED_PCMP))) + (set (match_dup 4) (match_dup 5))] +{ + operands[5] = lowpart_subreg (mode, operands[0], mode); + if (SUBREG_P (operands[5])) + { + SUBREG_PROMOTED_VAR_P (operands[5]) = 1; + SUBREG_PROMOTED_SET (operands[5], 1); + } } [(set_attr "type" "ssecmp") (set_attr "length_immediate" "1") @@ -9162,7 +9126,10 @@ (match_dup 2) (parallel [(const_int 0) (const_int 1)]))))] "TARGET_SSE2" - "operands[2] = gen_reg_rtx (V4SFmode);") +{ + operands[2] = gen_reg_rtx (V4SFmode); + emit_move_insn (operands[2], CONST0_RTX (V4SFmode)); +}) (define_expand "vec_unpacks_hi_v8sf" [(set (match_dup 2) @@ -13886,7 +13853,7 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*avx512vl_v2div2qi2_mask_store_1" +(define_insn "avx512vl_v2div2qi2_mask_store_1" [(set (match_operand:V2QI 0 "memory_operand" "=m") (vec_merge:V2QI (any_truncate:V2QI @@ -13900,28 +13867,19 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn_and_split "avx512vl_v2div2qi2_mask_store_2" - [(set (match_operand:HI 0 "memory_operand") - (subreg:HI - (vec_merge:V2QI - (any_truncate:V2QI - (match_operand:V2DI 1 "register_operand")) - (vec_select:V2QI - (subreg:V4QI - (vec_concat:V2HI - (match_dup 0) - (const_int 0)) 0) - (parallel [(const_int 0) (const_int 1)])) - (match_operand:QI 2 "register_operand")) 0))] - "TARGET_AVX512VL && ix86_pre_reload_split ()" - "#" - "&& 1" - [(set (match_dup 0) - (vec_merge:V2QI - (any_truncate:V2QI (match_dup 1)) - (match_dup 0) - (match_dup 2)))] - "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);") +(define_expand "avx512vl_v2div2qi2_mask_store_2" + [(match_operand:HI 0 "memory_operand") + (any_truncate:V2QI + (match_operand:V2DI 1 "register_operand")) + (match_operand:QI 2 "register_operand")] + "TARGET_AVX512VL" +{ + operands[0] = adjust_address_nv (operands[0], V2QImode, 0); + emit_insn (gen_avx512vl_v2div2qi2_mask_store_1 (operands[0], + operands[1], + operands[2])); + DONE; +}) (define_insn "*avx512vl_v4qi2_store_1" [(set (match_operand:V4QI 0 "memory_operand" "=m") @@ -13990,7 +13948,7 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*avx512vl_v4qi2_mask_store_1" +(define_insn "avx512vl_v4qi2_mask_store_1" [(set (match_operand:V4QI 0 "memory_operand" "=m") (vec_merge:V4QI (any_truncate:V4QI @@ -14004,29 +13962,19 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn_and_split "avx512vl_v4qi2_mask_store_2" - [(set (match_operand:SI 0 "memory_operand") - (subreg:SI - (vec_merge:V4QI - (any_truncate:V4QI - (match_operand:VI4_128_8_256 1 "register_operand")) - (vec_select:V4QI - (subreg:V8QI - (vec_concat:V2SI - (match_dup 0) - (const_int 0)) 0) - (parallel [(const_int 0) (const_int 1) - (const_int 2) (const_int 3)])) - (match_operand:QI 2 "register_operand")) 0))] - "TARGET_AVX512VL && ix86_pre_reload_split ()" - "#" - "&& 1" - [(set (match_dup 0) - (vec_merge:V4QI - (any_truncate:V4QI (match_dup 1)) - (match_dup 0) - (match_dup 2)))] - "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);") +(define_expand "avx512vl_v4qi2_mask_store_2" + [(match_operand:SI 0 "memory_operand") + (any_truncate:V4QI + (match_operand:VI4_128_8_256 1 "register_operand")) + (match_operand:QI 2 "register_operand")] + "TARGET_AVX512VL" +{ + operands[0] = adjust_address_nv (operands[0], V4QImode, 0); + emit_insn (gen_avx512vl_v4qi2_mask_store_1 (operands[0], + operands[1], + operands[2])); + DONE; +}) (define_mode_iterator VI2_128_BW_4_256 [(V8HI "TARGET_AVX512BW") V8SI]) @@ -14098,7 +14046,7 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*avx512vl_v8qi2_mask_store_1" +(define_insn "avx512vl_v8qi2_mask_store_1" [(set (match_operand:V8QI 0 "memory_operand" "=m") (vec_merge:V8QI (any_truncate:V8QI @@ -14112,31 +14060,19 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn_and_split "avx512vl_v8qi2_mask_store_2" - [(set (match_operand:DI 0 "memory_operand") - (subreg:DI - (vec_merge:V8QI - (any_truncate:V8QI - (match_operand:VI2_128_BW_4_256 1 "register_operand")) - (vec_select:V8QI - (subreg:V16QI - (vec_concat:V2DI - (match_dup 0) - (const_int 0)) 0) - (parallel [(const_int 0) (const_int 1) - (const_int 2) (const_int 3) - (const_int 4) (const_int 5) - (const_int 6) (const_int 7)])) - (match_operand:QI 2 "register_operand")) 0))] - "TARGET_AVX512VL && ix86_pre_reload_split ()" - "#" - "&& 1" - [(set (match_dup 0) - (vec_merge:V8QI - (any_truncate:V8QI (match_dup 1)) - (match_dup 0) - (match_dup 2)))] - "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);") +(define_expand "avx512vl_v8qi2_mask_store_2" + [(match_operand:DI 0 "memory_operand") + (any_truncate:V8QI + (match_operand:VI2_128_BW_4_256 1 "register_operand")) + (match_operand:QI 2 "register_operand")] + "TARGET_AVX512VL" +{ + operands[0] = adjust_address_nv (operands[0], V8QImode, 0); + emit_insn (gen_avx512vl_v8qi2_mask_store_1 (operands[0], + operands[1], + operands[2])); + DONE; +}) (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI]) (define_mode_attr pmov_dst_4 @@ -14258,7 +14194,7 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*avx512vl_v4hi2_mask_store_1" +(define_insn "avx512vl_v4hi2_mask_store_1" [(set (match_operand:V4HI 0 "memory_operand" "=m") (vec_merge:V4HI (any_truncate:V4HI @@ -14276,30 +14212,19 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn_and_split "avx512vl_v4hi2_mask_store_2" - [(set (match_operand:DI 0 "memory_operand") - (subreg:DI - (vec_merge:V4HI - (any_truncate:V4HI - (match_operand:VI4_128_8_256 1 "register_operand")) - (vec_select:V4HI - (subreg:V8HI - (vec_concat:V2DI - (match_dup 0) - (const_int 0)) 0) - (parallel [(const_int 0) (const_int 1) - (const_int 2) (const_int 3)])) - (match_operand:QI 2 "register_operand")) 0))] - "TARGET_AVX512VL && ix86_pre_reload_split ()" - "#" - "&& 1" - [(set (match_dup 0) - (vec_merge:V4HI - (any_truncate:V4HI (match_dup 1)) - (match_dup 0) - (match_dup 2)))] - "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);") - +(define_expand "avx512vl_v4hi2_mask_store_2" + [(match_operand:DI 0 "memory_operand") + (any_truncate:V4HI + (match_operand:VI4_128_8_256 1 "register_operand")) + (match_operand:QI 2 "register_operand")] + "TARGET_AVX512VL" +{ + operands[0] = adjust_address_nv (operands[0], V4HImode, 0); + emit_insn (gen_avx512vl_v4hi2_mask_store_1 (operands[0], + operands[1], + operands[2])); + DONE; +}) (define_insn "*avx512vl_v2div2hi2_store_1" [(set (match_operand:V2HI 0 "memory_operand" "=m") @@ -14360,7 +14285,7 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*avx512vl_v2div2hi2_mask_store_1" +(define_insn "avx512vl_v2div2hi2_mask_store_1" [(set (match_operand:V2HI 0 "memory_operand" "=m") (vec_merge:V2HI (any_truncate:V2HI @@ -14374,28 +14299,19 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn_and_split "avx512vl_v2div2hi2_mask_store_2" - [(set (match_operand:SI 0 "memory_operand") - (subreg:SI - (vec_merge:V2HI - (any_truncate:V2HI - (match_operand:V2DI 1 "register_operand")) - (vec_select:V2HI - (subreg:V4HI - (vec_concat:V2SI - (match_dup 0) - (const_int 0)) 0) - (parallel [(const_int 0) (const_int 1)])) - (match_operand:QI 2 "register_operand")) 0))] - "TARGET_AVX512VL && ix86_pre_reload_split ()" - "#" - "&& 1" - [(set (match_dup 0) - (vec_merge:V2HI - (any_truncate:V2HI (match_dup 1)) - (match_dup 0) - (match_dup 2)))] - "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);") +(define_expand "avx512vl_v2div2hi2_mask_store_2" + [(match_operand:SI 0 "memory_operand") + (any_truncate:V2HI + (match_operand:V2DI 1 "register_operand")) + (match_operand:QI 2 "register_operand")] + "TARGET_AVX512VL" +{ + operands[0] = adjust_address_nv (operands[0], V2HImode, 0); + emit_insn (gen_avx512vl_v2div2hi2_mask_store_1 (operands[0], + operands[1], + operands[2])); + DONE; +}) (define_expand "truncv2div2si2" [(set (match_operand:V2SI 0 "register_operand") @@ -14503,7 +14419,7 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*avx512vl_v2div2si2_mask_store_1" +(define_insn "avx512vl_v2div2si2_mask_store_1" [(set (match_operand:V2SI 0 "memory_operand" "=m") (vec_merge:V2SI (any_truncate:V2SI @@ -14517,28 +14433,19 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn_and_split "avx512vl_v2div2si2_mask_store_2" - [(set (match_operand:DI 0 "memory_operand") - (subreg:DI - (vec_merge:V2SI - (any_truncate:V2SI - (match_operand:V2DI 1 "register_operand")) - (vec_select:V2SI - (subreg:V4SI - (vec_concat:V2DI - (match_dup 0) - (const_int 0)) 0) - (parallel [(const_int 0) (const_int 1)])) - (match_operand:QI 2 "register_operand")) 0))] - "TARGET_AVX512VL && ix86_pre_reload_split ()" - "#" - "&& 1" - [(set (match_dup 0) - (vec_merge:V2SI - (any_truncate:V2SI (match_dup 1)) - (match_dup 0) - (match_dup 2)))] - "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);") +(define_expand "avx512vl_v2div2si2_mask_store_2" + [(match_operand:DI 0 "memory_operand") + (any_truncate:V2SI + (match_operand:V2DI 1 "register_operand")) + (match_operand:QI 2 "register_operand")] + "TARGET_AVX512VL" +{ + operands[0] = adjust_address_nv (operands[0], V2SImode, 0); + emit_insn (gen_avx512vl_v2div2si2_mask_store_1 (operands[0], + operands[1], + operands[2])); + DONE; +}) (define_expand "truncv8div8qi2" [(set (match_operand:V8QI 0 "register_operand") @@ -14637,7 +14544,7 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*avx512f_v8div16qi2_mask_store_1" +(define_insn "avx512f_v8div16qi2_mask_store_1" [(set (match_operand:V8QI 0 "memory_operand" "=m") (vec_merge:V8QI (any_truncate:V8QI @@ -14651,31 +14558,19 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn_and_split "avx512f_v8div16qi2_mask_store_2" - [(set (match_operand:DI 0 "memory_operand") - (subreg:DI - (vec_merge:V8QI - (any_truncate:V8QI - (match_operand:V8DI 1 "register_operand")) - (vec_select:V8QI - (subreg:V16QI - (vec_concat:V2DI - (match_dup 0) - (const_int 0)) 0) - (parallel [(const_int 0) (const_int 1) - (const_int 2) (const_int 3) - (const_int 4) (const_int 5) - (const_int 6) (const_int 7)])) - (match_operand:QI 2 "register_operand")) 0))] - "TARGET_AVX512F && ix86_pre_reload_split ()" - "#" - "&& 1" - [(set (match_dup 0) - (vec_merge:V8QI - (any_truncate:V8QI (match_dup 1)) - (match_dup 0) - (match_dup 2)))] - "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);") +(define_expand "avx512f_v8div16qi2_mask_store_2" + [(match_operand:DI 0 "memory_operand") + (any_truncate:V8QI + (match_operand:V8DI 1 "register_operand")) + (match_operand:QI 2 "register_operand")] + "TARGET_AVX512F" +{ + operands[0] = adjust_address_nv (operands[0], V8QImode, 0); + emit_insn (gen_avx512f_v8div16qi2_mask_store_1 (operands[0], + operands[1], + operands[2])); + DONE; +}) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 109858f7666784c2c2cc1af68b18c2a6869b410b..90073ac983252c6ccf3237143ee053411b36df64 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -31441,6 +31441,7 @@ Intel Core 2 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, CX16, SAHF and FXSR instruction set support. @item nehalem +@itemx corei7 Intel Nehalem CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF and FXSR instruction set support. @@ -31449,17 +31450,20 @@ Intel Westmere CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR and PCLMUL instruction set support. @item sandybridge +@itemx corei7-avx Intel Sandy Bridge CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE and PCLMUL instruction set support. @item ivybridge +@itemx core-avx-i Intel Ivy Bridge CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND and F16C instruction set support. @item haswell -Intel Haswell CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, +@itemx core-avx2 +Intel Haswell CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE and HLE instruction set support. @@ -31475,47 +31479,6 @@ SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, CLFLUSHOPT, XSAVEC, XSAVES and SGX instruction set support. -@item bonnell -Intel Bonnell CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3 and SSSE3 -instruction set support. - -@item silvermont -Intel Silvermont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW and RDRND -instruction set support. - -@item goldmont -Intel Goldmont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA, -RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT and FSGSBASE instruction -set support. - -@item goldmont-plus -Intel Goldmont Plus CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, -SHA, RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, -RDPID and SGX instruction set support. - -@item tremont -Intel Tremont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA, -RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, RDPID, -SGX, CLWB, GFNI-SSE, MOVDIRI, MOVDIR64B, CLDEMOTE and WAITPKG instruction set -support. - -@item knl -Intel Knight's Landing CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, -RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, -AVX512PF, AVX512ER, AVX512F, AVX512CD and PREFETCHWT1 instruction set support. - -@item knm -Intel Knights Mill CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, -RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, -AVX512PF, AVX512ER, AVX512F, AVX512CD and PREFETCHWT1, AVX5124VNNIW, -AVX5124FMAPS and AVX512VPOPCNTDQ instruction set support. - @item skylake-avx512 Intel Skylake Server CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, @@ -31523,16 +31486,30 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, CLWB, AVX512VL, AVX512BW, AVX512DQ and AVX512CD instruction set support. +@item cascadelake +Intel Cascade Lake CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, +SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, +F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, +CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, CLWB, AVX512VL, AVX512BW, AVX512DQ, +AVX512CD and AVX512VNNI instruction set support. + @item cannonlake -Intel Cannonlake Server CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, +Intel Cannon Lake Server CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, AVX512CD, PKU, AVX512VBMI, AVX512IFMA and SHA instruction set support. +@item cooperlake +Intel Cooper Lake CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, +SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, +F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, +CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, CLWB, AVX512VL, AVX512BW, AVX512DQ, +AVX512CD, AVX512VNNI and AVX512BF16 instruction set support. + @item icelake-client -Intel Icelake Client CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, +Intel Ice Lake Client CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, @@ -31540,7 +31517,7 @@ AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2 , VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ instruction set support. @item icelake-server -Intel Icelake Server CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, +Intel Ice Lake Server CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, @@ -31548,55 +31525,84 @@ AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2 , VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD and CLWB instruction set support. -@item cascadelake -Intel Cascadelake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, -F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, -CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, CLWB, AVX512VL, AVX512BW, AVX512DQ, -AVX512CD and AVX512VNNI instruction set support. - -@item cooperlake -Intel cooperlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, +@item tigerlake +Intel Tiger Lake CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, -CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, CLWB, AVX512VL, AVX512BW, AVX512DQ, -AVX512CD, AVX512VNNI and AVX512BF16 instruction set support. +CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, +AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, +VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, MOVDIRI, MOVDIR64B, CLWB, +AVX512VP2INTERSECT and KEYLOCKER instruction set support. -@item tigerlake -Intel Tigerlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, +@item rocketlake +Intel Rocket Lake CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, -CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, AVX512CD +CLFLUSHOPT, XSAVEC, XSAVES, AVX512F, AVX512VL, AVX512BW, AVX512DQ, AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, -VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, MOVDIRI, MOVDIR64B, CLWB, -AVX512VP2INTERSECT and KEYLOCKER instruction set support. +VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ instruction set support. + +@item alderlake +Intel Alder Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, +SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, +XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, +CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, +VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and AVX-VNNI instruction set +support. @item sapphirerapids -Intel sapphirerapids CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, -RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, -AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, +Intel Sapphire Rapids CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, +SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, +F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, +CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB, MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512-FP16 and AVX512BF16 instruction set support. -@item alderlake -Intel Alderlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, -XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, -CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, -VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and AVX-VNNI instruction set +@item bonnell +@itemx atom +Intel Bonnell CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3 and SSSE3 +instruction set support. + +@item silvermont +@itemx slm +Intel Silvermont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, +SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW and RDRND +instruction set support. + +@item goldmont +Intel Goldmont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, +SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA, +RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT and FSGSBASE instruction +set support. + +@item goldmont-plus +Intel Goldmont Plus CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, +SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, +SHA, RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, +RDPID and SGX instruction set support. + +@item tremont +Intel Tremont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, +SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA, +RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, RDPID, +SGX, CLWB, GFNI-SSE, MOVDIRI, MOVDIR64B, CLDEMOTE and WAITPKG instruction set support. -@item rocketlake -Intel Rocketlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3 -, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, -F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, -CLFLUSHOPT, XSAVEC, XSAVES, AVX512F, AVX512VL, AVX512BW, AVX512DQ, AVX512CD -PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, -VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ instruction set support. +@item knl +Intel Knights Landing CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, +SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, +RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, +AVX512PF, AVX512ER, AVX512F, AVX512CD and PREFETCHWT1 instruction set support. + +@item knm +Intel Knights Mill CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, +SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, +RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, +AVX512PF, AVX512ER, AVX512F, AVX512CD and PREFETCHWT1, AVX5124VNNIW, +AVX5124FMAPS and AVX512VPOPCNTDQ instruction set support. @item graniterapids Intel graniterapids CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c index b1165f069bb589d45a0fc3c38f9b280e6b334eb6..e7d6183232b775fa86cabf70ecfc97fab3c2b40e 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c @@ -1,8 +1,7 @@ /* PR target/103750 */ /* { dg-do compile } */ /* { dg-options "-O2 -mavx512bw -mavx512vl" } */ -/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ -/* xfail need to be fixed. */ +/* { dg-final { scan-assembler-not "kmov" } } */ #include extern __m128i* pi128; diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1b.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1b.c new file mode 100644 index 0000000000000000000000000000000000000000..2b42aa90b91868445c6b926eaae64cd48693f540 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1b.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -mavx512bw -mavx512vl" } */ +/* { dg-final { scan-assembler-times "vpalignr\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpalignr\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +volatile __m256i y; +volatile __m128i x; +volatile __mmask32 m2; +volatile __mmask16 m3; + +void extern +avx512bw_test (void) +{ + y = _mm256_mask_alignr_epi8 (y, m2, y, y, 10); + x = _mm_mask_alignr_epi8 (x, m3, x, x, 10); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1b.c new file mode 100644 index 0000000000000000000000000000000000000000..8c7f96fb7a702a89039a3ab5abf82fa8c21a940b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1b.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512dq -O0" } */ +/* { dg-final { scan-assembler-times "vfpclasssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +volatile __m128d x128; +volatile __mmask8 m8; + +void extern +avx512dq_test (void) +{ + m8 = _mm_mask_fpclass_sd_mask (m8, x128, 13); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1b.c new file mode 100644 index 0000000000000000000000000000000000000000..3196fd60d64be9ca8b86f6dd908396a1b5f5d188 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1b.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512dq -O0" } */ +/* { dg-final { scan-assembler-times "vfpclassss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +volatile __m128 x128; +volatile __mmask8 m8; + +void extern +avx512dq_test (void) +{ + m8 = _mm_mask_fpclass_ss_mask (m8, x128, 13); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1b.c new file mode 100644 index 0000000000000000000000000000000000000000..9ae8259d373c50158ecd05ca4c42c68853a7276c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1b.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512dq -O0" } */ +/* { dg-final { scan-assembler-times "vreducesd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +#define IMM 123 + +volatile __m128d x1, x2, xx1, xx2; +volatile __mmask8 m; + +void extern +avx512dq_test (void) +{ + xx1 = _mm_reduce_round_sd (xx1, xx2, IMM, _MM_FROUND_NO_EXC); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1b.c new file mode 100644 index 0000000000000000000000000000000000000000..47bf48fb617885a3a7c635e56c09f966f8c112e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1b.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512dq -O0" } */ +/* { dg-final { scan-assembler-times "vreducess\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +#define IMM 123 + +volatile __m128 x1, x2, xx1, xx2; +volatile __mmask8 m; + +void extern +avx512dq_test (void) +{ + xx1 = _mm_reduce_round_ss (xx1, xx2, IMM, _MM_FROUND_NO_EXC); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c new file mode 100644 index 0000000000000000000000000000000000000000..4739f1228e3262ef346d8df46299a03fc4a9c2c0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c @@ -0,0 +1,77 @@ +/* { dg-do run } */ +/* { dg-options "-O0 -mavx512fp16" } */ +/* { dg-require-effective-target avx512fp16 } */ + +#define AVX512FP16 +#include "avx512f-helper.h" + +#include +#include +#include +#define SIZE (AVX512F_LEN / 16) +#include "avx512f-mask-type.h" + +#ifndef __FPCLASSPH__ +#define __FPCLASSPH__ +int check_fp_class_hp (_Float16 src, int imm) +{ + int qNaN_res = isnan (src); + int sNaN_res = isnan (src); + int Pzero_res = (src == 0.0); + int Nzero_res = (src == -0.0); + int PInf_res = (isinf (src) == 1); + int NInf_res = (isinf (src) == -1); + int Denorm_res = (fpclassify (src) == FP_SUBNORMAL); + int FinNeg_res = __builtin_finite (src) && (src < 0); + + int result = (((imm & 1) && qNaN_res) + || (((imm >> 1) & 1) && Pzero_res) + || (((imm >> 2) & 1) && Nzero_res) + || (((imm >> 3) & 1) && PInf_res) + || (((imm >> 4) & 1) && NInf_res) + || (((imm >> 5) & 1) && Denorm_res) + || (((imm >> 6) & 1) && FinNeg_res) + || (((imm >> 7) & 1) && sNaN_res)); + return result; +} +#endif + +MASK_TYPE +CALC (_Float16 *s1, int imm) +{ + int i; + MASK_TYPE res = 0; + + for (i = 0; i < SIZE; i++) + if (check_fp_class_hp(s1[i], imm)) + res = res | (1 << i); + + return res; +} + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, h) src; + MASK_TYPE res1, res2, res_ref = 0; + MASK_TYPE mask = MASK_VALUE; + + src.a[SIZE - 1] = NAN; + src.a[SIZE - 2] = 1.0 / 0.0; + for (i = 0; i < SIZE - 2; i++) + { + src.a[i] = -24.43 + 0.6 * i; + } + + res1 = INTRINSIC (_fpclass_ph_mask) (src.x, 0xFF); + res2 = INTRINSIC (_mask_fpclass_ph_mask) (mask, src.x, 0xFF); + + res_ref = CALC (src.a, 0xFF); + + if (res_ref != res1) + abort (); + + if ((mask & res_ref) != res2) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-valignq-1b.c b/gcc/testsuite/gcc.target/i386/avx512vl-valignq-1b.c new file mode 100644 index 0000000000000000000000000000000000000000..0ab16b27733845f7eb871b2bfe61ee0d0b7f20a2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-valignq-1b.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -mavx512vl" } */ +/* { dg-final { scan-assembler-times "valignq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +volatile __m256i y; +volatile __m128i x; +volatile __mmask8 m; + +void extern +avx512vl_test (void) +{ + x = _mm_mask_alignr_epi64 (x, m, x, x, 1); +} diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c index 5faee21f9b9931c8bf8294bcd5d551815c4dab45..78f92ac5197dcfcf765f07083e2ef027c2250488 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mmove-max=128 -mstore-max=128 -mtune=sandybridge" } */ extern char *dst, *src; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c new file mode 100644 index 0000000000000000000000000000000000000000..605b3623ffc50f13bab4c27596db78ac493aaea3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c @@ -0,0 +1,12 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ + +extern char *dst, *src; + +void +foo (void) +{ + __builtin_memcpy (dst, src, 33); +} + +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c index 5f99cc98c4720c902ad4ce93bca5372bb98438b4..57b74ae4b2308621590a21c9abc4ad038ea4933b 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { ! ia32 } } } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mmove-max=128 -mstore-max=128 -mtune=sandybridge" } */ extern char *dst, *src; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c index ed4a24a54fda5b767ad54a92f490596b1f773d5c..d9443678735d64d30c9b87be7c7b96b347af835e 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx512f -mavx2 -mmove-max=128 -mstore-max=128 -mtune=sandybridge" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c index 4eda73ead5924786994f4edd77b5a8f5ff7db7e0..8ad6ad7e494ac147205a467e08c060ebc77f09d0 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx512f -mavx2 -mmove-max=128 -mstore-max=128 -mtune=sandybridge" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c index 93df8101e4d0fe5654555d8fac7a1ea4344f1c80..08fd6e9a92783a9631956122da771bb34dbd24a0 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge -mno-stackrealign" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mmove-max=128 -mstore-max=128 -mtune=sandybridge -mno-stackrealign" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c index df0c122aae71d4d010064719a632e5b42895b9d0..6b73bb256af660c7dbc8bd42a0642a3701e61b50 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mmove-max=128 -mstore-max=128 -mtune=sandybridge" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c index 2f2179c2df9e3797b5f6b7276cd00cb4a4c8be92..c6c7ff234dab657fd45bfcf9a85f74e7cde247ee 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mmove-max=128 -mstore-max=128 -mtune=sandybridge" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-51.c b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c new file mode 100644 index 0000000000000000000000000000000000000000..192ec0d1647db21c46b0b84b23167ec46c5e02aa --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ + +extern char *dst; + +void +foo (int x) +{ + __builtin_memset (dst, x, 64); +} + +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c index 90446edb4f35086353359af1801fafe73a0ab74d..40ada119625d7afef5f24db04d60255a1c2b6453 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { ! ia32 } } } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mmove-max=128 -mstore-max=128 -mtune=sandybridge" } */ extern char *strcpy (char *, const char *); diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c new file mode 100644 index 0000000000000000000000000000000000000000..df7571b547fc6aab7c9818da75615ebc617e7843 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ + +extern char *strcpy (char *, const char *); + +void +foo (char *s) +{ + strcpy (s, + "1234567890abcdef123456abcdef5678123456abcdef567abcdef678" + "1234567"); +} + +/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%ymm" 2 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr116043.c b/gcc/testsuite/gcc.target/i386/pr116043.c new file mode 100644 index 0000000000000000000000000000000000000000..76553496c109dc809d5d56382229ece72544509c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr116043.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bf16 -O3" } */ +/* { dg-final { scan-assembler-not {(?n)lea.*@gottpoff} } } */ + +extern __thread int a, c, i, j, k, l; +int *b; +struct d { + int e; +} f, g; +char *h; + +void m(struct d *n) { + b = &k; + for (; n->e; b++, n--) { + i = b && a; + if (i) + j = c; + } +} + +char *o(struct d *n) { + for (; n->e;) + return h; +} + +int q() { + if (l) + return 1; + int p = *o(&g); + m(&f); + m(&g); + l = p; +} diff --git a/gcc/testsuite/gcc.target/i386/pr116512.c b/gcc/testsuite/gcc.target/i386/pr116512.c new file mode 100644 index 0000000000000000000000000000000000000000..c2bc6c91b648450f7e1e945f55ee387ba54130d2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr116512.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v4 -O2" } */ +/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */ + +#include + +struct B { + union { + __m512 f; + __m512i s; + }; +}; + +struct B foo(int n) { + struct B res; + res.s = _mm512_set1_epi32(n); + + return res; +} + +__m512i bar(int n) { + struct B res; + res.s = _mm512_set1_epi32(n); + + return res.s; +} diff --git a/gcc/testsuite/gcc.target/i386/pr117159.c b/gcc/testsuite/gcc.target/i386/pr117159.c new file mode 100644 index 0000000000000000000000000000000000000000..b67d682ecef4ea105f51a401e811407f0780d359 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117159.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-options "-Os -mavx512bw" } */ +/* { dg-require-effective-target avx512bw } */ + +typedef __attribute__((__vector_size__ (4))) unsigned char W; +typedef __attribute__((__vector_size__ (64))) int V; +typedef __attribute__((__vector_size__ (64))) long long Vq; + +W w; +V v; +Vq vq; + +static inline W +foo (short m) +{ + unsigned k = __builtin_ia32_pcmpgtq512_mask ((Vq) { }, vq, m); + W r = (W) k + w; + return r; +} + +static inline W +foo1 (short m) +{ + unsigned k = __builtin_ia32_pcmpgtd512_mask ((V) {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, v, m); + W r = (W) k + w; + return r; +} + +int +main () +{ + if (!__builtin_cpu_supports ("avx512bw")) + return 0; + W y = foo1 (65535); + if (!y[0] || !y[1] || y[2] || y[3]) + __builtin_abort(); + W x = foo (65535); + if (x[0] || x[1] || x[2] || x[3]) + __builtin_abort(); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr117240_avx.c b/gcc/testsuite/gcc.target/i386/pr117240_avx.c new file mode 100644 index 0000000000000000000000000000000000000000..24a97a9f74c6e7def93ab00b8b1a9c2781c851a6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117240_avx.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mvaes -mno-xsave -Wno-psabi -Wno-implicit-function-declaration" } */ + +typedef __attribute__((__vector_size__(32))) char V; + +V +foo(V v) +{ + return __builtin_ia32_vaesenc_v32qi(v, v);/* { dg-error "incompatible types when returning" } */ +} diff --git a/gcc/testsuite/gcc.target/i386/pr117240_avx512f.c b/gcc/testsuite/gcc.target/i386/pr117240_avx512f.c new file mode 100644 index 0000000000000000000000000000000000000000..1e7b5a88d7aa42af1da497aa3f410f16587ec55d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117240_avx512f.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mvaes -mno-xsave -Wno-psabi -Wno-implicit-function-declaration" } */ + +typedef __attribute__((__vector_size__(64))) char V; + +V +foo(V v) +{ + return __builtin_ia32_vaesenc_v64qi(v, v);/* { dg-error "incompatible types when returning" } */ +} diff --git a/gcc/testsuite/gcc.target/i386/pr117318.c b/gcc/testsuite/gcc.target/i386/pr117318.c new file mode 100644 index 0000000000000000000000000000000000000000..3d316ad04cffde1141164cea36f1a4f9eeac8db8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117318.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O" } */ + +typedef __attribute__((__vector_size__ (64))) long long V; +unsigned long long x; + +unsigned long long +foo() +{ + __builtin_ia32_pmovusqb512mem_mask (&x, (V){8000000000000000}, 255); + return x; +} diff --git a/gcc/testsuite/gcc.target/i386/pr117418-1.c b/gcc/testsuite/gcc.target/i386/pr117418-1.c new file mode 100644 index 0000000000000000000000000000000000000000..4839b139b79ab6741d194f6b930302759d1e1eaf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117418-1.c @@ -0,0 +1,24 @@ +/* PR target/117418 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-maddress-mode=long -mwidekl -mx32" } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-final { scan-assembler-times "aesdec128kl" 1 } } */ +/* { dg-final { scan-assembler-times "aesdec256kl" 1 } } */ +/* { dg-final { scan-assembler-times "aesenc128kl" 1 } } */ +/* { dg-final { scan-assembler-times "aesenc256kl" 1 } } */ +/* { dg-final { scan-assembler-times "encodekey128" 1 } } */ +/* { dg-final { scan-assembler-times "encodekey256" 1 } } */ + +typedef __attribute__((__vector_size__(16))) long long V; +V a; + +void +foo() +{ + __builtin_ia32_aesdec128kl_u8 (&a, a, &a); + __builtin_ia32_aesdec256kl_u8 (&a, a, &a); + __builtin_ia32_aesenc128kl_u8 (&a, a, &a); + __builtin_ia32_aesenc256kl_u8 (&a, a, &a); + __builtin_ia32_encodekey128_u32 (0, a, &a); + __builtin_ia32_encodekey256_u32 (0, a, a, &a); +}