diff --git a/SME-0145-RISC-V-autovec-Verify-that-GET_MODE_NUNITS-is-a-mult.patch b/SME-0145-RISC-V-autovec-Verify-that-GET_MODE_NUNITS-is-a-mult.patch new file mode 100644 index 0000000000000000000000000000000000000000..b53042f6e891a3cd93c7bbb86dc72cc598e22013 --- /dev/null +++ b/SME-0145-RISC-V-autovec-Verify-that-GET_MODE_NUNITS-is-a-mult.patch @@ -0,0 +1,53 @@ +From 58c3ee1f6886490fd8149147553ce3aac82a31eb Mon Sep 17 00:00:00 2001 +From: Michael Collison +Date: Sat, 6 May 2023 12:37:50 -0600 +Subject: [PATCH 1/3] RISC-V: autovec: Verify that GET_MODE_NUNITS is a + multiple of 2. + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=730909fa858bd691095bc23655077aa13b7941a9 + +While working on autovectorizing for the RISCV port I encountered an issue +where can_duplicate_and_interleave_p assumes that GET_MODE_NUNITS is +evenly divisible by two. The RISC-V target has vector modes (e.g. VNx1DImode), +where GET_MODE_NUNITS is equal to one. + +Tested on RISCV and x86_64-linux-gnu. Okay? + +gcc/ + * tree-vect-slp.cc (can_duplicate_and_interleave_p): + Check that GET_MODE_NUNITS is a multiple of 2.
+--- + gcc/tree-vect-slp.cc | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc +index af477c31a..39c0955e1 100644 +--- a/gcc/tree-vect-slp.cc ++++ b/gcc/tree-vect-slp.cc +@@ -399,10 +399,13 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count, + (GET_MODE_BITSIZE (int_mode), 1); + tree vector_type + = get_vectype_for_scalar_type (vinfo, int_type, count); ++ poly_int64 half_nelts; + if (vector_type + && VECTOR_MODE_P (TYPE_MODE (vector_type)) + && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)), +- GET_MODE_SIZE (base_vector_mode))) ++ GET_MODE_SIZE (base_vector_mode)) ++ && multiple_p (GET_MODE_NUNITS (TYPE_MODE (vector_type)), ++ 2, &half_nelts)) + { + /* Try fusing consecutive sequences of COUNT / NVECTORS elements + together into elements of type INT_TYPE and using the result +@@ -410,7 +413,7 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count, + poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type)); + vec_perm_builder sel1 (nelts, 2, 3); + vec_perm_builder sel2 (nelts, 2, 3); +- poly_int64 half_nelts = exact_div (nelts, 2); ++ + for (unsigned int i = 0; i < 3; ++i) + { + sel1.quick_push (i); +-- +2.19.1 + diff --git a/SME-0146-Add-operator-to-gimple_stmt_iterator-and-gphi_iterat.patch b/SME-0146-Add-operator-to-gimple_stmt_iterator-and-gphi_iterat.patch new file mode 100644 index 0000000000000000000000000000000000000000..cbef79812d894ac9aea4e43157ca03dbdd700f7c --- /dev/null +++ b/SME-0146-Add-operator-to-gimple_stmt_iterator-and-gphi_iterat.patch @@ -0,0 +1,41 @@ +From 97fba4337709aaaaa08375e6990887ea314bf8e3 Mon Sep 17 00:00:00 2001 +From: Richard Biener +Date: Tue, 18 Apr 2023 16:58:26 +0200 +Subject: [PATCH 2/3] Add operator* to gimple_stmt_iterator and gphi_iterator + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c39cdd9e654540f74cd2478019c40f1611554a44 + +This allows STL style iterator dereference. 
It's the same +as gsi_stmt () or .phi (). + + * gimple-iterator.h (gimple_stmt_iterator::operator*): Add. + (gphi_iterator::operator*): Likewise. +--- + gcc/gimple-iterator.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/gcc/gimple-iterator.h b/gcc/gimple-iterator.h +index 216ebee24..5d281e4f7 100644 +--- a/gcc/gimple-iterator.h ++++ b/gcc/gimple-iterator.h +@@ -24,6 +24,8 @@ along with GCC; see the file COPYING3. If not see + + struct gimple_stmt_iterator + { ++ gimple *operator * () const { return ptr; } ++ + /* Sequence node holding the current statement. */ + gimple_seq_node ptr; + +@@ -38,6 +40,8 @@ struct gimple_stmt_iterator + /* Iterator over GIMPLE_PHI statements. */ + struct gphi_iterator : public gimple_stmt_iterator + { ++ gphi *operator * () const { return as_a <gphi *> (ptr); } ++ + gphi *phi () const + { + return as_a <gphi *> (ptr); +-- +2.19.1 + diff --git a/SME-0147-tree-optimization-110221-SLP-and-loop-mask-len.patch b/SME-0147-tree-optimization-110221-SLP-and-loop-mask-len.patch new file mode 100644 index 0000000000000000000000000000000000000000..b6eb11c7dec1a6fe433d37a12d7a1b913b892e91 --- /dev/null +++ b/SME-0147-tree-optimization-110221-SLP-and-loop-mask-len.patch @@ -0,0 +1,74 @@ +From 2379b38302ea3548d8c1ee19f90c28b411ba48b5 Mon Sep 17 00:00:00 2001 +From: Richard Biener +Date: Fri, 10 Nov 2023 12:39:11 +0100 +Subject: [PATCH 3/3] tree-optimization/110221 - SLP and loop mask/len + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e5f1956498251a4973d52c8aad3faf34d0443169 + +The following fixes the issue that when SLP stmts are internal defs +but appear invariant because they end up only using invariant defs +then they get scheduled outside of the loop. This nice optimization +breaks down when loop masks or lens are applied since those are not +explicitly tracked as dependences. The following makes sure to never +schedule internal defs outside of the vectorized loop when the +loop uses masks/lens.
+ + PR tree-optimization/110221 + * tree-vect-slp.cc (vect_schedule_slp_node): When loop + masking / len is applied make sure to not schedule + internal defs outside of the loop. + + * gfortran.dg/pr110221.f: New testcase. +--- + gcc/testsuite/gfortran.dg/pr110221.f | 17 +++++++++++++++++ + gcc/tree-vect-slp.cc | 10 ++++++++++ + 2 files changed, 27 insertions(+) + create mode 100644 gcc/testsuite/gfortran.dg/pr110221.f + +diff --git a/gcc/testsuite/gfortran.dg/pr110221.f b/gcc/testsuite/gfortran.dg/pr110221.f +new file mode 100644 +index 000000000..8b5738431 +--- /dev/null ++++ b/gcc/testsuite/gfortran.dg/pr110221.f +@@ -0,0 +1,17 @@ ++C PR middle-end/68146 ++C { dg-do compile } ++C { dg-options "-O2 -w" } ++C { dg-additional-options "-mavx512f --param vect-partial-vector-usage=2" { target avx512f } } ++ SUBROUTINE CJYVB(V,Z,V0,CBJ,CDJ,CBY,CYY) ++ IMPLICIT DOUBLE PRECISION (A,B,G,O-Y) ++ IMPLICIT COMPLEX*16 (C,Z) ++ DIMENSION CBJ(0:*),CDJ(0:*),CBY(0:*) ++ N=INT(V) ++ CALL GAMMA2(VG,GA) ++ DO 65 K=1,N ++ CBY(K)=CYY ++65 CONTINUE ++ CDJ(0)=V0/Z*CBJ(0)-CBJ(1) ++ DO 70 K=1,N ++70 CDJ(K)=-(K+V0)/Z*CBJ(K)+CBJ(K-1) ++ END +diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc +index 39c0955e1..71f20cf56 100644 +--- a/gcc/tree-vect-slp.cc ++++ b/gcc/tree-vect-slp.cc +@@ -7266,6 +7266,16 @@ vect_schedule_slp_node (vec_info *vinfo, + /* Emit other stmts after the children vectorized defs which is + earliest possible. */ + gimple *last_stmt = NULL; ++ if (auto loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) ++ if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) ++ || LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)) ++ { ++ /* But avoid scheduling internal defs outside of the loop when ++ we might have only implicitly tracked loop mask/len defs.
*/ ++ gimple_stmt_iterator si ++ = gsi_after_labels (LOOP_VINFO_LOOP (loop_vinfo)->header); ++ last_stmt = *si; ++ } + bool seen_vector_def = false; + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) + if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) +-- +2.19.1 + diff --git a/gcc-12.spec b/gcc-12.spec index 60fcc02cd3d3a9e6e258e8a38bcb6bfbfc6e30c4..94a0905749d47ff0a762b67f3ffe765b9c5591c4 100644 --- a/gcc-12.spec +++ b/gcc-12.spec @@ -86,7 +86,7 @@ Summary: Various compilers (C, C++, Objective-C, ...) Name: %{?scl_prefix}gcc%{gcc_ver} Version: 12.3.1 -Release: 17 +Release: 18 # libgcc, libgfortran, libgomp, libstdc++ and crtstuff have # GCC Runtime Exception. License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD @@ -288,6 +288,9 @@ Patch3144: SME-0141-Canonicalize-X-Y-as-X-Y-in-match.pd-when-Y-is-0-1.patch Patch3145: SME-0142-middle-end-Add-new-tbranch-optab-to-add-support-for-.patch Patch3146: SME-0143-explow-Allow-dynamic-allocations-after-vregs.patch Patch3147: SME-0144-PR105169-Fix-references-to-discarded-sections.patch +Patch3148: SME-0145-RISC-V-autovec-Verify-that-GET_MODE_NUNITS-is-a-mult.patch +Patch3149: SME-0146-Add-operator-to-gimple_stmt_iterator-and-gphi_iterat.patch +Patch3150: SME-0147-tree-optimization-110221-SLP-and-loop-mask-len.patch # Patch 5000 - @@ -2832,6 +2835,9 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Tue Apr 09 2024 eastb233 12.3.1-18 +- AArch64: Fix issue https://gitee.com/src-openeuler/gcc-12/issues/I9DE8T + * Sat Mar 09 2024 eastb233 12.3.1-17 - AArch64: Support SME intrinsics