diff --git a/0002-i386-Add-syscall-to-enable-AMX-for-latest-kernels.patch b/0002-i386-Add-syscall-to-enable-AMX-for-latest-kernels.patch new file mode 100644 index 0000000000000000000000000000000000000000..94625b5cf146f6c15ad878f7a3e22722e55dccb2 --- /dev/null +++ b/0002-i386-Add-syscall-to-enable-AMX-for-latest-kernels.patch @@ -0,0 +1,77 @@ +From 5e377d21f1f345d8b157b9bc306e02bb9bd45e01 Mon Sep 17 00:00:00 2001 +From: Haochen Jiang +Date: Thu, 16 Jun 2022 00:15:53 -0700 +Subject: [PATCH] i386: Add syscall to enable AMX for latest kernels + +gcc/testsuite/ChangeLog: + + * gcc.target/i386/amx-check.h (request_perm_xtile_data): + New function to check if AMX is usable and enable AMX. + (main): Run test if AMX is usable. +--- + gcc/testsuite/gcc.target/i386/amx-check.h | 30 +++++++++++++++++++++++ + 1 file changed, 30 insertions(+) + +diff --git a/gcc/testsuite/gcc.target/i386/amx-check.h b/gcc/testsuite/gcc.target/i386/amx-check.h +index 434b0e59703..6fff5ff4631 100644 +--- a/gcc/testsuite/gcc.target/i386/amx-check.h ++++ b/gcc/testsuite/gcc.target/i386/amx-check.h +@@ -4,11 +4,24 @@ + #include + #include + #include ++#include ++#ifdef __linux__ ++#include ++#endif + #ifdef DEBUG + #include + #endif + #include "cpuid.h" + ++#define XFEATURE_XTILECFG 17 ++#define XFEATURE_XTILEDATA 18 ++#define XFEATURE_MASK_XTILECFG (1 << XFEATURE_XTILECFG) ++#define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA) ++#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA) ++ ++#define ARCH_GET_XCOMP_PERM 0x1022 ++#define ARCH_REQ_XCOMP_PERM 0x1023 ++ + /* TODO: The tmm emulation is temporary for current + AMX implementation with no tmm regclass, should + be changed in the future. */ +@@ -44,6 +57,20 @@ typedef struct __tile + /* Stride (colum width in byte) used for tileload/store */ + #define _STRIDE 64 + ++#ifdef __linux__ ++/* We need syscall to use amx functions */ ++int request_perm_xtile_data() ++{ ++ unsigned long bitmask; ++ ++ if (syscall (SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA) || ++ syscall (SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask)) ++ return 0; ++ ++ return (bitmask & XFEATURE_MASK_XTILE) != 0; ++} ++#endif ++ + /* Initialize tile config by setting all tmm size to 16x64 */ + void init_tile_config (__tilecfg_u *dst) + { +@@ -185,6 +212,9 @@ main () + #endif + #ifdef AMX_BF16 + && __builtin_cpu_supports ("amx-bf16") ++#endif ++#ifdef __linux__ ++ && request_perm_xtile_data () + #endif + ) + { +-- +2.18.2 + diff --git a/0003-Remove-AVX512_VP2INTERSECT-from-PTA_SAPPHIRERAPIDS.patch b/0003-Remove-AVX512_VP2INTERSECT-from-PTA_SAPPHIRERAPIDS.patch new file mode 100644 index 0000000000000000000000000000000000000000..42cabc2e6f034da13cd7461105553f1ae5967e43 --- /dev/null +++ b/0003-Remove-AVX512_VP2INTERSECT-from-PTA_SAPPHIRERAPIDS.patch @@ -0,0 +1,83 @@ +From 11c72f20d4d7ba1862a257cef05dc3a5e84a276d Mon Sep 17 00:00:00 2001 +From: "Cui,Lili" +Date: Thu, 29 Sep 2022 14:28:06 +0800 +Subject: [PATCH] Remove AVX512_VP2INTERSECT from PTA_SAPPHIRERAPIDS + +gcc/ChangeLog: + + * config/i386/driver-i386.cc (host_detect_local_cpu): + Move sapphirerapids out of AVX512_VP2INTERSECT. + * config/i386/i386.h: Remove AVX512_VP2INTERSECT from PTA_SAPPHIRERAPIDS + * doc/invoke.texi: Remove AVX512_VP2INTERSECT from SAPPHIRERAPIDS +--- + gcc/config/i386/driver-i386.cc | 13 +++++-------- + gcc/config/i386/i386.h | 7 +++---- + gcc/doc/invoke.texi | 8 ++++---- + 3 files changed, 12 insertions(+), 16 deletions(-) + +diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc +index 3c702fdca33..ef567045c67 100644 +--- a/gcc/config/i386/driver-i386.cc ++++ b/gcc/config/i386/driver-i386.cc +@@ -589,15 +589,12 @@ const char *host_detect_local_cpu (int argc, const char **argv) + /* This is unknown family 0x6 CPU. */ + if (has_feature (FEATURE_AVX)) + { ++ /* Assume Tiger Lake */ + if (has_feature (FEATURE_AVX512VP2INTERSECT)) +- { +- if (has_feature (FEATURE_TSXLDTRK)) +- /* Assume Sapphire Rapids. */ +- cpu = "sapphirerapids"; +- else +- /* Assume Tiger Lake */ +- cpu = "tigerlake"; +- } ++ cpu = "tigerlake"; ++ /* Assume Sapphire Rapids. */ ++ else if (has_feature (FEATURE_TSXLDTRK)) ++ cpu = "sapphirerapids"; + /* Assume Cooper Lake */ + else if (has_feature (FEATURE_AVX512BF16)) + cpu = "cooperlake"; +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index 900a3bc3673..372a2cff8fe 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -2326,10 +2326,9 @@ constexpr wide_int_bitmask PTA_ICELAKE_SERVER = PTA_ICELAKE_CLIENT + constexpr wide_int_bitmask PTA_TIGERLAKE = PTA_ICELAKE_CLIENT | PTA_MOVDIRI + | PTA_MOVDIR64B | PTA_CLWB | PTA_AVX512VP2INTERSECT | PTA_KL | PTA_WIDEKL; + constexpr wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_ICELAKE_SERVER | PTA_MOVDIRI +- | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_ENQCMD | PTA_CLDEMOTE +- | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE +- | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI | PTA_AVX512FP16 +- | PTA_AVX512BF16; ++ | PTA_MOVDIR64B | PTA_ENQCMD | PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG ++ | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE | PTA_AMX_INT8 | PTA_AMX_BF16 ++ | PTA_UINTR | PTA_AVXVNNI | PTA_AVX512FP16 | PTA_AVX512BF16; + constexpr wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF + | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD | PTA_PREFETCHWT1; + constexpr wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE; +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 271c8bb8468..a9ecc4426a4 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -32057,11 +32057,11 @@ Intel sapphirerapids CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, + SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, + RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, + AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, +-AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2 ++AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, + VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB, +-MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, +-SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16 +-and AVX512BF16 instruction set support. ++MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK, ++UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16 and AVX512BF16 ++instruction set support. + + @item alderlake + Intel Alderlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, +-- +2.18.2 + diff --git a/0004-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch b/0004-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch new file mode 100644 index 0000000000000000000000000000000000000000..3e70f0c2f07c3f4243599d4c9963eb29af67e297 --- /dev/null +++ b/0004-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch @@ -0,0 +1,123 @@ +From 1b9a5cc9ec08e9f239dd2096edcc447b7a72f64a Mon Sep 17 00:00:00 2001 +From: "Cui,Lili" +Date: Tue, 1 Nov 2022 09:16:49 +0800 +Subject: [PATCH] Add attribute hot judgement for INLINE_HINT_known_hot hint. + +We set up INLINE_HINT_known_hot hint only when we have profile feedback, +now add function attribute judgement for it, when both caller and callee +have __attribute__((hot)), we will also set up INLINE_HINT_known_hot hint +for it. + +With this patch applied, +ADL Multi-copy: 538.imagic_r 16.7% +ICX Multi-copy: 538.imagic_r 15.2% +CLX Multi-copy: 538.imagic_r 12.7% +Znver3 Multi-copy: 538.imagic_r 10.6% +Arm Multi-copy: 538.imagic_r 13.4% + +gcc/ChangeLog + + * ipa-inline-analysis.cc (do_estimate_edge_time): Add function attribute + judgement for INLINE_HINT_known_hot hint. + +gcc/testsuite/ChangeLog: + + * gcc.dg/ipa/inlinehint-6.c: New test. +--- + gcc/ipa-inline-analysis.cc | 13 ++++--- + gcc/testsuite/gcc.dg/ipa/inlinehint-6.c | 47 +++++++++++++++++++++++++ + 2 files changed, 56 insertions(+), 4 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/ipa/inlinehint-6.c + +diff --git a/gcc/ipa-inline-analysis.cc b/gcc/ipa-inline-analysis.cc +index 1ca685d1b0e..7bd29c36590 100644 +--- a/gcc/ipa-inline-analysis.cc ++++ b/gcc/ipa-inline-analysis.cc +@@ -48,6 +48,7 @@ along with GCC; see the file COPYING3. If not see + #include "ipa-utils.h" + #include "cfgexpand.h" + #include "gimplify.h" ++#include "attribs.h" + + /* Cached node/edge growths. */ + fast_call_summary *edge_growth_cache = NULL; +@@ -249,15 +250,19 @@ do_estimate_edge_time (struct cgraph_edge *edge, sreal *ret_nonspec_time) + hints = estimates.hints; + } + +- /* When we have profile feedback, we can quite safely identify hot +- edges and for those we disable size limits. Don't do that when +- probability that caller will call the callee is low however, since it ++ /* When we have profile feedback or function attribute, we can quite safely ++ identify hot edges and for those we disable size limits. Don't do that ++ when probability that caller will call the callee is low however, since it + may hurt optimization of the caller's hot path. */ +- if (edge->count.ipa ().initialized_p () && edge->maybe_hot_p () ++ if ((edge->count.ipa ().initialized_p () && edge->maybe_hot_p () + && (edge->count.ipa ().apply_scale (2, 1) + > (edge->caller->inlined_to + ? edge->caller->inlined_to->count.ipa () + : edge->caller->count.ipa ()))) ++ || (lookup_attribute ("hot", DECL_ATTRIBUTES (edge->caller->decl)) ++ != NULL ++ && lookup_attribute ("hot", DECL_ATTRIBUTES (edge->callee->decl)) ++ != NULL)) + hints |= INLINE_HINT_known_hot; + + gcc_checking_assert (size >= 0); +diff --git a/gcc/testsuite/gcc.dg/ipa/inlinehint-6.c b/gcc/testsuite/gcc.dg/ipa/inlinehint-6.c +new file mode 100644 +index 00000000000..1f3be641c6d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/ipa/inlinehint-6.c +@@ -0,0 +1,47 @@ ++/* { dg-options "-O3 -c -fdump-ipa-inline-details -fno-early-inlining -fno-ipa-cp" } */ ++/* { dg-add-options bind_pic_locally } */ ++ ++#define size_t long long int ++ ++struct A ++{ ++ size_t f1, f2, f3, f4; ++}; ++struct C ++{ ++ struct A a; ++ size_t b; ++}; ++struct C x; ++ ++__attribute__((hot)) struct C callee (struct A *a, struct C *c) ++{ ++ c->a=(*a); ++ ++ if((c->b + 7) & 17) ++ { ++ c->a.f1 = c->a.f2 + c->a.f1; ++ c->a.f2 = c->a.f3 - c->a.f2; ++ c->a.f3 = c->a.f2 + c->a.f3; ++ c->a.f4 = c->a.f2 - c->a.f4; ++ c->b = c->a.f2; ++ ++ } ++ return *c; ++} ++ ++__attribute__((hot)) struct C caller (size_t d, size_t e, size_t f, size_t g, struct C *c) ++{ ++ struct A a; ++ a.f1 = 1 + d; ++ a.f2 = e; ++ a.f3 = 12 + f; ++ a.f4 = 68 + g; ++ if (c->b > 0) ++ return callee (&a, c); ++ else ++ return *c; ++} ++ ++/* { dg-final { scan-ipa-dump "known_hot" "inline" } } */ ++ +-- +2.18.2 + diff --git a/gcc-12.spec b/gcc-12.spec index caace549994908628e15d767abb2e13f8d1011ff..81a5da425090eeaf94a5cef17ee5e73f6f05b109 100644 --- a/gcc-12.spec +++ b/gcc-12.spec @@ -84,7 +84,7 @@ Summary: Various compilers (C, C++, Objective-C, ...) Name: %{?scl_prefix}gcc Version: 12.2.1 -Release: 5 +Release: 8 # libgcc, libgfortran, libgomp, libstdc++ and crtstuff have # GCC Runtime Exception. License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD @@ -133,6 +133,9 @@ Provides: %{?scl_prefix}gcc(major) = %{gcc_major} Patch0: 0001-change-gcc-version.patch +Patch1: 0002-i386-Add-syscall-to-enable-AMX-for-latest-kernels.patch +Patch2: 0003-Remove-AVX512_VP2INTERSECT-from-PTA_SAPPHIRERAPIDS.patch +Patch3: 0004-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch # On ARM EABI systems, we do want -gnueabi to be part of the @@ -591,6 +594,9 @@ not stable, so plugins must be rebuilt any time GCC is updated. %setup -q -n gcc-12.2.0 %patch0 -p1 +%patch1 -p1 +%patch2 -p1 +%patch3 -p1 echo 'openEuler %{version}-%{release}' > gcc/DEV-PHASE @@ -2647,6 +2653,15 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Wed Nov 09 2022 Cui Lili 12.2.1-8 +- Add attribute hot judgement for INLINE_HINT_known_hot hint + +* Wed Nov 09 2022 Cui Lili 12.2.1-7 +- Remove AVX512_VP2INTERSECT from PTA_SAPPHIRERAPIDS + +* Wed Nov 09 2022 Haochen Jiang 12.2.1-6 +- i386: Add syscall to enable AMX for latest kernels + * Wed Nov 02 2022 liyancheng <412998149@qq.com> 12.2.1-5 - Change isl dependency from Source to BuildRequire