From cac43828d26b178807d194b4bd7c5df69603df29 Mon Sep 17 00:00:00 2001 From: xiajingze Date: Wed, 31 Jul 2024 18:37:29 +0800 Subject: [PATCH] [AArch64] Support HiSilicon's HIP09 Processor Signed-off-by: xiajingze --- clang/test/Driver/aarch64-hip09.c | 9 ++ .../test/Misc/target-invalid-cpu-note-hip09.c | 97 +++++++++++++++++++ clang/test/Misc/target-invalid-cpu-note.c | 1 + clang/test/lit.site.cfg.py.in | 4 + llvm/cmake/modules/HandleLLVMOptions.cmake | 8 ++ .../llvm/TargetParser/AArch64TargetParser.h | 7 ++ llvm/lib/Target/AArch64/AArch64.td | 36 +++++++ .../lib/Target/AArch64/AArch64MacroFusion.cpp | 55 +++++++++++ llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 9 ++ llvm/lib/Target/AArch64/AArch64Subtarget.h | 9 +- llvm/lib/Target/CMakeLists.txt | 4 + llvm/lib/TargetParser/Host.cpp | 3 + llvm/test/CodeGen/AArch64/cpus-hip09.ll | 11 +++ .../CodeGen/AArch64/macro-fusion-mvnclz.mir | 20 ++++ .../AArch64/misched-fusion-lit-hip09.ll | 73 ++++++++++++++ llvm/test/CodeGen/AArch64/remat-hip09.ll | 18 ++++ llvm/test/lit.site.cfg.py.in | 4 + llvm/unittests/TargetParser/Host.cpp | 5 + .../TargetParser/TargetParserTest.cpp | 16 +++ 19 files changed, 388 insertions(+), 1 deletion(-) create mode 100644 clang/test/Driver/aarch64-hip09.c create mode 100644 clang/test/Misc/target-invalid-cpu-note-hip09.c create mode 100644 llvm/test/CodeGen/AArch64/cpus-hip09.ll create mode 100644 llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir create mode 100644 llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll create mode 100644 llvm/test/CodeGen/AArch64/remat-hip09.ll diff --git a/clang/test/Driver/aarch64-hip09.c b/clang/test/Driver/aarch64-hip09.c new file mode 100644 index 000000000000..156be3f38cde --- /dev/null +++ b/clang/test/Driver/aarch64-hip09.c @@ -0,0 +1,9 @@ +// REQUIRES: enable_enable_aarch64_hip09 +// RUN: %clang -target aarch64_be -mcpu=hip09 -### -c %s 2>&1 | FileCheck -check-prefix=hip09-BE %s +// RUN: %clang -target aarch64 -mbig-endian -mcpu=hip09 -### -c %s 2>&1 | 
FileCheck -check-prefix=hip09-BE %s +// RUN: %clang -target aarch64_be -mbig-endian -mcpu=hip09 -### -c %s 2>&1 | FileCheck -check-prefix=hip09-BE %s +// RUN: %clang -target aarch64_be -mtune=hip09 -### -c %s 2>&1 | FileCheck -check-prefix=hip09-BE-TUNE %s +// RUN: %clang -target aarch64 -mbig-endian -mtune=hip09 -### -c %s 2>&1 | FileCheck -check-prefix=hip09-BE-TUNE %s +// RUN: %clang -target aarch64_be -mbig-endian -mtune=hip09 -### -c %s 2>&1 | FileCheck -check-prefix=hip09-BE-TUNE %s +// hip09-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "hip09" +// hip09-BE-TUNE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" diff --git a/clang/test/Misc/target-invalid-cpu-note-hip09.c b/clang/test/Misc/target-invalid-cpu-note-hip09.c new file mode 100644 index 000000000000..f2561a0890fc --- /dev/null +++ b/clang/test/Misc/target-invalid-cpu-note-hip09.c @@ -0,0 +1,97 @@ +// REQUIRES: enable_enable_aarch64_hip09 +// Use CHECK-NEXT instead of multiple CHECK-SAME to ensure we will fail if there is anything extra in the output. 
+// RUN: not %clang_cc1 -triple armv5--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix ARM +// ARM: error: unknown target CPU 'not-a-cpu' +// ARM-NEXT: note: valid target CPU values are: arm8, arm810, strongarm, strongarm110, strongarm1100, strongarm1110, arm7tdmi, arm7tdmi-s, arm710t, arm720t, arm9, arm9tdmi, arm920, arm920t, arm922t, arm940t, ep9312, arm10tdmi, arm1020t, arm9e, arm946e-s, arm966e-s, arm968e-s, arm10e, arm1020e, arm1022e, arm926ej-s, arm1136j-s, arm1136jf-s, mpcore, mpcorenovfp, arm1176jz-s, arm1176jzf-s, arm1156t2-s, arm1156t2f-s, cortex-m0, cortex-m0plus, cortex-m1, sc000, cortex-a5, cortex-a7, cortex-a8, cortex-a9, cortex-a12, cortex-a15, cortex-a17, krait, cortex-r4, cortex-r4f, cortex-r5, cortex-r7, cortex-r8, cortex-r52, sc300, cortex-m3, cortex-m4, cortex-m7, cortex-m23, cortex-m33, cortex-m35p, cortex-m55, cortex-m85, cortex-a32, cortex-a35, cortex-a53, cortex-a55, cortex-a57, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-x1, cortex-x1c, neoverse-n1, neoverse-n2, neoverse-v1, cyclone, exynos-m3, exynos-m4, exynos-m5, kryo, iwmmxt, xscale, swift{{$}} + +// RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64 +// AARCH64: error: unknown target CPU 'not-a-cpu' +// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, 
thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, hip09, grace{{$}} + +// RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64 +// TUNE_AARCH64: error: unknown target CPU 'not-a-cpu' +// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, hip09, grace{{$}} + +// RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86 +// X86: error: unknown target CPU 'not-a-cpu' +// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, 
athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}} + +// RUN: not %clang_cc1 -triple x86_64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86_64 +// X86_64: error: unknown target CPU 'not-a-cpu' +// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}} + +// RUN: not %clang_cc1 -triple i386--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86 +// TUNE_X86: error: unknown target CPU 'not-a-cpu' +// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, knl, knm, lakemont, k6, 
k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}} + +// RUN: not %clang_cc1 -triple x86_64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86_64 +// TUNE_X86_64: error: unknown target CPU 'not-a-cpu' +// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}} + +// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX +// NVPTX: error: unknown target CPU 'not-a-cpu' +// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, 
gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151{{$}} + +// RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600 +// R600: error: unknown target CPU 'not-a-cpu' +// R600-NEXT: note: valid target CPU values are: r600, rv630, rv635, r630, rs780, rs880, rv610, rv620, rv670, rv710, rv730, rv740, rv770, cedar, palm, cypress, hemlock, juniper, redwood, sumo, sumo2, barts, caicos, aruba, cayman, turks{{$}} + +// RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN +// AMDGCN: error: unknown target CPU 'not-a-cpu' +// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151{{$}} + +// RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM +// WEBASM: error: unknown target CPU 'not-a-cpu' +// WEBASM-NEXT: note: valid target CPU values are: mvp, bleeding-edge, generic{{$}} + +// RUN: not %clang_cc1 -triple systemz--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix SYSTEMZ +// SYSTEMZ: error: unknown target CPU 'not-a-cpu' +// SYSTEMZ-NEXT: note: valid target CPU values are: arch8, z10, arch9, z196, arch10, zEC12, arch11, z13, arch12, z14, arch13, z15, arch14, z16{{$}} + +// RUN: not %clang_cc1 -triple sparc--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix SPARC +// SPARC: error: unknown 
target CPU 'not-a-cpu' +// SPARC-NEXT: note: valid target CPU values are: v8, supersparc, sparclite, f934, hypersparc, sparclite86x, sparclet, tsc701, v9, ultrasparc, ultrasparc3, niagara, niagara2, niagara3, niagara4, ma2100, ma2150, ma2155, ma2450, ma2455, ma2x5x, ma2080, ma2085, ma2480, ma2485, ma2x8x, myriad2, myriad2.1, myriad2.2, myriad2.3, leon2, at697e, at697f, leon3, ut699, gr712rc, leon4, gr740{{$}} + +// RUN: not %clang_cc1 -triple sparcv9--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix SPARCV9 +// SPARCV9: error: unknown target CPU 'not-a-cpu' +// SPARCV9-NEXT: note: valid target CPU values are: v9, ultrasparc, ultrasparc3, niagara, niagara2, niagara3, niagara4{{$}} + +// RUN: not %clang_cc1 -triple powerpc--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix PPC +// PPC: error: unknown target CPU 'not-a-cpu' +// PPC-NEXT: note: valid target CPU values are: generic, 440, 450, 601, 602, 603, 603e, 603ev, 604, 604e, 620, 630, g3, 7400, g4, 7450, g4+, 750, 8548, 970, g5, a2, e500, e500mc, e5500, power3, pwr3, power4, pwr4, power5, pwr5, power5x, pwr5x, power6, pwr6, power6x, pwr6x, power7, pwr7, power8, pwr8, power9, pwr9, power10, pwr10, powerpc, ppc, ppc32, powerpc64, ppc64, powerpc64le, ppc64le, future{{$}} + +// RUN: not %clang_cc1 -triple mips--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix MIPS +// MIPS: error: unknown target CPU 'not-a-cpu' +// MIPS-NEXT: note: valid target CPU values are: mips1, mips2, mips3, mips4, mips5, mips32, mips32r2, mips32r3, mips32r5, mips32r6, mips64, mips64r2, mips64r3, mips64r5, mips64r6, octeon, octeon+, p5600{{$}} + +// RUN: not %clang_cc1 -triple lanai--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix LANAI +// LANAI: error: unknown target CPU 'not-a-cpu' +// LANAI-NEXT: note: valid target CPU values are: v11{{$}} + +// RUN: not %clang_cc1 -triple hexagon--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | 
FileCheck %s --check-prefix HEXAGON +// HEXAGON: error: unknown target CPU 'not-a-cpu' +// HEXAGON-NEXT: note: valid target CPU values are: hexagonv5, hexagonv55, hexagonv60, hexagonv62, hexagonv65, hexagonv66, hexagonv67, hexagonv67t, hexagonv68, hexagonv69, hexagonv71, hexagonv71t, hexagonv73{{$}} + +// RUN: not %clang_cc1 -triple bpf--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix BPF +// BPF: error: unknown target CPU 'not-a-cpu' +// BPF-NEXT: note: valid target CPU values are: generic, v1, v2, v3, probe{{$}} + +// RUN: not %clang_cc1 -triple avr--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AVR +// AVR: error: unknown target CPU 'not-a-cpu' +// AVR-NEXT: note: valid target CPU values are: avr1, at90s1200, attiny11, attiny12, attiny15, attiny28, avr2, at90s2313, at90s2323, at90s2333, at90s2343, attiny22, attiny26, at86rf401, at90s4414, at90s4433, at90s4434, at90s8515, at90c8534, at90s8535, avr25, ata5272, ata6616c, attiny13, attiny13a, attiny2313, attiny2313a, attiny24, attiny24a, attiny4313, attiny44, attiny44a, attiny84, attiny84a, attiny25, attiny45, attiny85, attiny261, attiny261a, attiny441, attiny461, attiny461a, attiny841, attiny861, attiny861a, attiny87, attiny43u, attiny48, attiny88, attiny828, avr3, at43usb355, at76c711, avr31, atmega103, at43usb320, avr35, attiny167, at90usb82, at90usb162, ata5505, ata6617c, ata664251, atmega8u2, atmega16u2, atmega32u2, attiny1634, avr4, atmega8, ata6289, atmega8a, ata6285, ata6286, ata6612c, atmega48, atmega48a, atmega48pa, atmega48pb, atmega48p, atmega88, atmega88a, atmega88p, atmega88pa, atmega88pb, atmega8515, atmega8535, atmega8hva, at90pwm1, at90pwm2, at90pwm2b, at90pwm3, at90pwm3b, at90pwm81, avr5, ata5702m322, ata5782, ata5790, ata5790n, ata5791, ata5795, ata5831, ata6613c, ata6614q, ata8210, ata8510, atmega16, atmega16a, atmega161, atmega162, atmega163, atmega164a, atmega164p, atmega164pa, atmega165, atmega165a, atmega165p, atmega165pa, atmega168, 
atmega168a, atmega168p, atmega168pa, atmega168pb, atmega169, atmega169a, atmega169p, atmega169pa, atmega32, atmega32a, atmega323, atmega324a, atmega324p, atmega324pa, atmega324pb, atmega325, atmega325a, atmega325p, atmega325pa, atmega3250, atmega3250a, atmega3250p, atmega3250pa, atmega328, atmega328p, atmega328pb, atmega329, atmega329a, atmega329p, atmega329pa, atmega3290, atmega3290a, atmega3290p, atmega3290pa, atmega406, atmega64, atmega64a, atmega640, atmega644, atmega644a, atmega644p, atmega644pa, atmega645, atmega645a, atmega645p, atmega649, atmega649a, atmega649p, atmega6450, atmega6450a, atmega6450p, atmega6490, atmega6490a, atmega6490p, atmega64rfr2, atmega644rfr2, atmega16hva, atmega16hva2, atmega16hvb, atmega16hvbrevb, atmega32hvb, atmega32hvbrevb, atmega64hve, atmega64hve2, at90can32, at90can64, at90pwm161, at90pwm216, at90pwm316, atmega32c1, atmega64c1, atmega16m1, atmega32m1, atmega64m1, atmega16u4, atmega32u4, atmega32u6, at90usb646, at90usb647, at90scr100, at94k, m3000, avr51, atmega128, atmega128a, atmega1280, atmega1281, atmega1284, atmega1284p, atmega128rfa1, atmega128rfr2, atmega1284rfr2, at90can128, at90usb1286, at90usb1287, avr6, atmega2560, atmega2561, atmega256rfr2, atmega2564rfr2, avrxmega2, atxmega16a4, atxmega16a4u, atxmega16c4, atxmega16d4, atxmega32a4, atxmega32a4u, atxmega32c3, atxmega32c4, atxmega32d3, atxmega32d4, atxmega32e5, atxmega16e5, atxmega8e5, avrxmega4, atxmega64a3, atxmega64a3u, atxmega64a4u, atxmega64b1, atxmega64b3, atxmega64c3, atxmega64d3, atxmega64d4, avrxmega5, atxmega64a1, atxmega64a1u, avrxmega6, atxmega128a3, atxmega128a3u, atxmega128b1, atxmega128b3, atxmega128c3, atxmega128d3, atxmega128d4, atxmega192a3, atxmega192a3u, atxmega192c3, atxmega192d3, atxmega256a3, atxmega256a3u, atxmega256a3b, atxmega256a3bu, atxmega256c3, atxmega256d3, atxmega384c3, atxmega384d3, avrxmega7, atxmega128a1, atxmega128a1u, atxmega128a4u, avrtiny, attiny4, attiny5, attiny9, attiny10, attiny20, attiny40, attiny102, attiny104, avrxmega3, 
attiny202, attiny402, attiny204, attiny404, attiny804, attiny1604, attiny406, attiny806, attiny1606, attiny807, attiny1607, attiny212, attiny412, attiny214, attiny414, attiny814, attiny1614, attiny416, attiny816, attiny1616, attiny3216, attiny417, attiny817, attiny1617, attiny3217, attiny1624, attiny1626, attiny1627, atmega808, atmega809, atmega1608, atmega1609, atmega3208, atmega3209, atmega4808, atmega4809 + +// RUN: not %clang_cc1 -triple riscv32 -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix RISCV32 +// RISCV32: error: unknown target CPU 'not-a-cpu' +// RISCV32-NEXT: note: valid target CPU values are: generic-rv32, rocket-rv32, sifive-e20, sifive-e21, sifive-e24, sifive-e31, sifive-e34, sifive-e76, syntacore-scr1-base, syntacore-scr1-max{{$}} + +// RUN: not %clang_cc1 -triple riscv64 -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix RISCV64 +// RISCV64: error: unknown target CPU 'not-a-cpu' +// RISCV64-NEXT: note: valid target CPU values are: generic-rv64, rocket-rv64, sifive-s21, sifive-s51, sifive-s54, sifive-s76, sifive-u54, sifive-u74, sifive-x280{{$}} + +// RUN: not %clang_cc1 -triple riscv32 -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE-RISCV32 +// TUNE-RISCV32: error: unknown target CPU 'not-a-cpu' +// TUNE-RISCV32-NEXT: note: valid target CPU values are: generic-rv32, rocket-rv32, sifive-e20, sifive-e21, sifive-e24, sifive-e31, sifive-e34, sifive-e76, syntacore-scr1-base, syntacore-scr1-max, generic, rocket, sifive-7-series{{$}} + +// RUN: not %clang_cc1 -triple riscv64 -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE-RISCV64 +// TUNE-RISCV64: error: unknown target CPU 'not-a-cpu' +// TUNE-RISCV64-NEXT: note: valid target CPU values are: generic-rv64, rocket-rv64, sifive-s21, sifive-s51, sifive-s54, sifive-s76, sifive-u54, sifive-u74, sifive-x280, generic, rocket, sifive-7-series{{$}} diff --git a/clang/test/Misc/target-invalid-cpu-note.c 
b/clang/test/Misc/target-invalid-cpu-note.c index cd1b0bc157cc..466b262639a2 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -1,3 +1,4 @@ +// UNSUPPORTED: enable_enable_aarch64_hip09 // Use CHECK-NEXT instead of multiple CHECK-SAME to ensure we will fail if there is anything extra in the output. // RUN: not %clang_cc1 -triple armv5--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix ARM // ARM: error: unknown target CPU 'not-a-cpu' diff --git a/clang/test/lit.site.cfg.py.in b/clang/test/lit.site.cfg.py.in index 89b7cafdc1d8..7728be7d4d8d 100644 --- a/clang/test/lit.site.cfg.py.in +++ b/clang/test/lit.site.cfg.py.in @@ -44,10 +44,14 @@ config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@ config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@ config.build_for_openeuler = @BUILD_FOR_OPENEULER@ config.enable_enable_autotuner = @LLVM_ENABLE_AUTOTUNER@ +config.enable_enable_aarch64_hip09 = @LLVM_ENABLE_AARCH64_HIP09@ import lit.llvm lit.llvm.initialize(lit_config, config) +if config.enable_enable_aarch64_hip09: + config.available_features.add("enable_enable_aarch64_hip09") + # Let the main config do the real work. 
lit_config.load_config( config, os.path.join(config.clang_src_dir, "test/lit.cfg.py")) diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 8be5d4ba52c2..74e68e25d85c 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -128,6 +128,14 @@ else() set(LLVM_BUILD_FOR_COMMON 0) endif() +option(LLVM_ENABLE_AARCH64_HIP09 "Enable HIP09 Processor" ON) +if(LLVM_ENABLE_AARCH64_HIP09) + set(LLVM_ENABLE_AARCH64_HIP09 1) + add_definitions( -DENABLE_AARCH64_HIP09 ) +else() + set(LLVM_ENABLE_AARCH64_HIP09 0) +endif() + if(LLVM_ENABLE_EXPENSIVE_CHECKS) add_compile_definitions(EXPENSIVE_CHECKS) diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index dc4cdfa8e90a..07cd2fcbb68d 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -542,6 +542,13 @@ inline constexpr CpuInfo CpuInfos[] = { (AArch64::AEK_FP16 | AArch64::AEK_RAND | AArch64::AEK_SM4 | AArch64::AEK_SHA3 | AArch64::AEK_SHA2 | AArch64::AEK_AES | AArch64::AEK_MTE | AArch64::AEK_SB | AArch64::AEK_SSBS)}, +#if defined(ENABLE_AARCH64_HIP09) + {"hip09", ARMV8_5A, + (AArch64::AEK_AES | AArch64::AEK_SM4 | AArch64::AEK_SHA2 | + AArch64::AEK_SHA3 | AArch64::AEK_FP16 | AArch64::AEK_PROFILE | + AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_I8MM | + AArch64::AEK_F32MM | AArch64::AEK_F64MM | AArch64::AEK_BF16)}, +#endif }; // An alias for a CPU. 
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 8f50af4b71fd..c8bfd770f55f 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -296,6 +296,12 @@ def FeatureFuseAddSub2RegAndConstOne : SubtargetFeature< "fuse-addsub-2reg-const1", "HasFuseAddSub2RegAndConstOne", "true", "CPU fuses (a + b + 1) and (a - b - 1)">; +#ifdef ENABLE_AARCH64_HIP09 +def FeatureFuseMvnClz : SubtargetFeature< + "fuse-mvn-clz", "HasFuseMvnClz", "true", + "CPU fuses mvn+clz operations">; +#endif + def FeatureDisableLatencySchedHeuristic : SubtargetFeature< "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", "Disable latency scheduling heuristic">; @@ -1205,6 +1211,21 @@ def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110", FeatureFuseAES, FeaturePostRAScheduler]>; +#ifdef ENABLE_AARCH64_HIP09 +def TuneHIP09 : SubtargetFeature<"hip09", "ARMProcFamily", "HIP09", + "HiSilicon HIP-09 processors", [ + FeatureCustomCheapAsMoveHandling, + FeatureExperimentalZeroingPseudos, + FeatureFuseAES, + FeatureLSLFast, + FeatureAscendStoreAddress, + FeatureCmpBccFusion, + FeatureArithmeticBccFusion, + FeatureFuseLiterals, + FeatureFuseMvnClz, + FeaturePostRAScheduler]>; +#endif + def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1", "Ampere Computing Ampere-1 processors", [ FeaturePostRAScheduler, @@ -1359,6 +1380,14 @@ def ProcessorFeatures { list TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeaturePerfMon, FeatureSPE, FeatureFullFP16, FeatureFP16FML, FeatureDotProd]; +#ifdef ENABLE_AARCH64_HIP09 + list HIP09 = [HasV8_5aOps, FeatureBF16, FeatureCrypto, FeatureFPARMv8, + FeatureMatMulInt8, FeatureMatMulFP32, FeatureMatMulFP64, + FeatureNEON, FeaturePerfMon, FeatureRandGen, FeatureSPE, + FeatureFullFP16, FeatureFP16FML, FeatureDotProd, + FeatureJS, FeatureComplxNum, FeatureSHA3, FeatureSM4, + FeatureSVE]; +#endif list Ampere1 = [HasV8_6aOps, 
FeatureNEON, FeaturePerfMon, FeatureSSBS, FeatureRandGen, FeatureSB, FeatureSHA2, FeatureSHA3, FeatureAES]; @@ -1464,8 +1493,15 @@ def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, // Marvell ThunderX3T110 Processors. def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, ProcessorFeatures.ThunderX3T110, [TuneThunderX3T110]>; + +// HiSilicon Processors. def : ProcessorModel<"tsv110", TSV110Model, ProcessorFeatures.TSV110, [TuneTSV110]>; +#ifdef ENABLE_AARCH64_HIP09 +// FIXME: HiSilicon HIP09 is currently modeled as a Cortex-A57. +def : ProcessorModel<"hip09", CortexA57Model, ProcessorFeatures.HIP09, + [TuneHIP09]>; +#endif // Support cyclone as an alias for apple-a7 so we can still LTO old bitcode. def : ProcessorModel<"cyclone", CycloneModel, ProcessorFeatures.AppleA7, diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp index 05d60872bf51..4963ec350db2 100644 --- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -51,6 +51,12 @@ static bool isArithmeticBccPair(const MachineInstr *FirstMI, case AArch64::SUBSXrr: case AArch64::BICSWrr: case AArch64::BICSXrr: +#if defined(ENABLE_AARCH64_HIP09) + case AArch64::ADCSWr: + case AArch64::ADCSXr: + case AArch64::SBCSWr: + case AArch64::SBCSXr: +#endif return true; case AArch64::ADDSWrs: case AArch64::ADDSXrs: @@ -183,6 +189,20 @@ static bool isLiteralsPair(const MachineInstr *FirstMI, SecondMI.getOperand(3).getImm() == 16)) return true; +#if defined(ENABLE_AARCH64_HIP09) + // 32 bit immediate. + if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVNWi) && + (SecondMI.getOpcode() == AArch64::MOVKWi && + SecondMI.getOperand(3).getImm() == 16)) + return true; + + // Lower half of 64 bit immediate. NOTE(review): the check below pairs MOVNXi (X-register form) with MOVKWi (W-register form); confirm that MOVKXi was not intended. 
+ if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVNXi) && + (SecondMI.getOpcode() == AArch64::MOVKWi && + SecondMI.getOperand(3).getImm() == 16)) + return true; +#endif + // Upper half of 64 bit immediate. if ((FirstMI == nullptr || (FirstMI->getOpcode() == AArch64::MOVKXi && @@ -437,6 +457,37 @@ static bool isAddSub2RegAndConstOnePair(const MachineInstr *FirstMI, return false; } +#if defined(ENABLE_AARCH64_HIP09) +static bool isMvnClzPair(const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + // HIP09 supports fusion of MVN + CLZ. + // The CLZ can be fused with MVN and make execution faster. + // And the fusion is not allowed for shifted forms. + // + // Instruction alias info: + // 1. MVN <Wd>, <Wm>{, <shift> #<amount>} is equivalent to + // ORN <Wd>, WZR, <Wm>{, <shift> #<amount>} + // 2. MVN <Xd>, <Xm>{, <shift> #<amount>} is equivalent to + // ORN <Xd>, XZR, <Xm>{, <shift> #<amount>} + // Assume the 1st instr to be a wildcard if it is unspecified. + if ((FirstMI == nullptr || + ((FirstMI->getOpcode() == AArch64::ORNWrs) && + (FirstMI->getOperand(1).getReg() == AArch64::WZR) && + (!AArch64InstrInfo::hasShiftedReg(*FirstMI)))) && + (SecondMI.getOpcode() == AArch64::CLZWr)) + return true; + + if ((FirstMI == nullptr || + ((FirstMI->getOpcode() == AArch64::ORNXrs) && + (FirstMI->getOperand(1).getReg() == AArch64::XZR) && + (!AArch64InstrInfo::hasShiftedReg(*FirstMI)))) && + (SecondMI.getOpcode() == AArch64::CLZXr)) + return true; + + return false; +} +#endif + /// \brief Check if the instr pair, FirstMI and SecondMI, should be fused /// together. Given SecondMI, when FirstMI is unspecified, then check if /// SecondMI may be part of a fused pair at all. 
@@ -472,6 +523,10 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
   if (ST.hasFuseAddSub2RegAndConstOne() &&
       isAddSub2RegAndConstOnePair(FirstMI, SecondMI))
     return true;
+#if defined(ENABLE_AARCH64_HIP09)
+  if (ST.hasFuseMvnClz() && isMvnClzPair(FirstMI, SecondMI))
+    return true;
+#endif
 
   return false;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 450e27b8a2af..ddf22364c78e 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -266,6 +266,15 @@ void AArch64Subtarget::initializeProperties() {
     PrefFunctionAlignment = Align(16);
     PrefLoopAlignment = Align(4);
     break;
+#if defined(ENABLE_AARCH64_HIP09)
+  case HIP09:
+    CacheLineSize = 64;
+    PrefFunctionAlignment = Align(16);
+    PrefLoopAlignment = Align(4);
+    VScaleForTuning = 2;
+    DefaultSVETFOpts = TailFoldingOpts::Simple;
+    break;
+#endif
   case ThunderX3T110:
     CacheLineSize = 64;
     PrefFunctionAlignment = Align(16);
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 5e20d16464c4..5f481f4f976a 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -87,7 +87,10 @@ public:
     ThunderXT83,
     ThunderXT88,
     ThunderX3T110,
-    TSV110
+    TSV110,
+#if defined(ENABLE_AARCH64_HIP09)
+    HIP09
+#endif
   };
 
 protected:
@@ -239,7 +242,11 @@ public:
   bool hasFusion() const {
     return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
            hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() ||
+#if defined(ENABLE_AARCH64_HIP09)
+           hasFuseAdrpAdd() || hasFuseLiterals() || hasFuseMvnClz();
+#else
            hasFuseAdrpAdd() || hasFuseLiterals();
+#endif
   }
 
   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
diff --git a/llvm/lib/Target/CMakeLists.txt b/llvm/lib/Target/CMakeLists.txt
index 2739233f9ccb..501ce1f2fe53 100644
--- a/llvm/lib/Target/CMakeLists.txt
+++ b/llvm/lib/Target/CMakeLists.txt
@@ -2,6 +2,10 @@ list(APPEND LLVM_COMMON_DEPENDS intrinsics_gen)
 
 list(APPEND LLVM_TABLEGEN_FLAGS -I ${LLVM_MAIN_SRC_DIR}/lib/Target)
 
+if(LLVM_ENABLE_AARCH64_HIP09)
+  list(APPEND LLVM_TABLEGEN_FLAGS "-DENABLE_AARCH64_HIP09")
+endif()
+
 add_llvm_component_library(LLVMTarget
   Target.cpp
   TargetIntrinsicInfo.cpp
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index d11dc605e188..8b23be02edc0 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -257,6 +257,9 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
     // contents are specified in the various processor manuals.
     return StringSwitch<const char *>(Part)
       .Case("0xd01", "tsv110")
+#if defined(ENABLE_AARCH64_HIP09)
+      .Case("0xd02", "hip09")
+#endif
       .Default("generic");
 
   if (Implementer == "0x51") // Qualcomm Technologies, Inc.
diff --git a/llvm/test/CodeGen/AArch64/cpus-hip09.ll b/llvm/test/CodeGen/AArch64/cpus-hip09.ll
new file mode 100644
index 000000000000..dcf32e4dca89
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cpus-hip09.ll
@@ -0,0 +1,11 @@
+; REQUIRES: enable_enable_aarch64_hip09
+; This tests that llc accepts all valid AArch64 CPUs
+
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=hip09 2>&1 | FileCheck %s
+
+; CHECK-NOT: {{.*}} is not a recognized processor for this target
+; INVALID: {{.*}} is not a recognized processor for this target
+
+define i32 @f(i64 %z) {
+  ret i32 0
+}
diff --git a/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir b/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
new file mode 100644
index 000000000000..64bf159370f9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
@@ -0,0 +1,20 @@
+# REQUIRES: enable_enable_aarch64_hip09
+# RUN: llc -o - %s -mtriple=aarch64-- -mattr=+fuse-mvn-clz -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,FUSION
+# RUN: llc -o - %s -mtriple=aarch64-- -mattr=-fuse-mvn-clz -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,NOFUSION
+---
+#
CHECK-LABEL: name: fuse-mvn-clz +# CHECK: $w2 = ORNWrs $wzr, $w1, 0 +# FUSION: $w0 = CLZWr killed renamable $w2 +# CHECK: $w3 = ADDWri killed renamable $w1, 1, 0 +# NOFUSION: $w0 = CLZWr killed renamable $w2 +name: fuse-mvn-clz +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2, $w3 + + $w2 = ORNWrs $wzr, $w1, 0 + $w3 = ADDWri killed renamable $w1, 1, 0 + $w0 = CLZWr killed renamable $w2 + RET undef $lr, implicit $w0 +... diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll b/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll new file mode 100644 index 000000000000..d67fa5b4374c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll @@ -0,0 +1,73 @@ +; REQUIRES: enable_enable_aarch64_hip09 +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=hip09 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE-HIP09 + +@g = common local_unnamed_addr global ptr null, align 8 + +define dso_local ptr @litp(i32 %a, i32 %b) { +entry: + %add = add nsw i32 %b, %a + %idx.ext = sext i32 %add to i64 + %add.ptr = getelementptr i8, ptr @litp, i64 %idx.ext + store ptr %add.ptr, ptr @g, align 8 + ret ptr %add.ptr + +; CHECK-LABEL: litp: +; CHECK: adrp [[R:x[0-9]+]], litp +; CHECKFUSE-NEXT: add {{x[0-9]+}}, [[R]], :lo12:litp +} + +define dso_local ptr @litp_tune_generic(i32 %a, i32 %b) "tune-cpu"="generic" { +entry: + %add = add nsw i32 %b, %a + %idx.ext = sext i32 %add to i64 + %add.ptr = getelementptr i8, ptr @litp_tune_generic, i64 %idx.ext + store ptr %add.ptr, ptr @g, align 8 + ret ptr %add.ptr + +; CHECK-LABEL: litp_tune_generic: +; CHECK: adrp [[R:x[0-9]+]], litp_tune_generic +; CHECK-NEXT: add {{x[0-9]+}}, [[R]], :lo12:litp_tune_generic +} + +define dso_local i32 @liti(i32 %a, i32 %b) { +entry: + %add = add i32 %a, -262095121 + %add1 = add i32 %add, %b + ret i32 %add1 + +; CHECK-LABEL: liti: +; CHECK: mov [[R:w[0-9]+]], {{#[0-9]+}} +; CHECKDONT-NEXT: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECKFUSE-NEXT: movk [[R]], 
{{#[0-9]+}}, lsl #16 +; CHECKFUSE-HIP09: movk [[R]], {{#[0-9]+}}, lsl #16 +} + +; Function Attrs: norecurse nounwind readnone +define dso_local i64 @litl(i64 %a, i64 %b) { +entry: + %add = add i64 %a, 2208998440489107183 + %add1 = add i64 %add, %b + ret i64 %add1 + +; CHECK-LABEL: litl: +; CHECK: mov [[R:x[0-9]+]], {{#[0-9]+}} +; CHECKDONT-NEXT: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +; CHECK-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16 +; CHECK: movk [[R]], {{#[0-9]+}}, lsl #32 +; CHECK-NEXT: movk [[R]], {{#[0-9]+}}, lsl #48 +} + +; Function Attrs: norecurse nounwind readnone +define dso_local double @litf() { +entry: + ret double 0x400921FB54442D18 + +; CHECK-LABEL: litf: +; CHECK-DONT: adrp [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]] +; CHECK-DONT-NEXT: ldr {{d[0-9]+}}, {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}} +; CHECKFUSE-HIP09: mov [[R:x[0-9]+]], #11544 +; CHECKFUSE-HIP09: movk [[R]], #21572, lsl #16 +; CHECKFUSE-HIP09: movk [[R]], #8699, lsl #32 +; CHECKFUSE-HIP09: movk [[R]], #16393, lsl #48 +; CHECKFUSE-HIP09: fmov {{d[0-9]+}}, [[R]] +} diff --git a/llvm/test/CodeGen/AArch64/remat-hip09.ll b/llvm/test/CodeGen/AArch64/remat-hip09.ll new file mode 100644 index 000000000000..aec0d18ae73f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/remat-hip09.ll @@ -0,0 +1,18 @@ +; REQUIRES: enable_enable_aarch64_hip09 +; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=hip09 -o - %s | FileCheck %s + +%X = type { i64, i64, i64 } +declare void @f(ptr) +define void @t() { +entry: + %tmp = alloca %X + call void @f(ptr %tmp) +; CHECK: add x0, sp, #8 +; CHECK-NOT: mov +; CHECK-NEXT: bl f + call void @f(ptr %tmp) +; CHECK: add x0, sp, #8 +; CHECK-NOT: mov +; CHECK-NEXT: bl f + ret void +} diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in index 20c1ecca1d43..6145a514f008 100644 --- a/llvm/test/lit.site.cfg.py.in +++ b/llvm/test/lit.site.cfg.py.in @@ -64,10 +64,14 @@ config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@ config.use_classic_flang = 
@LLVM_ENABLE_CLASSIC_FLANG@ config.enable_enable_autotuner = @LLVM_ENABLE_AUTOTUNER@ config.enable_build_for_common = @LLVM_BUILD_FOR_COMMON@ +config.enable_enable_aarch64_hip09 = @LLVM_ENABLE_AARCH64_HIP09@ import lit.llvm lit.llvm.initialize(lit_config, config) +if config.enable_enable_aarch64_hip09: + config.available_features.add("enable_enable_aarch64_hip09") + # Let the main config do the real work. lit_config.load_config( config, os.path.join(config.llvm_src_root, "test/lit.cfg.py")) diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp index 452d0326c1e2..4b4c81514896 100644 --- a/llvm/unittests/TargetParser/Host.cpp +++ b/llvm/unittests/TargetParser/Host.cpp @@ -250,6 +250,11 @@ CPU part : 0x0a1 EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x48\n" "CPU part : 0xd01"), "tsv110"); +#if defined(ENABLE_AARCH64_HIP09) + EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x48\n" + "CPU part : 0xd02"), + "hip09"); +#endif // Verify A64FX. 
const std::string A64FXProcCpuInfo = R"(
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index 741d5a2d4b48..94e0047e567b 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1421,6 +1421,18 @@ INSTANTIATE_TEST_SUITE_P(
                              AArch64::AEK_PROFILE | AArch64::AEK_FP16 |
                              AArch64::AEK_FP16FML | AArch64::AEK_DOTPROD,
                          "8.2-A"),
+#if defined(ENABLE_AARCH64_HIP09)
+        ARMCPUTestParams(
+            "hip09", "armv8.5-a", "crypto-neon-fp-armv8",
+            AArch64::AEK_CRC | AArch64::AEK_FP | AArch64::AEK_SIMD |
+                AArch64::AEK_RAS | AArch64::AEK_LSE | AArch64::AEK_RDM |
+                AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | AArch64::AEK_AES |
+                AArch64::AEK_SM4 | AArch64::AEK_SHA2 | AArch64::AEK_SHA3 |
+                AArch64::AEK_FP16 | AArch64::AEK_PROFILE |
+                AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_I8MM |
+                AArch64::AEK_F32MM | AArch64::AEK_F64MM | AArch64::AEK_BF16,
+            "8.5-A"),
+#endif
         ARMCPUTestParams("a64fx", "armv8.2-a", "crypto-neon-fp-armv8",
                          AArch64::AEK_CRC | AArch64::AEK_AES |
                              AArch64::AEK_SHA2 | AArch64::AEK_FP |
@@ -1437,7 +1449,11 @@ INSTANTIATE_TEST_SUITE_P(
                          "8.2-A")));
 
 // Note: number of CPUs includes aliases.
+#if defined(ENABLE_AARCH64_HIP09)
+static constexpr unsigned NumAArch64CPUArchs = 63;
+#else
 static constexpr unsigned NumAArch64CPUArchs = 62;
+#endif
 
 TEST(TargetParserTest, testAArch64CPUArchList) {
   SmallVector<StringRef> List;
-- 
Gitee