1 Star 0 Fork 118

杨晨光/gcc

forked from src-openEuler/gcc 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch 7.64 KB
一键复制 编辑 原始数据 按行查看 历史
From 013544d0b477647c8835a8806c75e7b09155b8ed Mon Sep 17 00:00:00 2001
From: benniaobufeijiushiji <linda7@huawei.com>
Date: Mon, 8 Aug 2022 09:13:53 +0800
Subject: [PATCH 31/35] [loop-vect] Transfer arrays using registers between
loops. For vectorized stores in a loop, if all succeeding loops immediately use
the data, transfer the data using registers instead of loads and stores to
avoid the overhead of memory accesses.
---
gcc/testsuite/gcc.dg/vect/vect-perm-1.c | 45 ++++++
gcc/tree-vect-stmts.c | 181 ++++++++++++++++++++++++
2 files changed, 226 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-perm-1.c
diff --git a/gcc/testsuite/gcc.dg/vect/vect-perm-1.c b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c
new file mode 100644
index 000000000..d8b29fbd5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c
@@ -0,0 +1,45 @@
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
+/* { dg-options "-O3 -fdump-tree-vect-all-details -save-temps" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+static unsigned inline abs2 (unsigned a)
+{
+ unsigned s = ((a>>15)&0x10001)*0xffff; /* 0xffff in each 16-bit half of A whose top bit is set. */
+ return (a+s)^s; /* NOTE(review): looks like a packed two-lane absolute value -- confirm. */
+}
+
+int foo (unsigned *a00, unsigned *a11, unsigned *a22, unsigned *a33)
+{
+ unsigned tmp[4][4]; /* Written by the first loop, read back by the second. */
+ unsigned a0, a1, a2, a3;
+ int sum = 0;
+ for (int i = 0; i < 4; i++) /* Fills one row of tmp per iteration. */
+ {
+ int t0 = a00[i] + a11[i];
+ int t1 = a00[i] - a11[i];
+ int t2 = a22[i] + a33[i];
+ int t3 = a22[i] - a33[i];
+ tmp[i][0] = t0 + t2;
+ tmp[i][2] = t0 - t2;
+ tmp[i][1] = t1 + t3;
+ tmp[i][3] = t1 - t3;
+ }
+ for (int i = 0; i < 4; i++) /* Reads one column of tmp per iteration. */
+ {
+ int t0 = tmp[0][i] + tmp[1][i];
+ int t1 = tmp[0][i] - tmp[1][i];
+ int t2 = tmp[2][i] + tmp[3][i];
+ int t3 = tmp[2][i] - tmp[3][i];
+ a0 = t0 + t2;
+ a2 = t0 - t2;
+ a1 = t1 + t3;
+ a3 = t1 - t3;
+ sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3);
+ }
+ return (((unsigned short) sum) + ((unsigned) sum >>16)) >> 1; /* Adds the low and high 16-bit halves of sum, then halves. */
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 16 "vect" } } */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 2c2197022..98b233718 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2276,6 +2276,173 @@ vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
return NULL_TREE;
}
+/* Count non-volatile assignments in BB whose RHS or LHS is a MEM_REF/ARRAY_REF,
+ pushing the RHS cases onto STMTS. A BB with count 0 is empty for the DFS. */
+
+static unsigned
+mem_refs_in_bb (basic_block bb, vec<gimple *> &stmts)
+{
+ unsigned num = 0;
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ if (is_gimple_debug (stmt)) /* Debug statements are never counted. */
+ continue;
+ if (is_gimple_assign (stmt) && gimple_has_mem_ops (stmt)
+ && !gimple_has_volatile_ops (stmt))
+ {
+ if (gimple_assign_rhs_code (stmt) == MEM_REF
+ || gimple_assign_rhs_code (stmt) == ARRAY_REF)
+ { /* RHS is a memory reference: record the statement and count it. */
+ stmts.safe_push (stmt);
+ num++;
+ }
+ else if (TREE_CODE (gimple_get_lhs (stmt)) == MEM_REF
+ || TREE_CODE (gimple_get_lhs (stmt)) == ARRAY_REF)
+ num++; /* LHS is a memory reference: counted but not recorded. */
+ }
+ }
+ return num;
+}
+
+/* Return true if DATAREFS holds a read whose base object is built on the same
+   VAR_DECL or SSA_NAME as the base object of DR.  Operand 0 of each base object
+   is inspected, so the bases are presumably MEM_REFs -- TODO confirm.  */
+
+static bool
+check_same_base (vec<data_reference_p> *datarefs, data_reference_p dr)
+{
+  for (unsigned ui = 0; ui < datarefs->length (); ui++)
+    {
+      data_reference_p other = (*datarefs)[ui];
+      /* Only a load can consume the data written through DR.  */
+      if (!DR_IS_READ (other))
+        continue;
+      tree op1 = TREE_OPERAND (DR_BASE_OBJECT (dr), 0);
+      tree op2 = TREE_OPERAND (DR_BASE_OBJECT (other), 0);
+      if (TREE_CODE (op1) != TREE_CODE (op2))
+        continue;
+      if (TREE_CODE (op1) == ADDR_EXPR)
+        {
+          op1 = TREE_OPERAND (op1, 0);
+          op2 = TREE_OPERAND (op2, 0);
+        }
+      /* Compare by node identity.  Comparing DECL_NAMEs would also match
+         distinct variables that share a name (or are both unnamed), and would
+         apply a decl accessor to OP2 without checking that OP2 is a decl.
+         SSA names with equal versions are the same node, so identity also
+         covers the SSA_NAME case.  */
+      if ((TREE_CODE (op1) == VAR_DECL || TREE_CODE (op1) == SSA_NAME)
+          && op1 == op2)
+        return true;
+    }
+  return false;
+}
+
+/* Look through the loads in STMTS for a vector load whose BB reads from the
+   same base as the store described by STMT_INFO.  SUCCESS is left untouched
+   when such a load is found and is set to false in every other case.
+   LOOP_VINFO is currently unused.  */
+
+static void
+check_vec_use (loop_vec_info loop_vinfo, vec<gimple *> &stmts,
+               stmt_vec_info stmt_info, bool &success)
+{
+  /* Only a store can feed the loads that follow it.  */
+  if (stmt_info == NULL || DR_IS_READ (stmt_info->dr_aux.dr))
+    {
+      success = false;
+      return;
+    }
+  unsigned ui = 0;
+  gimple *candidate = NULL;
+  FOR_EACH_VEC_ELT (stmts, ui, candidate)
+    {
+      if (TREE_CODE (TREE_TYPE (gimple_get_lhs (candidate))) != VECTOR_TYPE)
+        continue;
+      /* A vector load outside its loop's header is a failure.  */
+      if (candidate->bb != candidate->bb->loop_father->header)
+        break;
+      auto_vec<data_reference_p> datarefs;
+      tree res = find_data_references_in_bb (candidate->bb->loop_father,
+                                             candidate->bb, &datarefs);
+      bool found = (res != chrec_dont_know
+                    && check_same_base (&datarefs, stmt_info->dr_aux.dr));
+      /* auto_vec releases only the vector storage; free the data references
+         themselves so they are not leaked on any path.  */
+      unsigned uj = 0;
+      data_reference_p ref = NULL;
+      FOR_EACH_VEC_ELT (datarefs, uj, ref)
+        free_data_ref (ref);
+      if (found)
+        return;
+      if (res == chrec_dont_know)
+        break;
+    }
+  success = false;
+}
+
+/* Depth-first search from BB. A BB that contains memory references is checked
+ with check_vec_use and the search does not continue past it. */
+
+static void
+dfs_check_bb (loop_vec_info loop_vinfo, basic_block bb, stmt_vec_info stmt_info,
+ bool &success, vec<basic_block> &visited_bbs)
+{
+ if (bb == cfun->cfg->x_exit_block_ptr) /* Reached the exit block of the current function's CFG. */
+ {
+ success = false;
+ return;
+ }
+ if (!success || visited_bbs.contains (bb) || bb == loop_vinfo->loop->latch)
+ return; /* Already failed, already visited, or the latch of LOOP_VINFO's loop. */
+
+ visited_bbs.safe_push (bb);
+ auto_vec<gimple *> stmts;
+ unsigned num = mem_refs_in_bb (bb, stmts);
+ /* Empty BB: no counted memory references, so search on through its successors. */
+ if (num == 0)
+ {
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ dfs_check_bb (loop_vinfo, e->dest, stmt_info, success, visited_bbs);
+ if (!success) /* Stop at the first failing successor. */
+ return;
+ }
+ return;
+ }
+ /* Non-empty BB: the search ends here and the loads found in it are checked. */
+ check_vec_use (loop_vinfo, stmts, stmt_info, success);
+}
+
+/* For a grouped store, return true if the search from every successor of the
+ store's BB succeeds, i.e. finds vectorized loads from the store's base. The
+ caller then uses VMAT_CONTIGUOUS_PERMUTE instead of VMAT_LOAD_STORE_LANES. */
+
+static bool
+conti_perm (stmt_vec_info stmt_vinfo, loop_vec_info loop_vinfo)
+{
+ gimple *stmt = stmt_vinfo->stmt;
+ if (gimple_code (stmt) != GIMPLE_ASSIGN)
+ return false;
+
+ if (DR_IS_READ (stmt_vinfo->dr_aux.dr)) /* Only stores are candidates. */
+ return false;
+
+ basic_block bb = stmt->bb;
+ bool success = true;
+ auto_vec<basic_block> visited_bbs;
+ visited_bbs.safe_push (bb); /* Mark the store's own BB visited so the search never re-enters it. */
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ dfs_check_bb (loop_vinfo, e->dest, stmt_vinfo, success, visited_bbs);
+ return success;
+}
+
/* A subroutine of get_load_store_type, with a subset of the same
arguments. Handle the case where STMT_INFO is part of a grouped load
or store.
@@ -2434,6 +2601,20 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
*memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
overrun_p = would_overrun_p;
}
+
+ if (*memory_access_type == VMAT_LOAD_STORE_LANES
+ && TREE_CODE (loop_vinfo->num_iters) == INTEGER_CST
+ && maybe_eq (tree_to_shwi (loop_vinfo->num_iters),
+ loop_vinfo->vectorization_factor)
+ && conti_perm (stmt_info, loop_vinfo)
+ && (vls_type == VLS_LOAD
+ ? vect_grouped_load_supported (vectype, single_element_p,
+ group_size)
+ : vect_grouped_store_supported (vectype, group_size)))
+ {
+ *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
+ overrun_p = would_overrun_p;
+ }
}
/* As a last resort, trying using a gather load or scatter store.
--
2.27.0.windows.1
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/yangchen_guang/gcc.git
git@gitee.com:yangchen_guang/gcc.git
yangchen_guang
gcc
gcc
master

搜索帮助