代码拉取完成,页面将自动刷新
同步操作将从 src-openEuler/gcc 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
确定后同步将在后台操作,完成时将刷新页面,请耐心等待。
From 013544d0b477647c8835a8806c75e7b09155b8ed Mon Sep 17 00:00:00 2001
From: benniaobufeijiushiji <linda7@huawei.com>
Date: Mon, 8 Aug 2022 09:13:53 +0800
Subject: [PATCH 31/35] [loop-vect] Transfer arrays using registers between
 loops

For vectorized stores in a loop, if all succeeding loops immediately use the
stored data, transfer the data using registers instead of a store followed by
loads, to avoid the overhead of memory accesses.
---
gcc/testsuite/gcc.dg/vect/vect-perm-1.c | 45 ++++++
gcc/tree-vect-stmts.c | 181 ++++++++++++++++++++++++
2 files changed, 226 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-perm-1.c
diff --git a/gcc/testsuite/gcc.dg/vect/vect-perm-1.c b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c
new file mode 100644
index 000000000..d8b29fbd5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c
@@ -0,0 +1,45 @@
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
+/* { dg-options "-O3 -fdump-tree-vect-all-details -save-temps" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Branch-free helper used by foo.  S is built from bit 15 and bit 31 of A
+   (moved down to bits 0 and 16 by the shift and mask) multiplied by 0xffff;
+   the result is (A + S) ^ S.
+   NOTE(review): this looks like an absolute value applied independently to
+   two 16-bit halves packed in a 32-bit unsigned -- confirm; nothing in this
+   file fixes the width of unsigned.  */
+static unsigned inline abs2 (unsigned a)
+{
+ unsigned s = ((a>>15)&0x10001)*0xffff;
+ return (a+s)^s;
+}
+
+/* Test kernel made of two back-to-back loops of four iterations each.
+   The first loop fills the local 4x4 array TMP one row per iteration from
+   sums and differences of A00, A11, A22 and A33.  The second loop reads TMP
+   one column per iteration, combines the values the same way and adds abs2
+   of the four results to SUM.  Returns the low 16 bits of SUM plus
+   SUM >> 16, with that total shifted right by one.
+   The dg-final directives in this file expect both loops to be vectorized,
+   so the statements below are intentionally left exactly as written.  */
+int foo (unsigned *a00, unsigned *a11, unsigned *a22, unsigned *a33)
+{
+ unsigned tmp[4][4];
+ unsigned a0, a1, a2, a3;
+ int sum = 0;
+ /* Producer loop: iteration I writes row I of TMP.  */
+ for (int i = 0; i < 4; i++)
+ {
+ int t0 = a00[i] + a11[i];
+ int t1 = a00[i] - a11[i];
+ int t2 = a22[i] + a33[i];
+ int t3 = a22[i] - a33[i];
+ tmp[i][0] = t0 + t2;
+ tmp[i][2] = t0 - t2;
+ tmp[i][1] = t1 + t3;
+ tmp[i][3] = t1 - t3;
+ }
+ /* Consumer loop: iteration I reads column I of TMP, i.e. the data stored
+ by the loop above, with no other memory access in between.  */
+ for (int i = 0; i < 4; i++)
+ {
+ int t0 = tmp[0][i] + tmp[1][i];
+ int t1 = tmp[0][i] - tmp[1][i];
+ int t2 = tmp[2][i] + tmp[3][i];
+ int t3 = tmp[2][i] - tmp[3][i];
+ a0 = t0 + t2;
+ a2 = t0 - t2;
+ a1 = t1 + t3;
+ a3 = t1 - t3;
+ sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3);
+ }
+ /* Fold the two 16-bit halves of SUM together and halve the total.  */
+ return (((unsigned short) sum) + ((unsigned) sum >>16)) >> 1;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 16 "vect" } } */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 2c2197022..98b233718 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2276,6 +2276,173 @@ vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
return NULL_TREE;
}
+/* Count the non-debug, non-volatile GIMPLE assignments in BB that access
+   memory through a MEM_REF or an ARRAY_REF.  An assignment whose right-hand
+   side code is one of those two is a load and is also appended to STMTS; an
+   assignment whose left-hand side is one of those two is only counted.
+   Return the number of assignments counted.  */
+
+static unsigned
+mem_refs_in_bb (basic_block bb, vec<gimple *> &stmts)
+{
+  unsigned count = 0;
+
+  for (gimple_stmt_iterator it = gsi_start_bb (bb); !gsi_end_p (it);
+       gsi_next (&it))
+    {
+      gimple *cur = gsi_stmt (it);
+
+      /* Only plain, non-volatile assignments with memory operands are of
+         interest; debug statements are ignored altogether.  */
+      if (is_gimple_debug (cur)
+          || !is_gimple_assign (cur)
+          || !gimple_has_mem_ops (cur)
+          || gimple_has_volatile_ops (cur))
+        continue;
+
+      enum tree_code rhs_code = gimple_assign_rhs_code (cur);
+      if (rhs_code == MEM_REF || rhs_code == ARRAY_REF)
+        {
+          /* A load: hand it back to the caller as well as counting it.  */
+          stmts.safe_push (cur);
+          count++;
+          continue;
+        }
+
+      /* Otherwise count it only when it stores through a memory
+         reference.  */
+      enum tree_code lhs_code = TREE_CODE (gimple_get_lhs (cur));
+      if (lhs_code == MEM_REF || lhs_code == ARRAY_REF)
+        count++;
+    }
+
+  return count;
+}
+
+/* Return true if DATAREFS contains a read whose base object is rooted at
+   the same variable or SSA name as the base object of DR.
+
+   The root of a base object is its operand 0; when that operand is an
+   ADDR_EXPR the address is stripped to reach the underlying object.  An
+   element of DATAREFS is only considered when its operand 0 has the same
+   tree code as DR's, and only VAR_DECL and SSA_NAME roots can match.
+
+   Roots are compared by node identity.  Comparing DECL_NAMEs instead would
+   also match distinct declarations that merely share a name, or that are
+   both unnamed (DECL_NAME is NULL_TREE for both).  Within one function an
+   SSA name is a single node, so identity covers the version check too.  */
+
+static bool
+check_same_base (vec<data_reference_p> *datarefs, data_reference_p dr)
+{
+  if (datarefs->is_empty ())
+    return false;
+
+  /* The root of DR is the same for every element, so compute it once.  */
+  tree dr_op = TREE_OPERAND (DR_BASE_OBJECT (dr), 0);
+  const bool strip_addr_p = TREE_CODE (dr_op) == ADDR_EXPR;
+  tree dr_root = strip_addr_p ? TREE_OPERAND (dr_op, 0) : dr_op;
+
+  /* Nothing can match unless DR is rooted at a variable or an SSA name.  */
+  if (TREE_CODE (dr_root) != VAR_DECL && TREE_CODE (dr_root) != SSA_NAME)
+    return false;
+
+  for (unsigned ui = 0; ui < datarefs->length (); ui++)
+    {
+      data_reference_p other = (*datarefs)[ui];
+      tree other_root = TREE_OPERAND (DR_BASE_OBJECT (other), 0);
+
+      /* Both base objects must have the same shape before stripping.  */
+      if (TREE_CODE (other_root) != TREE_CODE (dr_op))
+        continue;
+      if (strip_addr_p)
+        other_root = TREE_OPERAND (other_root, 0);
+
+      if (other_root == dr_root && DR_IS_READ (other))
+        return true;
+    }
+
+  return false;
+}
+
+/* Check whether the loads in STMTS read back what the store STMT_INFO
+   writes.
+
+   SUCCESS is left untouched when a load in STMTS with a vector-typed
+   left-hand side sits in the header of its loop and the data references of
+   that block include a read with the same base as STMT_INFO's data
+   reference (see check_same_base).  SUCCESS is set to false when STMT_INFO
+   is NULL or is itself a read, when a vector-typed load is not in its loop
+   header, when data reference analysis of the block fails, or when no load
+   matches.
+
+   LOOP_VINFO is currently unused.  */
+
+static void
+check_vec_use (loop_vec_info loop_vinfo, vec<gimple *> &stmts,
+               stmt_vec_info stmt_info, bool &success)
+{
+  if (stmt_info == NULL || DR_IS_READ (stmt_info->dr_aux.dr))
+    {
+      success = false;
+      return;
+    }
+
+  unsigned ui = 0;
+  gimple *candidate = NULL;
+  FOR_EACH_VEC_ELT (stmts, ui, candidate)
+    {
+      /* Only loads that already produce a vector value are of interest.  */
+      if (TREE_CODE (TREE_TYPE (gimple_get_lhs (candidate))) != VECTOR_TYPE)
+        continue;
+
+      if (candidate->bb != candidate->bb->loop_father->header)
+        {
+          success = false;
+          return;
+        }
+
+      auto_vec<data_reference_p> datarefs;
+      tree res = find_data_references_in_bb (candidate->bb->loop_father,
+                                             candidate->bb, &datarefs);
+      bool same_base_p
+        = (res != chrec_dont_know
+           && check_same_base (&datarefs, stmt_info->dr_aux.dr));
+
+      /* The data references collected above are owned by this function.
+         auto_vec only releases the vector storage, so free the elements
+         explicitly on every path, including the failure path where some
+         may already have been pushed.  */
+      unsigned di = 0;
+      data_reference_p collected = NULL;
+      FOR_EACH_VEC_ELT (datarefs, di, collected)
+        free_data_ref (collected);
+
+      if (res == chrec_dont_know)
+        {
+          success = false;
+          return;
+        }
+      if (same_base_p)
+        return;
+    }
+
+  /* No vector load reading from the stored base was found.  */
+  success = false;
+}
+
+/* Depth-first search over the successors of the store's block, starting at
+   BB.  Reaching the exit block of the current function sets SUCCESS to
+   false.  Blocks already recorded in VISITED_BBS and the latch of
+   LOOP_VINFO's loop are not entered.  A block for which mem_refs_in_bb
+   counts nothing is treated as empty and the search continues through its
+   successors; the first non-empty block on a path is handed to
+   check_vec_use and the search does not go past it.  The search stops as
+   soon as SUCCESS becomes false.  */
+
+static void
+dfs_check_bb (loop_vec_info loop_vinfo, basic_block bb, stmt_vec_info stmt_info,
+              bool &success, vec<basic_block> &visited_bbs)
+{
+  /* Falling off the function without meeting a use is a failure.  */
+  if (bb == cfun->cfg->x_exit_block_ptr)
+    {
+      success = false;
+      return;
+    }
+
+  if (!success)
+    return;
+  if (visited_bbs.contains (bb))
+    return;
+  if (bb == loop_vinfo->loop->latch)
+    return;
+
+  visited_bbs.safe_push (bb);
+
+  auto_vec<gimple *> loads;
+  const unsigned n_mem_refs = mem_refs_in_bb (bb, loads);
+
+  if (n_mem_refs != 0)
+    {
+      /* Non-empty BB: decide here and do not look any further.  */
+      check_vec_use (loop_vinfo, loads, stmt_info, success);
+      return;
+    }
+
+  /* Empty BB: keep walking through every successor.  */
+  edge succ;
+  edge_iterator it;
+  FOR_EACH_EDGE (succ, it, bb->succs)
+    {
+      dfs_check_bb (loop_vinfo, succ->dest, stmt_info, success, visited_bbs);
+      if (!success)
+        break;
+    }
+}
+
+/* Return true if the data written by the grouped store STMT_VINFO is read
+   back by vector loads from the same base in the blocks that follow it, as
+   determined by dfs_check_bb over every successor of the store's block.
+   Return false when STMT_VINFO is not a GIMPLE assignment or when its data
+   reference is a read.  The caller uses a true result to pick
+   VMAT_CONTIGUOUS_PERMUTE instead of VMAT_LOAD_STORE_LANES.  */
+
+static bool
+conti_perm (stmt_vec_info stmt_vinfo, loop_vec_info loop_vinfo)
+{
+  gimple *store = stmt_vinfo->stmt;
+  if (gimple_code (store) != GIMPLE_ASSIGN
+      || DR_IS_READ (stmt_vinfo->dr_aux.dr))
+    return false;
+
+  basic_block store_bb = store->bb;
+
+  /* Seed the visited set with the store's own block so the search never
+     re-enters it.  */
+  auto_vec<basic_block> visited_bbs;
+  visited_bbs.safe_push (store_bb);
+
+  bool all_paths_ok = true;
+  edge succ;
+  edge_iterator it;
+  FOR_EACH_EDGE (succ, it, store_bb->succs)
+    dfs_check_bb (loop_vinfo, succ->dest, stmt_vinfo, all_paths_ok,
+                  visited_bbs);
+
+  return all_paths_ok;
+}
+
/* A subroutine of get_load_store_type, with a subset of the same
arguments. Handle the case where STMT_INFO is part of a grouped load
or store.
@@ -2434,6 +2601,20 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
*memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
overrun_p = would_overrun_p;
}
+
+ if (*memory_access_type == VMAT_LOAD_STORE_LANES
+ && TREE_CODE (loop_vinfo->num_iters) == INTEGER_CST
+ && maybe_eq (tree_to_shwi (loop_vinfo->num_iters),
+ loop_vinfo->vectorization_factor)
+ && conti_perm (stmt_info, loop_vinfo)
+ && (vls_type == VLS_LOAD
+ ? vect_grouped_load_supported (vectype, single_element_p,
+ group_size)
+ : vect_grouped_store_supported (vectype, group_size)))
+ {
+ *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
+ overrun_p = would_overrun_p;
+ }
}
/* As a last resort, trying using a gather load or scatter store.
--
2.27.0.windows.1
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。