代码拉取完成,页面将自动刷新
For matmul + transdata post-fusion with a padding condition, the AutoPoly pass in AKG currently generates incorrect Halide IR that is missing the else-block statement:
realize T_matmul_input_0_input_1<float32>([0, 1908], [0, 114], [0, 16], [0, 16]) {
3 produce T_matmul_input_0_input_1 {
4 for (ax0, 0, 1908) {
5 for (ax1, 0, 114) {
6 for (ax2, 0, 16) {
7 for (ax3, 0, 16) {
8 T_matmul_input_0_input_1(ax0, ax1, ax2, ax3) = 0f
9 for (ko, 0, 64) {
10 for (ki, 0, 16) {
11 // attr [[iter_var(ko, range(min=0, ext=64)), iter_var(ki, range(min=0, ext=16))]] reduce_update = ""
12 T_matmul_input_0_input_1(ax0, ax1, ax2, ax3) = mad(T_matmul_input_0_input_1(ax0, ax1, ax2, ax3), float32((input_0(ko, ax1, ax2, ki)*input_1(ko, ax0, ax3, ki)))):float32:PI
13 }
14 }
15 }
16 }
17 }
18 }
19 }
20 // attr [extern(T_transdata_T_matmul_input_0_input_1, 0x12a07d0)] realize_scope = ""
21 realize T_transdata_T_matmul_input_0_input_1<float32>([0, 1824], [0, 30522]) {
22 produce T_transdata_T_matmul_input_0_input_1 {
23 // attr [[buffer(T_matmul_input_0_input_1, 0x129aa70), Tensor(shape=[1908, 114, 16, 16], op.name=T_matmul_input_0_input_1)]] buffer_bind_scope = tvm_tuple(0, 1908, 0, 114, 0, 16, 0, 16):handle:I
24 // attr [[buffer(T_transdata_T_matmul_input_0_input_1, 0x129e600), Tensor(shape=[1824, 30522], op.name=T_transdata_T_matmul_input_0_input_1)]] buffer_bind_scope = tvm_tuple(0, 1824, 0, 30522):handle:I
25 // attr [0] extern_scope = 0
26 for (i, 0, 1908) {
27 for (j, 0, 114) {
28 for (k, 0, 16) {
29 for (i_0, 0, 16) {
30 if (((((j*16) + k) < 1824) && (((i*16) + i_0) < 30522))) {
31 tensor_store(T_transdata_T_matmul_input_0_input_1, tensor_load(T_matmul_input_0_input_1, i, j, k, i_0):float32:PI, ((j*16) + k), ((i*16) + i_0)):float32:PI
32 }
33 }
34 }
35 }
36 }
37 }
38 }
39 }
We need to optimize the AutoPoly Pass to generate the correct Halide IR as follows:
76 // attr [0] pragma_cube_l0write = 1
77 for (cc10, 0, 16) {
78 for (cc11, 0, 16) {
79 T_matmul_input_0_input_1_local_UB(0, 0, cc10, cc11) = T_matmul_input_0_input_1_local_UB_local_L0C(0, 0, cc10, cc11)
80 }
81 }
82 // attr [0] pragma_fuse_vector = 1
83 if ((cc0 <= 1906)) {
84 // attr [placeholder(T_transdata_T_matmul_input_0_input_1_local_UB, 0x146cae0)] realize_scope = "local.UB"
85 realize T_transdata_T_matmul_input_0_input_1_local_UB<float32>([0, 16], [0, 16]) {
86 for (cc12, 0, 16) {
87 for (cc13, 0, 16) {
88 T_transdata_T_matmul_input_0_input_1_local_UB(cc12, cc13) = T_matmul_input_0_input_1_local_UB(0, 0, cc12, cc13)
89 }
90 }
91 for (cc10, 0, 16) {
92 for (cc11, 0, 16) {
93 T_transdata_T_matmul_input_0_input_1(((16*cc1) + cc10), ((16*cc0) + cc11)) = T_transdata_T_matmul_input_0_input_1_local_UB(cc10, cc11)
94 }
95 }
96 }
97 } else {
98 // attr [placeholder(T_transdata_T_matmul_input_0_input_1_local_UB, 0x1356490)] realize_scope = "local.UB"
99 realize T_transdata_T_matmul_input_0_input_1_local_UB<float32>([0, 16], [0, 16]) {
100 for (cc12, 0, 16) {
101 for (cc13, 0, 10) {
102 T_transdata_T_matmul_input_0_input_1_local_UB(cc12, cc13) = T_matmul_input_0_input_1_local_UB(0, 0, cc12, cc13)
103 }
104 }
105 for (cc10, 0, 16) {
106 for (cc11, 0, 10) {
107 T_transdata_T_matmul_input_0_input_1(((16*cc1) + cc10), (cc11 + 30512)) = T_transdata_T_matmul_input_0_input_1_local_UB(cc10, cc11)
108 }
109 }
110 }
111 }
112 }
113 }
114 }
Hey dylangeng, Welcome to MindSpore Community.
All of the projects in MindSpore Community are maintained by @mindspore-ci-bot.
That means the developers can comment below every pull request or issue to trigger Bot Commands.
Please follow instructions at https://gitee.com/mindspore/community/blob/master/command.md to find the details.
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。
登录 后才可以发表评论