代码拉取完成,页面将自动刷新
| name | about | labels |
| --- | --- | --- |
| Bug Report | Use this template for reporting a bug | kind/bug |
Hardware Environment (Ascend/GPU/CPU): device gpu
{
"composite": true,
"composite_graph": "3979.3979",
"id": 4031,
"input_desc": [
[
{
"data_type": "float16",
"format": "DefaultFormat",
"name": "input_0",
"shape": [
1024,
768
],
"tensor_name": "output_0_6"
}
],
[
{
"data_type": "float32",
"format": "DefaultFormat",
"name": "input_0",
"shape": [
1024,
1
],
"tensor_name": "output_0_9"
}
]
],
"op": "Fused_Cast_LessEqual_Cast_Add_Mul_Cast_Mul_Add_Cast_Cast_ReduceSum_Cast_Mul_Sub__more_split_18218696765951249700",
"op_desc": [
{
"attr": [
{
"data_type": "str",
"name": "dst_type",
"value": "float16"
}
],
"impl_path": "",
"input_desc": [
[
{
"data_type": "float32",
"format": "DefaultFormat",
"name": "input_0",
"shape": [
1024,
1
],
"tensor_name": "output_0_9"
}
]
],
"name": "Cast",
"output_desc": [
{
"data_type": "float16",
"format": "DefaultFormat",
"name": "output_0",
"shape": [
1024,
1
],
"tensor_name": "output_0_10"
}
]
},
{
"attr": null,
"impl_path": "",
"input_desc": [
[
{
"data_type": "float16",
"format": "DefaultFormat",
"name": "input_0",
"shape": [
1024,
1
],
"tensor_name": "output_0_10"
}
],
[
{
"data_type": "float16",
"format": "DefaultFormat",
"name": "input_1",
"shape": [
1
],
"tensor_name": "input_17",
"value": 0.0013017654418945312
}
]
],
"name": "Mul",
"output_desc": [
{
"data_type": "float16",
"format": "DefaultFormat",
"name": "output_0",
"shape": [
1024,
1
],
"tensor_name": "output_0_11"
}
]
},
{
"attr": null,
"impl_path": "",
"input_desc": [
[
{
"data_type": "float16",
"format": "DefaultFormat",
"name": "input_0",
"shape": [
1024,
768
],
"tensor_name": "output_0_6"
}
],
[
{
"data_type": "float16",
"format": "DefaultFormat",
"name": "input_1",
"shape": [
1024,
1
],
"tensor_name": "output_0_11"
}
]
],
"name": "Sub",
"output_desc": [
{
"data_type": "float16",
"format": "DefaultFormat",
"name": "output_0",
"shape": [
1024,
768
],
"tensor_name": "output_0_12"
}
]
},
{
"attr": null,
"impl_path": "",
"input_desc": [
[
{
"data_type": "float16",
"format": "DefaultFormat",
"name": "input_0",
"shape": [
1024,
768
],
"tensor_name": "output_0_12"
}
],
[
{
"data_type": "float16",
"format": "DefaultFormat",
"name": "input_1",
"shape": [
1024,
768
],
"tensor_name": "output_0_12"
}
]
],
"name": "Mul",
"output_desc": [
{
"data_type": "float16",
"format": "DefaultFormat",
"name": "output_0",
"shape": [
1024,
768
],
"tensor_name": "output_0_13"
}
]
},
{
"attr": [
{
"data_type": "str",
"name": "dst_type",
"value": "float32"
}
],
"impl_path": "",
"input_desc": [
[
{
"data_type": "float16",
"format": "DefaultFormat",
"name": "input_0",
"shape": [
1024,
768
],
"tensor_name": "output_0_13"
}
]
],
"name": "Cast",
"output_desc": [
{
"data_type": "float32",
"format": "DefaultFormat",
"name": "output_0",
"shape": [
1024,
768
],
"tensor_name": "output_0_14"
}
]
},
{
"attr": [
{
"data_type": "str",
"name": "stitch",
"value": "common"
},
{
"data_type": "listInt",
"name": "axis",
"value": [
1
]
},
{
"data_type": "bool",
"name": "keep_dims",
"value": true
}
],
"impl_path": "",
"input_desc": [
[
{
"data_type": "float32",
"format": "DefaultFormat",
"name": "input_0",
"shape": [
1024,
768
],
"tensor_name": "output_0_14"
}
]
],
"name": "ReduceSum",
"output_desc": [
{
"data_type": "float32",
"format": "DefaultFormat",
"name": "output_0",
"shape": [
1024,
1
],
"tensor_name": "output_0_15"
}
]
}
],
"output_desc": [
{
"data_type": "float32",
"format": "DefaultFormat",
"name": "output_0",
"shape": [
1024,
1
],
"tensor_name": "output_0_15"
},
{
"data_type": "float16",
"format": "DefaultFormat",
"name": "output_0",
"shape": [
1024,
768
],
"tensor_name": "output_0_12"
},
{
"data_type": "float16",
"format": "DefaultFormat",
"name": "output_0",
"shape": [
1024,
1
],
"tensor_name": "output_0_11"
}
],
"platform": "AKG",
"process": "cuda"
}
test/st/composite
Precision error of Sub output and Reduce output.
Expected result: the test passes.
Failing generated CUDA kernel:
// AKG-generated CUDA kernel for the fused sub-graph described in the JSON above:
//   Cast(fp32->fp16) -> Mul(scalar) -> Sub -> Mul(x*x) -> Cast(fp16->fp32) -> ReduceSum(axis=1, keep_dims=true).
// Inputs:
//   output_0_6 : half  [1024, 768]
//   output_0_9 : float [1024, 1]
// Outputs:
//   T_cast_..._red           : float [1024, 1]   row-wise sum of squares of the Sub result
//   T_subtract_...           : half  [1024, 768] output_0_6 - (half)output_0_9 * scale (broadcast over columns)
//   T_multiply_...           : half  [1024, 1]   (half)output_0_9 * 1.301765e-03
// Launch geometry implied by the index arithmetic (TODO confirm against the launch config):
//   blockIdx.y selects a pair of rows (2*blockIdx.y, 2*blockIdx.y+1);
//   threadIdx.x in [0, 256) covers the 768 columns in 3 strides of 256.
// NOTE(review): the bug report above says the Sub and Reduce outputs have a
// precision error on cuda; the hedged notes below mark the spots worth auditing.
extern "C" __global__ void Fused_Cast_LessEqual_Cast_Add_Mul_Cast_Mul_Add_Cast_Cast_ReduceSum_Cast_Mul_Sub__more_split_18218696765951249700_kernel0( half* __restrict__ output_0_6, float* __restrict__ output_0_9, float* __restrict__ T_cast_T_multiply_T_subtract_output_0_6_T_multiply_T_cast_output_0_9_T_subtract_output_0_6_T_multiply_T_cast_output_0_9_red, half* __restrict__ T_subtract_output_0_6_T_multiply_T_cast_output_0_9, half* __restrict__ T_multiply_T_cast_output_0_9) {
// Per-thread copy of the two fp32 row scalars handled by this block.
float output_0_9_local[2];
// Shared staging: the scaled scalar per row (2 halfs), the full Sub result for
// both rows (2 x 768 = 1536 halfs), and the per-row reduction accumulators.
__shared__ half T_multiply_T_cast_output_0_9_shared[2];
__shared__ half T_subtract_output_0_6_T_multiply_T_cast_output_0_9_shared[1536];
__shared__ float T_cast_T_multiply_T_subtract_output_0_6_T_multiply_T_cast_output_0_9_T_subtract_output_0_6_T_multiply_T_cast_output_0_9_red_shared[2];
// Scratch for AkgReduce: per-thread partial sum plus a 256-slot shared buffer.
float acc_0[1];
__shared__ float red_buf0[256];
// Load the two fp32 scalars (one per row of this block's row pair).
for (int cc1 = 0; cc1 < 2; ++cc1) {
output_0_9_local[cc1] = output_0_9[((((int)blockIdx.y) * 2) + cc1)];
}
// Stage the Mul and Sub results in shared memory.
for (int cc2 = 0; cc2 < 2; ++cc2) {
for (int cc3 = 0; cc3 < 3; ++cc3) {
// Mul node: (half)scalar * 0.0013017654418945312 (constant input_17 in the JSON,
// rounded to the fp16 literal below). NOTE(review): all 256 threads store the
// same value to the same shared slot, 3x redundantly — deterministic, but the
// unsynchronized multi-writer store is worth confirming as benign.
T_multiply_T_cast_output_0_9_shared[cc2] = (((half)output_0_9_local[cc2]) * __float2half_rn(1.301765e-03f));
// Sub node, computed in half precision. NOTE(review): a candidate source of the
// reported precision error — the subtraction and the squaring below both happen
// in fp16 before the cast to fp32.
T_subtract_output_0_6_T_multiply_T_cast_output_0_9_shared[(((cc2 * 768) + (cc3 * 256)) + ((int)threadIdx.x))] = (output_0_6[((((((int)blockIdx.y) * 1536) + (cc2 * 768)) + (cc3 * 256)) + ((int)threadIdx.x))] - T_multiply_T_cast_output_0_9_shared[cc2]);
// Thread 0 zero-initializes the shared reduction accumulator for each row.
if ((((int)threadIdx.x) == 0) && (cc3 == 0)) {
T_cast_T_multiply_T_subtract_output_0_6_T_multiply_T_cast_output_0_9_T_subtract_output_0_6_T_multiply_T_cast_output_0_9_red_shared[cc2] = 0.000000e+00f;
}
}
}
// Barrier: all Sub results must be visible before the reduction reads them.
__syncthreads();
// ReduceSum node: per-row sum of squares. The square (x * x) is an fp16
// multiply; only the accumulation is fp32.
for (int cc21 = 0; cc21 < 2; ++cc21) {
acc_0[0] = 0.000000e+00f;
for (int cc31 = 0; cc31 < 3; ++cc31) {
acc_0[0] = (acc_0[0] + ((float)(T_subtract_output_0_6_T_multiply_T_cast_output_0_9_shared[(((cc21 * 768) + (cc31 * 256)) + ((int)threadIdx.x))] * T_subtract_output_0_6_T_multiply_T_cast_output_0_9_shared[(((cc21 * 768) + (cc31 * 256)) + ((int)threadIdx.x))])));
}
// Block-wide sum into the shared per-row slot. NOTE(review): red_buf0 is reused
// for both cc21 iterations with no visible barrier between the two calls —
// confirm AkgReduce synchronizes internally, otherwise this races.
(void)akg_reduce::AkgReduce<float,akg_reduce::SumOp, 256, 1, akg_reduce::REDUCE2D_X>(akg_reduce::SumOp(), &(T_cast_T_multiply_T_subtract_output_0_6_T_multiply_T_cast_output_0_9_T_subtract_output_0_6_T_multiply_T_cast_output_0_9_red_shared[cc21]), red_buf0, acc_0[0], 256);
}
// Barrier: reduction results must be complete before they are written out.
__syncthreads();
// Threads 0 and 1 emit the two per-row outputs (reduce result and Mul result).
if (((int)threadIdx.x) <= 1) {
T_cast_T_multiply_T_subtract_output_0_6_T_multiply_T_cast_output_0_9_T_subtract_output_0_6_T_multiply_T_cast_output_0_9_red[((((int)blockIdx.y) * 2) + ((int)threadIdx.x))] = T_cast_T_multiply_T_subtract_output_0_6_T_multiply_T_cast_output_0_9_T_subtract_output_0_6_T_multiply_T_cast_output_0_9_red_shared[((int)threadIdx.x)];
T_multiply_T_cast_output_0_9[((((int)blockIdx.y) * 2) + ((int)threadIdx.x))] = T_multiply_T_cast_output_0_9_shared[((int)threadIdx.x)];
}
// Flush the staged Sub tensor (2 rows x 768 cols) back to global memory.
for (int cc22 = 0; cc22 < 2; ++cc22) {
for (int cc32 = 0; cc32 < 3; ++cc32) {
T_subtract_output_0_6_T_multiply_T_cast_output_0_9[((((((int)blockIdx.y) * 1536) + (cc22 * 768)) + (cc32 * 256)) + ((int)threadIdx.x))] = T_subtract_output_0_6_T_multiply_T_cast_output_0_9_shared[(((cc22 * 768) + (cc32 * 256)) + ((int)threadIdx.x))];
}
}
// Trailing barrier emitted by the code generator; no shared reads follow it,
// so it appears redundant — harmless, but worth confirming in the codegen.
__syncthreads();
}
Hey dabaiji, Welcome to MindSpore Community.
All of the projects in MindSpore Community are maintained by @mindspore-ci-bot.
That means the developers can comment below every pull request or issue to trigger Bot Commands.
Please follow instructions at https://gitee.com/mindspore/community/blob/master/command.md to find the details.
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。
登录 后才可以发表评论