From 3d45d3501240762c414bd890382783021670843a Mon Sep 17 00:00:00 2001 From: bluesky90 Date: Tue, 4 Jul 2023 20:22:29 +0800 Subject: [PATCH 1/7] add tensorflow inference case for AscendC --- .../tensorflow_inference/LeakyRelu/README.md | 30 +++++++++ .../LeakyRelu/fusion_off.cfg | 14 ++++ .../LeakyRelu/leakyrelu.json | 29 +++++++++ .../LeakyRelu/op_host/leaky_relu.cpp | 64 +++++++++++++++++++ .../LeakyRelu/op_host/leaky_relu_tiling.h | 10 +++ .../LeakyRelu/op_kernel/kernel_leaky_relu.h | 61 ++++++++++++++++++ .../op_kernel/kernel_leaky_relu_tiling.h | 54 ++++++++++++++++ .../LeakyRelu/op_kernel/leaky_relu.cpp | 11 ++++ 8 files changed, 273 insertions(+) create mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/README.md create mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/fusion_off.cfg create mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/leakyrelu.json create mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp create mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu_tiling.h create mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu.h create mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h create mode 100644 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/leaky_relu.cpp diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/README.md b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/README.md new file mode 100644 index 000000000..fa6bded3b --- /dev/null +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/README.md @@ -0,0 +1,30 @@ +AscendC 自定义算子入TensorFlow网络示例教程: +以Yolov3 TensorFlow离线推理为例 +推理平台:Ascend310P3 + +一、自定义算子准备 +1.先构建AscendC-LeakyRelu算子工程 +/usr/local/python3.7/bin/msopgen gen -i leakyrelu.json -f tf -c ai_core-ascend310p -lan cpp -out ./custom_op +2.将目录下的op_host和op_kernel实现同步至生成的custom_op工程目录下,可以替换之前msopgen生成的默认文件 +3.确认CMakePresets.json中 "ASCEND_CANN_PACKAGE_PATH" 为CANN软件包安装路径,执行 ./build.sh编译出自定义算子包 +4.安装在custom_op/build_out/目录下生成的自定义算子run包 + +二、离线推理验证流程 +1.先下载yolov3 tensorflow离线pb模型: +https://gitee.com/link?target=https%3A%2F%2Fobs-9be7.obs.cn-east-2.myhuaweicloud.com%2F003_Atc_Models%2Fmodelzoo%2Fyolov3_tf.pb + +2.Pb模型转换为om模型 +For Ascend310P3: +atc --model=./yolov3_tf.pb --framework=3 --output=./YOLOv3_TF --input_shape="input:4,416,416,3" --soc_version=Ascend310P3 --fusion_switch_file=fusion_off.cfg +其中 --fusion_switch_file为关闭算子融合配置,此处若不关闭融合,LeakyRelu算子会进行融合,因此会无法单独编译LeakyRelu算子进行验证 + +若出现: +start compile Ascend C operator LeakyRelu. kernel name is leaky_relu +compile Ascend C operator: LeakyRelu success! +打印,表明进入了AscendC算子编译 + +出现ATC run success, welcome to the next use 表明离线om模型转换成功 + +3.执行离线推理 +可使用https://gitee.com/ascend/tools/tree/master/msame 该工具 + diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/fusion_off.cfg b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/fusion_off.cfg new file mode 100644 index 000000000..2472195ec --- /dev/null +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/fusion_off.cfg @@ -0,0 +1,14 @@ +{ + "Switch": { + "GraphFusion": { + "BatchNormPreprocessFusionPass": "off", + "ConvBatchnormFusionPass": "off", + "BatchNormBnInferFusionPass": "off", + "HostBNFusionPass": "off" + }, + "UBFusion": { + "ALL": "off" + } + } +} + diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/leakyrelu.json b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/leakyrelu.json new file mode 100644 index 000000000..e38679d0f --- /dev/null +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/leakyrelu.json @@ -0,0 +1,29 @@ +[ + { + "op": "LeakyRelu", + "input_desc": [ + { + "name": "x", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "fp16" + ] + } + ], + "output_desc": [ + { + "name": "y", + "param_type": "required", + "format": [ + "ND" + ], + "type": [ + "fp16" + ] + } + ] + } +] diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp new file mode 100644 index 000000000..b6d0528e5 --- /dev/null +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp @@ -0,0 +1,64 @@ + +#include "leaky_relu_tiling.h" +#include "register/op_def_registry.h" + + +namespace optiling { +static ge::graphStatus TilingFunc(gert::TilingContext* context) +{ + + TilingData tiling; + const gert::StorageShape* x1_shape = context->GetInputShape(0); + int32_t data_sz = 1; + for (int i = 0; i < x1_shape->GetStorageShape().GetDimNum(); i++) + data_sz *= x1_shape->GetStorageShape().GetDim(i); + tiling.set_size(data_sz); + context->SetBlockDim(8); + tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); + context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); + + return ge::GRAPH_SUCCESS; +} +} + + +namespace ge { +static ge::graphStatus InferShape(gert::InferShapeContext* context) +{ + const gert::Shape* x1_shape = context->GetInputShape(0); + gert::Shape* y_shape = context->GetOutputShape(0); + *y_shape = *x1_shape; + return GRAPH_SUCCESS; +} +} + + +namespace ops { +class LeakyRelu : public OpDef { +public: + LeakyRelu(const char* name) : OpDef(name) + { + this->Input("x") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT16}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + this->Output("y") + .ParamType(REQUIRED) + .DataType({ge::DT_FLOAT16}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); + + this->SetInferShape(ge::InferShape); + + this->AICore() + .SetTiling(optiling::TilingFunc); + this->AICore().AddConfig("ascend310p"); + + this->Attr("negative_slope") + .Float(0.01f); + } +}; + +OP_ADD(LeakyRelu); +} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu_tiling.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu_tiling.h new file mode 100644 index 000000000..f880ff3fd --- /dev/null +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu_tiling.h @@ -0,0 +1,10 @@ + +#include "register/tilingdata_base.h" + +namespace optiling { +BEGIN_TILING_DATA_DEF(TilingData) + TILING_DATA_FIELD_DEF(uint32_t, size); +END_TILING_DATA_DEF; + +REGISTER_TILING_DATA_CLASS(LeakyRelu, TilingData) +} diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu.h new file mode 100644 index 000000000..c549f0be3 --- /dev/null +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. + */ +#ifndef KERNEL_LEAKY_RELU_H +#define KERNEL_LEAKY_RELU_H +#include "op_frame/elemwise_frame.h" +#include "kernel_leaky_relu_tiling.h" + +namespace leaky_relu_ascendc { +template class KernelLeakyRelu : public tik2::ElemwiseOpBase { +public: + using DType = T; + __aicore__ KernelLeakyRelu() {} + __aicore__ inline void Init(GM_ADDR x, GM_ADDR y); + +public: + __aicore__ inline void MyCopyIn(int32_t progress, tik2::LocalTensor& inBuf); + __aicore__ inline void MyCompute(int32_t progress, tik2::LocalTensor& inBuf, tik2::LocalTensor& outBuf); + __aicore__ inline void MyCopyOut(int32_t progress, tik2::LocalTensor& outBuf); + +public: + LeakyReluParam param; + +private: + tik2::GlobalTensor xGm; + tik2::GlobalTensor yGm; +}; + +template __aicore__ inline void KernelLeakyRelu::Init(GM_ADDR x, GM_ADDR y) +{ + ElemwiseOpBase::Init(param.loopSize, param.dataLen, 0, param.dataLen); + xGm.SetGlobalBuffer((__gm__ T*)(x) + block_idx * param.blockFactor); + yGm.SetGlobalBuffer((__gm__ T*)(y) + block_idx * param.blockFactor); +} + +template __aicore__ inline void KernelLeakyRelu::MyCopyIn(int32_t progress, tik2::LocalTensor& x_buf) +{ + auto tailFlag = 0; + if (param.loopSize == progress + 1) { + tailFlag = 1; + } + x_buf.SetUserTag(tailFlag); + tik2::DataCopy(x_buf, xGm[progress * param.ubFactor], param.dmaParam[tailFlag]); +} + +template +__aicore__ inline void KernelLeakyRelu::MyCompute(int32_t progress, tik2::LocalTensor& x_buf, + tik2::LocalTensor& y_buf) +{ + auto x_tag = x_buf.GetUserTag(); + y_buf.SetUserTag(x_tag); + tik2::LeakyRelu(y_buf, x_buf, param.negativeSlope, param.itemSize[x_tag]); +} + +template +__aicore__ inline void KernelLeakyRelu::MyCopyOut(int32_t progress, tik2::LocalTensor& y_buf) +{ + tik2::DataCopy(yGm[progress * param.ubFactor], y_buf, param.dmaParam[y_buf.GetUserTag()]); +} +} // namespace leaky_relu +#endif // KERNEL_LEAKY_RELU_H \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h new file mode 100644 index 000000000..8fb2bbedc --- /dev/null +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. + */ +#ifndef KERNEL_LEAKY_RELU_TILING_H +#define KERNEL_LEAKY_RELU_TILING_H +#include "kernel_operator.h" + +using namespace tik2; +struct LeakyReluParam { + uint32_t blockFactor; + uint32_t ubFactor; + int32_t dataLen; + half negativeSlope; + tik2::DataCopyParams dmaParam[2] { {}, {} }; + uint32_t itemSize[2]; + int32_t loopSize; +}; + +template +__aicore__ inline void InitTilingParam(int32_t totalSize, LeakyReluParam& param, half slope = static_cast(0.1)) +{ + int32_t splitSize = totalSize / block_num; + int64_t blockFactor = splitSize; + + const auto vec_len = tik2::DEFAULT_BLOCK_SIZE / sizeof(half); + + int64_t ubFactor = blockFactor; + int64_t blockNum = (splitSize + blockFactor - 1) / blockFactor; + + int64_t ub_for_num = (ubFactor + limit - 1) / limit; + int64_t adjust_factor = (ubFactor + ub_for_num - 1) / ub_for_num; + int64_t align_factor = (adjust_factor + vec_len - 1) / vec_len; + + ubFactor = align_factor * vec_len; + if (ubFactor > limit) { + ubFactor = (adjust_factor / vec_len) * vec_len; + } + param.negativeSlope = slope; + param.blockFactor = blockFactor; + param.ubFactor = ubFactor; + param.loopSize = (blockFactor + ubFactor - 1) / ubFactor; + param.dataLen = limit * sizeof(half); + + param.itemSize[0] = ubFactor; + param.itemSize[1] = splitSize % ubFactor; + param.dmaParam[0].blockLen = (ubFactor * sizeof(half) + tik2::DEFAULT_C0_SIZE - 1) / tik2::DEFAULT_C0_SIZE; + param.dmaParam[1].blockLen = (param.itemSize[1] * sizeof(half) + tik2::DEFAULT_C0_SIZE - 1) / tik2::DEFAULT_C0_SIZE; + + if (param.itemSize[1] == 0) { + param.itemSize[1] = ubFactor; + param.dmaParam[1].blockLen = param.dmaParam[0].blockLen; + } +}; +#endif // KERNEL_LEAKY_RELU_TILING_H diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/leaky_relu.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/leaky_relu.cpp new file mode 100644 index 000000000..998f13bce --- /dev/null +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/leaky_relu.cpp @@ -0,0 +1,11 @@ +#include "kernel_operator.h" +#include "kernel_leaky_relu.h" +#define UB_LIMIT ((tik2::TOTAL_UB_SIZE) / 4 / sizeof(half)) + +extern "C" __global__ __aicore__ void leaky_relu(GM_ADDR x, GM_ADDR y, GM_ADDR workspace, GM_ADDR tiling) { + GET_TILING_DATA(tiling_data, tiling); + tik2::ElemwiseFrame> op; + InitTilingParam(tiling_data.size,op.param); + op.Init(x, y); + op.Process(); +} \ No newline at end of file -- Gitee From 8e082a005f854350986c62071460aa9b23bf31a2 Mon Sep 17 00:00:00 2001 From: bluesky90 Date: Tue, 4 Jul 2023 21:15:46 +0800 Subject: [PATCH 2/7] fix sc --- .../tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp | 8 +++----- .../LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h | 1 - 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp index b6d0528e5..3d5d7ced4 100644 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp @@ -1,22 +1,20 @@ #include "leaky_relu_tiling.h" #include "register/op_def_registry.h" - +const uint32_t BLOCK_DIM = 8; namespace optiling { static ge::graphStatus TilingFunc(gert::TilingContext* context) { - TilingData tiling; const gert::StorageShape* x1_shape = context->GetInputShape(0); int32_t data_sz = 1; for (int i = 0; i < x1_shape->GetStorageShape().GetDimNum(); i++) data_sz *= x1_shape->GetStorageShape().GetDim(i); tiling.set_size(data_sz); - context->SetBlockDim(8); + context->SetBlockDim(BLOCK_DIM); tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); - return ge::GRAPH_SUCCESS; } } @@ -36,7 +34,7 @@ static ge::graphStatus InferShape(gert::InferShapeContext* context) namespace ops { class LeakyRelu : public OpDef { public: - LeakyRelu(const char* name) : OpDef(name) + explicit LeakyRelu(const char* name) : OpDef(name) { this->Input("x") .ParamType(REQUIRED) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h index 8fb2bbedc..b040d5d7b 100644 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h @@ -5,7 +5,6 @@ #define KERNEL_LEAKY_RELU_TILING_H #include "kernel_operator.h" -using namespace tik2; struct LeakyReluParam { uint32_t blockFactor; uint32_t ubFactor; -- Gitee From 9df7ea32adb76f8b391ab93b10760533b0e7c4f1 Mon Sep 17 00:00:00 2001 From: bluesky90 Date: Tue, 11 Jul 2023 22:22:23 +0800 Subject: [PATCH 3/7] Description:fix with namespace ascendc --- .../LeakyRelu/op_kernel/kernel_leaky_relu.h | 26 +++++++++---------- .../op_kernel/kernel_leaky_relu_tiling.h | 9 ++++--- .../LeakyRelu/op_kernel/leaky_relu.cpp | 7 ++--- 3 files changed, 22 insertions(+), 20 deletions(-) mode change 100644 => 100755 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu.h mode change 100644 => 100755 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h mode change 100644 => 100755 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/leaky_relu.cpp diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu.h old mode 100644 new mode 100755 index c549f0be3..e4246105e --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu.h +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu.h @@ -7,23 +7,23 @@ #include "kernel_leaky_relu_tiling.h" namespace leaky_relu_ascendc { -template class KernelLeakyRelu : public tik2::ElemwiseOpBase { +template class KernelLeakyRelu : public AscendC::ElemwiseOpBase { public: using DType = T; __aicore__ KernelLeakyRelu() {} __aicore__ inline void Init(GM_ADDR x, GM_ADDR y); public: - __aicore__ inline void MyCopyIn(int32_t progress, tik2::LocalTensor& inBuf); - __aicore__ inline void MyCompute(int32_t progress, tik2::LocalTensor& inBuf, tik2::LocalTensor& outBuf); - __aicore__ inline void MyCopyOut(int32_t progress, tik2::LocalTensor& outBuf); + __aicore__ inline void MyCopyIn(int32_t progress, AscendC::LocalTensor& inBuf); + __aicore__ inline void MyCompute(int32_t progress, AscendC::LocalTensor& inBuf, AscendC::LocalTensor& outBuf); + __aicore__ inline void MyCopyOut(int32_t progress, AscendC::LocalTensor& outBuf); public: LeakyReluParam param; private: - tik2::GlobalTensor xGm; - tik2::GlobalTensor yGm; + AscendC::GlobalTensor xGm; + AscendC::GlobalTensor yGm; }; template __aicore__ inline void KernelLeakyRelu::Init(GM_ADDR x, GM_ADDR y) @@ -33,29 +33,29 @@ template __aicore__ inline void KernelLeakyRelu::Init(GM_ADDR x, yGm.SetGlobalBuffer((__gm__ T*)(y) + block_idx * param.blockFactor); } -template __aicore__ inline void KernelLeakyRelu::MyCopyIn(int32_t progress, tik2::LocalTensor& x_buf) +template __aicore__ inline void KernelLeakyRelu::MyCopyIn(int32_t progress, AscendC::LocalTensor& x_buf) { auto tailFlag = 0; if (param.loopSize == progress + 1) { tailFlag = 1; } x_buf.SetUserTag(tailFlag); - tik2::DataCopy(x_buf, xGm[progress * param.ubFactor], param.dmaParam[tailFlag]); + AscendC::DataCopy(x_buf, xGm[progress * param.ubFactor], param.dmaParam[tailFlag]); } template -__aicore__ inline void KernelLeakyRelu::MyCompute(int32_t progress, tik2::LocalTensor& x_buf, - tik2::LocalTensor& y_buf) +__aicore__ inline void KernelLeakyRelu::MyCompute(int32_t progress, AscendC::LocalTensor& x_buf, + AscendC::LocalTensor& y_buf) { auto x_tag = x_buf.GetUserTag(); y_buf.SetUserTag(x_tag); - tik2::LeakyRelu(y_buf, x_buf, param.negativeSlope, param.itemSize[x_tag]); + AscendC::LeakyRelu(y_buf, x_buf, param.negativeSlope, param.itemSize[x_tag]); } template -__aicore__ inline void KernelLeakyRelu::MyCopyOut(int32_t progress, tik2::LocalTensor& y_buf) +__aicore__ inline void KernelLeakyRelu::MyCopyOut(int32_t progress, AscendC::LocalTensor& y_buf) { - tik2::DataCopy(yGm[progress * param.ubFactor], y_buf, param.dmaParam[y_buf.GetUserTag()]); + AscendC::DataCopy(yGm[progress * param.ubFactor], y_buf, param.dmaParam[y_buf.GetUserTag()]); } } // namespace leaky_relu #endif // KERNEL_LEAKY_RELU_H \ No newline at end of file diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h old mode 100644 new mode 100755 index b040d5d7b..adaf748df --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h @@ -5,12 +5,13 @@ #define KERNEL_LEAKY_RELU_TILING_H #include "kernel_operator.h" +using namespace AscendC; struct LeakyReluParam { uint32_t blockFactor; uint32_t ubFactor; int32_t dataLen; half negativeSlope; - tik2::DataCopyParams dmaParam[2] { {}, {} }; + AscendC::DataCopyParams dmaParam[2] { {}, {} }; uint32_t itemSize[2]; int32_t loopSize; }; @@ -21,7 +22,7 @@ __aicore__ inline void InitTilingParam(int32_t totalSize, LeakyReluParam& param, int32_t splitSize = totalSize / block_num; int64_t blockFactor = splitSize; - const auto vec_len = tik2::DEFAULT_BLOCK_SIZE / sizeof(half); + const auto vec_len = AscendC::DEFAULT_BLOCK_SIZE / sizeof(half); int64_t ubFactor = blockFactor; int64_t blockNum = (splitSize + blockFactor - 1) / blockFactor; @@ -42,8 +43,8 @@ __aicore__ inline void InitTilingParam(int32_t totalSize, LeakyReluParam& param, param.itemSize[0] = ubFactor; param.itemSize[1] = splitSize % ubFactor; - param.dmaParam[0].blockLen = (ubFactor * sizeof(half) + tik2::DEFAULT_C0_SIZE - 1) / tik2::DEFAULT_C0_SIZE; - param.dmaParam[1].blockLen = (param.itemSize[1] * sizeof(half) + tik2::DEFAULT_C0_SIZE - 1) / tik2::DEFAULT_C0_SIZE; + param.dmaParam[0].blockLen = (ubFactor * sizeof(half) + AscendC::DEFAULT_C0_SIZE - 1) / AscendC::DEFAULT_C0_SIZE; + param.dmaParam[1].blockLen = (param.itemSize[1] * sizeof(half) + AscendC::DEFAULT_C0_SIZE - 1) / AscendC::DEFAULT_C0_SIZE; if (param.itemSize[1] == 0) { param.itemSize[1] = ubFactor; diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/leaky_relu.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/leaky_relu.cpp old mode 100644 new mode 100755 index 998f13bce..79278747a --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/leaky_relu.cpp +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/leaky_relu.cpp @@ -1,11 +1,12 @@ #include "kernel_operator.h" #include "kernel_leaky_relu.h" -#define UB_LIMIT ((tik2::TOTAL_UB_SIZE) / 4 / sizeof(half)) +#define UB_LIMIT ((AscendC::TOTAL_UB_SIZE) / 4 / sizeof(half)) extern "C" __global__ __aicore__ void leaky_relu(GM_ADDR x, GM_ADDR y, GM_ADDR workspace, GM_ADDR tiling) { GET_TILING_DATA(tiling_data, tiling); - tik2::ElemwiseFrame> op; + // TODO: user kernel impl + AscendC::ElemwiseFrame> op; InitTilingParam(tiling_data.size,op.param); op.Init(x, y); op.Process(); -} \ No newline at end of file +} -- Gitee From 19de35f28c6e507d0a9fb9aa5e7e862999c3e9cc Mon Sep 17 00:00:00 2001 From: bluesky90 Date: Tue, 11 Jul 2023 23:40:39 +0800 Subject: [PATCH 4/7] Description:fix sc --- .../LeakyRelu/op_host/leaky_relu.cpp | 39 +++++++++---------- .../LeakyRelu/op_kernel/kernel_leaky_relu.h | 3 +- .../op_kernel/kernel_leaky_relu_tiling.h | 4 +- .../LeakyRelu/op_kernel/leaky_relu.cpp | 7 ++-- 4 files changed, 26 insertions(+), 27 deletions(-) mode change 100644 => 100755 cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp old mode 100644 new mode 100755 index 3d5d7ced4..71ead43d8 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp @@ -1,4 +1,3 @@ - #include "leaky_relu_tiling.h" #include "register/op_def_registry.h" const uint32_t BLOCK_DIM = 8; @@ -6,16 +5,16 @@ const uint32_t BLOCK_DIM = 8; namespace optiling { static ge::graphStatus TilingFunc(gert::TilingContext* context) { - TilingData tiling; - const gert::StorageShape* x1_shape = context->GetInputShape(0); - int32_t data_sz = 1; - for (int i = 0; i < x1_shape->GetStorageShape().GetDimNum(); i++) - data_sz *= x1_shape->GetStorageShape().GetDim(i); - tiling.set_size(data_sz); - context->SetBlockDim(BLOCK_DIM); - tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); - context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); - return ge::GRAPH_SUCCESS; + TilingData tiling; + const gert::StorageShape* x1_shape = context->GetInputShape(0); + int32_t data_sz = 1; + for (int i = 0; i < x1_shape->GetStorageShape().GetDimNum(); i++) + data_sz *= x1_shape->GetStorageShape().GetDim(i); + tiling.set_size(data_sz); + context->SetBlockDim(BLOCK_DIM); + tiling.SaveToBuffer(context->GetRawTilingData()->GetData(), context->GetRawTilingData()->GetCapacity()); + context->GetRawTilingData()->SetDataSize(tiling.GetDataSize()); + return ge::GRAPH_SUCCESS; } } @@ -38,23 +37,21 @@ public: { this->Input("x") .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT16}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); + .DataType({ ge::DT_FLOAT16 }) + .Format({ ge::FORMAT_ND }) + .UnknownShapeFormat({ ge::FORMAT_ND }); this->Output("y") .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT16}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); + .DataType({ ge::DT_FLOAT16 }) + .Format({ ge::FORMAT_ND }) + .UnknownShapeFormat({ ge::FORMAT_ND }); this->SetInferShape(ge::InferShape); - this->AICore() - .SetTiling(optiling::TilingFunc); + this->AICore().SetTiling(optiling::TilingFunc); this->AICore().AddConfig("ascend310p"); - this->Attr("negative_slope") - .Float(0.01f); + this->Attr("negative_slope").Float(0.01f); } }; diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu.h index e4246105e..fa414c5b3 100755 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu.h +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu.h @@ -33,7 +33,8 @@ template __aicore__ inline void KernelLeakyRelu::Init(GM_ADDR x, yGm.SetGlobalBuffer((__gm__ T*)(y) + block_idx * param.blockFactor); } -template __aicore__ inline void KernelLeakyRelu::MyCopyIn(int32_t progress, AscendC::LocalTensor& x_buf) +template +__aicore__ inline void KernelLeakyRelu::MyCopyIn(int32_t progress, AscendC::LocalTensor& x_buf) { auto tailFlag = 0; if (param.loopSize == progress + 1) { diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h index adaf748df..6deeccc87 100755 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/kernel_leaky_relu_tiling.h @@ -5,7 +5,6 @@ #define KERNEL_LEAKY_RELU_TILING_H #include "kernel_operator.h" -using namespace AscendC; struct LeakyReluParam { uint32_t blockFactor; uint32_t ubFactor; @@ -44,7 +43,8 @@ __aicore__ inline void InitTilingParam(int32_t totalSize, LeakyReluParam& param, param.itemSize[0] = ubFactor; param.itemSize[1] = splitSize % ubFactor; param.dmaParam[0].blockLen = (ubFactor * sizeof(half) + AscendC::DEFAULT_C0_SIZE - 1) / AscendC::DEFAULT_C0_SIZE; - param.dmaParam[1].blockLen = (param.itemSize[1] * sizeof(half) + AscendC::DEFAULT_C0_SIZE - 1) / AscendC::DEFAULT_C0_SIZE; + param.dmaParam[1].blockLen = + (param.itemSize[1] * sizeof(half) + AscendC::DEFAULT_C0_SIZE - 1) / AscendC::DEFAULT_C0_SIZE; if (param.itemSize[1] == 0) { param.itemSize[1] = ubFactor; diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/leaky_relu.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/leaky_relu.cpp index 79278747a..df4fbdc23 100755 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/leaky_relu.cpp +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_kernel/leaky_relu.cpp @@ -2,11 +2,12 @@ #include "kernel_leaky_relu.h" #define UB_LIMIT ((AscendC::TOTAL_UB_SIZE) / 4 / sizeof(half)) -extern "C" __global__ __aicore__ void leaky_relu(GM_ADDR x, GM_ADDR y, GM_ADDR workspace, GM_ADDR tiling) { +extern "C" __global__ __aicore__ void leaky_relu(GM_ADDR x, GM_ADDR y, GM_ADDR workspace, GM_ADDR tiling) +{ GET_TILING_DATA(tiling_data, tiling); - // TODO: user kernel impl + // kernel impl AscendC::ElemwiseFrame> op; - InitTilingParam(tiling_data.size,op.param); + InitTilingParam(tiling_data.size, op.param); op.Init(x, y); op.Process(); } -- Gitee From f011ba19081896938da46ba3f0c234ac10c43ef8 Mon Sep 17 00:00:00 2001 From: bluesky90 Date: Tue, 11 Jul 2023 23:47:34 +0800 Subject: [PATCH 5/7] Description:fix sc --- .../LeakyRelu/op_host/leaky_relu.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp index 71ead43d8..7e40df176 100755 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp @@ -37,14 +37,14 @@ public: { this->Input("x") .ParamType(REQUIRED) - .DataType({ ge::DT_FLOAT16 }) - .Format({ ge::FORMAT_ND }) - .UnknownShapeFormat({ ge::FORMAT_ND }); + .DataType({ge::DT_FLOAT16}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); this->Output("y") .ParamType(REQUIRED) - .DataType({ ge::DT_FLOAT16 }) - .Format({ ge::FORMAT_ND }) - .UnknownShapeFormat({ ge::FORMAT_ND }); + .DataType({ge::DT_FLOAT16}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); this->SetInferShape(ge::InferShape); -- Gitee From 79fa998f8969f5590f9564b5fea7b37a9449bd4d Mon Sep 17 00:00:00 2001 From: bluesky90 Date: Tue, 11 Jul 2023 23:58:19 +0800 Subject: [PATCH 6/7] Description:fix sc --- .../LeakyRelu/op_host/leaky_relu.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp index 7e40df176..9a82679e3 100755 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp @@ -37,14 +37,14 @@ public: { this->Input("x") .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT16}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); + .DataType( { ge::DT_FLOAT16 } ) + .Format( { ge::FORMAT_ND } ) + .UnknownShapeFormat( { ge::FORMAT_ND } ); this->Output("y") .ParamType(REQUIRED) - .DataType({ge::DT_FLOAT16}) - .Format({ge::FORMAT_ND}) - .UnknownShapeFormat({ge::FORMAT_ND}); + .DataType( { ge::DT_FLOAT16 } ) + .Format( { ge::FORMAT_ND } ) + .UnknownShapeFormat( { ge::FORMAT_ND } ); this->SetInferShape(ge::InferShape); -- Gitee From 29724a95440e8c3d8524012a91240dc2cadb1fec Mon Sep 17 00:00:00 2001 From: bluesky90 Date: Wed, 12 Jul 2023 00:03:19 +0800 Subject: [PATCH 7/7] Description:fix sc --- .../LeakyRelu/op_host/leaky_relu.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp index 9a82679e3..7e40df176 100755 --- a/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp +++ b/cplusplus/level1_single_api/4_op_dev/6_ascendc_custom_op/tensorflow_inference/LeakyRelu/op_host/leaky_relu.cpp @@ -37,14 +37,14 @@ public: { this->Input("x") .ParamType(REQUIRED) - .DataType( { ge::DT_FLOAT16 } ) - .Format( { ge::FORMAT_ND } ) - .UnknownShapeFormat( { ge::FORMAT_ND } ); + .DataType({ge::DT_FLOAT16}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); this->Output("y") .ParamType(REQUIRED) - .DataType( { ge::DT_FLOAT16 } ) - .Format( { ge::FORMAT_ND } ) - .UnknownShapeFormat( { ge::FORMAT_ND } ); + .DataType({ge::DT_FLOAT16}) + .Format({ge::FORMAT_ND}) + .UnknownShapeFormat({ge::FORMAT_ND}); this->SetInferShape(ge::InferShape); -- Gitee