From 00092203dfe631b7692391271668216fe5771100 Mon Sep 17 00:00:00 2001 From: ApeiriaNode_Booker Date: Tue, 23 Sep 2025 14:32:17 +0800 Subject: [PATCH] simplify simple matmul leakyrelu case --- .../24_simple_hello_world/README.md | 2 +- .../0_introduction/25_simple_add/README.md | 2 +- .../26_simple_matmulleakyrelu/README.md | 4 +- .../matmul_leakyrelu.asc | 66 +++++-------------- .../27_simple_add_cpp_extensions/README.md | 2 +- 5 files changed, 20 insertions(+), 56 deletions(-) diff --git a/operator/ascendc/0_introduction/24_simple_hello_world/README.md b/operator/ascendc/0_introduction/24_simple_hello_world/README.md index ddcaf96e9..483b3d8c3 100644 --- a/operator/ascendc/0_introduction/24_simple_hello_world/README.md +++ b/operator/ascendc/0_introduction/24_simple_hello_world/README.md @@ -33,7 +33,7 @@ ```bash export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置按安装径后,执行以下命令统一配置环境变量。 + 配置安装路径后,执行以下命令统一配置环境变量。 ```bash # 配置CANN环境变量 source ${ASCEND_INSTALL_PATH}/bin/setenv.bash diff --git a/operator/ascendc/0_introduction/25_simple_add/README.md b/operator/ascendc/0_introduction/25_simple_add/README.md index 470c3edef..439264e6a 100644 --- a/operator/ascendc/0_introduction/25_simple_add/README.md +++ b/operator/ascendc/0_introduction/25_simple_add/README.md @@ -64,7 +64,7 @@ z = x + y ```bash export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置按安装径后,执行以下命令统一配置环境变量。 + 配置安装路径后,执行以下命令统一配置环境变量。 ```bash # 配置CANN环境变量 source ${ASCEND_INSTALL_PATH}/bin/setenv.bash diff --git a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/README.md b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/README.md index da5b62293..6c2eea0b4 100644 --- a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/README.md +++ b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/README.md @@ -51,7 +51,7 @@ C = C > 0 ? C : C * 0.001 C = A * B + Bias C = C > 0 ? C : C * 0.001 ``` - 其中A的形状为[1024, 256],B的形状为[256, 640],C的形状为[1024, 640],Bias的形状为[640]。具体请参考[matmul_leakyrelu.cpp](./matmul_leakyrelu.cpp)。 + 其中A的形状为[1024, 256],B的形状为[256, 640],C的形状为[1024, 640],Bias的形状为[640]。具体请参考[matmul_leakyrelu.asc](./matmul_leakyrelu.asc)。 - 调用实现 使用内核调用符<<<>>>调用核函数。 @@ -81,7 +81,7 @@ C = C > 0 ? C : C * 0.001 ```bash export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置按安装径后,执行以下命令统一配置环境变量。 + 配置安装路径后,执行以下命令统一配置环境变量。 ```bash # 配置CANN环境变量 source ${ASCEND_INSTALL_PATH}/bin/setenv.bash diff --git a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/matmul_leakyrelu.asc b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/matmul_leakyrelu.asc index 430d62328..7c057d79a 100644 --- a/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/matmul_leakyrelu.asc +++ b/operator/ascendc/0_introduction/26_simple_matmulleakyrelu/matmul_leakyrelu.asc @@ -10,35 +10,16 @@ #include "data_utils.h" #include "kernel_tiling/kernel_tiling.h" #include "tiling/platform/platform_ascendc.h" -#include "acl/acl.h" #include "tiling/tiling_api.h" +#include "acl/acl.h" #include "kernel_operator.h" #include "lib/matmul_intf.h" -using namespace matmul; - __aicore__ inline uint32_t Ceiling(uint32_t a, uint32_t b) { return (a + b - 1) / b; } -/** - * @brief Copy tiling data to TCubeTiling ptr from tiling gm addr. - * @param tiling: TCubeTiling ptr which needs to copy tiling data. - * @param tilingGM: tiling gm addr. - * @retval None - */ -__aicore__ inline void CopyTiling(TCubeTiling *tiling, GM_ADDR tilingGM) -{ - uint32_t *ptr = reinterpret_cast(tiling); - auto tiling32 = reinterpret_cast<__gm__ uint32_t *>(tilingGM); - - for (uint32_t i = 0; i < sizeof(TCubeTiling) / sizeof(uint32_t); i++, ptr++) { - *ptr = *(tiling32 + i); - } - return; -} - template class MatmulLeakyKernel { public: __aicore__ inline MatmulLeakyKernel(){}; @@ -52,8 +33,10 @@ public: __aicore__ inline void CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, int32_t &offsetC, int32_t &offsetBias); - Matmul, MatmulType, - MatmulType, MatmulType> + matmul::Matmul, + matmul::MatmulType, + matmul::MatmulType, + matmul::MatmulType> matmulObj; AscendC::GlobalTensor aGlobal; @@ -183,16 +166,13 @@ MatmulLeakyKernel::CalcOffset(int32_t blockIdx, c * @param bias: Bias gm addr. * @param c: Out gm addr. * @param workspace: Temporary gm space addr required by matmul calc. - * @param tilingGm: Tiling data addr. + * @param tiling: Tiling data. * @retval None */ __global__ __aicore__ void matmul_leakyrelu_custom(GM_ADDR a, GM_ADDR b, GM_ADDR bias, GM_ADDR c, - GM_ADDR workspace, GM_ADDR tilingGm) + GM_ADDR workspace, AscendC::tiling::TCubeTiling tiling) { AscendC::TPipe pipe; - TCubeTiling tiling; - CopyTiling(&tiling, tilingGm); - MatmulLeakyKernel matmulLeakyKernel; matmulLeakyKernel.Init(a, b, bias, c, workspace, tiling, &pipe); REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), matmulLeakyKernel.matmulObj, &matmulLeakyKernel.tiling); // Initialize the matmul object. @@ -201,15 +181,13 @@ __global__ __aicore__ void matmul_leakyrelu_custom(GM_ADDR a, GM_ADDR b, GM_ADDR /** * @brief Generate matmul tiling. - * @param socVersion: Platform socversion. - * @param tilingBuf data buffer. + * @param ascendcPlatform: platform info. */ -void GenerateTiling(const char *socVersion, uint8_t *tilingBuf) +AscendC::tiling::TCubeTiling GenerateTiling(platform_ascendc::PlatformAscendC* ascendcPlatform) { using TPosition = matmul_tiling::TPosition; using CubeFormat = matmul_tiling::CubeFormat; using DataType = matmul_tiling::DataType; - using namespace std; int M = 1024; int N = 640; int K = 256; @@ -237,8 +215,6 @@ void GenerateTiling(const char *socVersion, uint8_t *tilingBuf) int baseM = 256; int baseN = 128; - optiling::TCubeTiling tilingData; - auto ascendcPlatform = platform_ascendc::PlatformAscendCManager::GetInstance(socVersion); matmul_tiling::MultiCoreMatmulTiling tilingApi(*ascendcPlatform); tilingApi.SetDim(usedCoreNum); // Set the number of cores that participate in multi-core computaion is 2. @@ -254,15 +230,14 @@ void GenerateTiling(const char *socVersion, uint8_t *tilingBuf) tilingApi.SetFixSplit(baseM, baseN, -1); // Set the fixed baseM=128, baseN=256. tilingApi.SetBufferSpace(-1, -1, -1); + AscendC::tiling::TCubeTiling tilingData; int64_t res = tilingApi.GetTiling(tilingData); // Get matmul tiling data. - tilingData.set_stepM(1); // Set the matmul tiling stepM=1. - tilingData.set_stepN(1); // Set the matmul tiling stepN=1. if (res == -1) { std::cout << "gen tiling failed" << std::endl; } - uint32_t tcubeTilingSize = tilingData.GetDataSize(); - tilingData.SaveToBuffer(tilingBuf, tcubeTilingSize); - return; + tilingData.stepM = 1; // Set the matmul tiling stepM=1. + tilingData.stepN = 1; // Set the matmul tiling stepN=1. + return tilingData; } int32_t main(int32_t argc, char *argv[]) @@ -277,8 +252,7 @@ int32_t main(int32_t argc, char *argv[]) size_t userWorkspaceSize = 0; size_t systemWorkspaceSize = static_cast(ascendcPlatform->GetLibApiWorkSpaceSize()); size_t workspaceSize = userWorkspaceSize + systemWorkspaceSize; - uint8_t *tilingBuf = (uint8_t *)malloc(tilingFileSize); - GenerateTiling(socVersion, tilingBuf); + auto tiling = GenerateTiling(ascendcPlatform); uint32_t blockDim = 1; aclInit(nullptr); @@ -313,18 +287,11 @@ int32_t main(int32_t argc, char *argv[]) ReadFile("./input/bias.bin", biasFileSize, inputBiasHost, biasFileSize); aclrtMemcpy(inputBiasDevice, biasFileSize, inputBiasHost, biasFileSize, ACL_MEMCPY_HOST_TO_DEVICE); - uint8_t *tilingHost; - uint8_t *tilingDevice; - aclrtMallocHost((void **)(&tilingHost), tilingFileSize); - aclrtMalloc((void **)&tilingDevice, tilingFileSize, ACL_MEM_MALLOC_HUGE_FIRST); - aclrtMemcpy(tilingHost, tilingFileSize, tilingBuf, tilingFileSize, ACL_MEMCPY_HOST_TO_HOST); - aclrtMemcpy(tilingDevice, tilingFileSize, tilingHost, tilingFileSize, ACL_MEMCPY_HOST_TO_DEVICE); - uint8_t *workspaceDevice; aclrtMalloc((void **)&workspaceDevice, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST); matmul_leakyrelu_custom<<>>(inputADevice, inputBDevice, inputBiasDevice, outputCDevice, - workspaceDevice, tilingDevice); + workspaceDevice, tiling); aclrtSynchronizeStream(stream); @@ -338,13 +305,10 @@ int32_t main(int32_t argc, char *argv[]) aclrtFreeHost(outputCHost); aclrtFree(inputBiasDevice); aclrtFreeHost(inputBiasHost); - aclrtFree(tilingDevice); - aclrtFreeHost(tilingHost); aclrtFree(workspaceDevice); aclrtDestroyStream(stream); aclrtResetDevice(deviceId); aclFinalize(); - free(tilingBuf); return 0; } \ No newline at end of file diff --git a/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/README.md b/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/README.md index 3769caeb1..c04da6102 100644 --- a/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/README.md +++ b/operator/ascendc/0_introduction/27_simple_add_cpp_extensions/README.md @@ -81,7 +81,7 @@ ```bash export ASCEND_INSTALL_PATH=${install_path}/ascend-toolkit/latest ``` - 配置按安装径后,执行以下命令统一配置环境变量。 + 配置安装路径后,执行以下命令统一配置环境变量。 ```bash # 配置CANN环境变量 source ${ASCEND_INSTALL_PATH}/bin/setenv.bash -- Gitee