diff --git a/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/README.md b/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/README.md index 00e46ae4fbaa87a035bb1a8218d4bbf3458cc275..b71f05b3d1935a2b0bf3f5bf20ca9b7c06258adc 100644 --- a/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/README.md +++ b/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/README.md @@ -1,6 +1,6 @@ ## 目录结构介绍 ``` -├── MatmutABshareInvocation +├── MatmulABshareInvocation │ ├── cmake // 编译工程文件 │ ├── pictures │ │ ├── matmul_ABshare.png // 算子ABshare的数据处理示意图 @@ -17,14 +17,14 @@ │ └── run.sh // 编译运行算子的脚本 ``` ## 代码实现介绍 -本样例中实现的是[m, n, k]固定为[128, 256, 384]的MatmutlABshare算子和MatmulNoABshare算子。 +本样例中实现的是[m, n, k]固定为[128, 256, 384]的MatmulABshare算子和MatmulNoABshare算子。 - kernel实现 - MatmutABshare算子的数学表达式为: + MatmulABshare算子的数学表达式为: ``` C = A * B ``` 其中A的形状为[128, 384],B的形状为[384, 256],C的形状为[128, 256]。具体请参考[matmul_ABshare_custom.cpp](./matmul_ABshare_custom.cpp)。 - MatmutNoABshare算子数学表达式与MatmutABshare一致,具体请参考[matmul_noABshare_custom.cpp](./matmul_noABshare_custom.cpp)。 + MatmulNoABshare算子数学表达式与MatmulABshare一致,具体请参考[matmul_noABshare_custom.cpp](./matmul_noABshare_custom.cpp)。 MatmulABshare算子代码数据处理说明图示(A矩阵和B矩阵不切分处理): ![alt text](./pictures/matmul_ABshare.png) diff --git a/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/matmul_noABshare_custom.cpp b/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/matmul_noABshare_custom.cpp index aa216275fcec98560138e5444e09c8433a0a143f..b013efb587072c5da96baf00ea2f77d1731106bb 100644 --- a/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/matmul_noABshare_custom.cpp +++ b/operator/ascendc/2_features/13_matmul_api_ibshare/MatmulABshareInvocation/matmul_noABshare_custom.cpp @@ -23,9 +23,9 @@ __aicore__ inline void CopyTiling(TCubeTiling *tiling, GM_ADDR tilingGM) // copy return; } -template class MatmutNoABshareKernel { +template class MatmulNoABshareKernel { public: - __aicore__ inline MatmutNoABshareKernel(){}; + __aicore__ inline MatmulNoABshareKernel(){}; __aicore__ inline void Init(GM_ADDR a, GM_ADDR b, GM_ADDR c, GM_ADDR workspace, const TCubeTiling &tiling, AscendC::TPipe *pipe); __aicore__ inline void Process(AscendC::TPipe *pipe); @@ -44,7 +44,7 @@ public: }; template -__aicore__ inline void MatmutNoABshareKernel::Init(GM_ADDR a, GM_ADDR b, GM_ADDR c, +__aicore__ inline void MatmulNoABshareKernel::Init(GM_ADDR a, GM_ADDR b, GM_ADDR c, GM_ADDR workspace,const TCubeTiling &tiling, AscendC::TPipe *pipe) { this->tiling = tiling; @@ -60,7 +60,7 @@ __aicore__ inline void MatmutNoABshareKernel::Init(GM_ADDR } template -__aicore__ inline void MatmutNoABshareKernel::Process(AscendC::TPipe *pipe) +__aicore__ inline void MatmulNoABshareKernel::Process(AscendC::TPipe *pipe) { AscendC::InitOutput (cGlobal, tiling.M * tiling.N, 0); // init output zero SyncAll(); @@ -74,7 +74,7 @@ __aicore__ inline void MatmutNoABshareKernel::Process(Ascen template __aicore__ inline void -MatmutNoABshareKernel::CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, +MatmulNoABshareKernel::CalcOffset(int32_t blockIdx, const TCubeTiling &tiling, int32_t &offsetA, int32_t &offsetB, int32_t &offsetC) { if (blockIdx ==0) @@ -95,8 +95,8 @@ extern "C" __global__ __aicore__ void matmul_noABshare_custom(GM_ADDR a, GM_ADDR TCubeTiling tiling; CopyTiling(&tiling, tilingGm); - MatmutNoABshareKernel MatmutNoABshareKernel; - MatmutNoABshareKernel.Init(a, b, c, workspace, tiling, &pipe); - REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), MatmutNoABshareKernel.matmulObj, &MatmutNoABshareKernel.tiling); - MatmutNoABshareKernel.Process(&pipe); + MatmulNoABshareKernel MatmulNoABshareKernel; + MatmulNoABshareKernel.Init(a, b, c, workspace, tiling, &pipe); + REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), MatmulNoABshareKernel.matmulObj, &MatmulNoABshareKernel.tiling); + MatmulNoABshareKernel.Process(&pipe); } \ No newline at end of file