From c7b4fa4395ffb1c9cb6b51f527c0caf562a82401 Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Thu, 19 Sep 2024 09:49:15 +0000 Subject: [PATCH 01/17] add dfx func of mm api Signed-off-by: jiangchengcheng-on --- impl/matmul/kernel_kfc.h | 4 +- impl/matmul/matmul_impl.h | 68 +- impl/matmul/matmul_tiling_algorithm.cpp | 4 +- impl/matmul/matmul_tiling_base.cpp | 12 + impl/matmul/modules/dfx/dfx_config.h | 28 + impl/matmul/modules/dfx/dfx_func_info.h | 29 + impl/matmul/modules/dfx/dfx_handler.h | 39 + impl/matmul/modules/dfx/dfx_proxy.h | 172 ++ impl/matmul/modules/dfx/dfx_registry.h | 26 + impl/matmul/modules/matmul_module.h | 50 +- impl/matmul/modules/matmul_policy.h | 2 +- lib/matmul/matmul.h | 2 + lib/matmul/matmul_client.h | 36 +- lib/matmul/matmul_tiling_base.h | 14 + tests/tiling/test_tiling.cpp | 2193 ++++++++++++++++++++++- 15 files changed, 2561 insertions(+), 118 deletions(-) create mode 100644 impl/matmul/modules/dfx/dfx_config.h create mode 100644 impl/matmul/modules/dfx/dfx_func_info.h create mode 100644 impl/matmul/modules/dfx/dfx_handler.h create mode 100644 impl/matmul/modules/dfx/dfx_proxy.h create mode 100644 impl/matmul/modules/dfx/dfx_registry.h diff --git a/impl/matmul/kernel_kfc.h b/impl/matmul/kernel_kfc.h index 04a50d6e..c6e06859 100644 --- a/impl/matmul/kernel_kfc.h +++ b/impl/matmul/kernel_kfc.h @@ -328,7 +328,7 @@ public: __aicore__ inline void SetUserDefInfo(const uint64_t tilingPtr) {} __aicore__ inline void SetQuantScalar(const uint64_t quantScalar) {} __aicore__ inline void SetQuantVector(const GlobalTensor& quantTensor) {} - template __aicore__ inline void SetWorkspace(__gm__ T* addr, int len) {}; + template __aicore__ inline void SetWorkspace(__gm__ T* addr, int size) {}; template __aicore__ inline void SetWorkspace(GlobalTensor& addr){}; __aicore__ inline void End(){}; __aicore__ inline void SetHF32(bool enableHF32 = false, int32_t transMode = 0){}; @@ -351,7 +351,7 @@ public: __aicore__ inline void GetTensorC(const 
GlobalTensor& gm, uint8_t enAtomic = 0, bool enSequentialWrite = false){}; template - __aicore__ inline void GetTensorC(const GlobalTensor &c, const LocalTensor &cLocal, + __aicore__ inline void GetTensorC(const GlobalTensor &gm, const LocalTensor &cLocal, uint8_t enAtomic = 0, bool enSequentialWrite = false) {}; template __aicore__ inline GlobalTensor GetTensorC(uint8_t enAtomic = 0, bool enSequentialWrite = false) diff --git a/impl/matmul/matmul_impl.h b/impl/matmul/matmul_impl.h index 78db7293..fdfc46df 100644 --- a/impl/matmul/matmul_impl.h +++ b/impl/matmul/matmul_impl.h @@ -20,10 +20,6 @@ namespace matmul { constexpr int32_t MAX_BLOCK_COUNT_SIZE = 4095; constexpr int32_t DOUBLE_SIZE = 2; -#ifdef ASCENDC_CPU_DEBUG -#define REGIST_MATMUL_OBJ_REMOTE(tpipe, workspace, maxTimes, ...) -#endif - template __aicore__ inline void GlobalCache::Init(const TCubeTiling* __restrict cubeTiling, TPipe* tpipe) { @@ -164,9 +160,9 @@ __aicore__ inline void GlobalCache::ReduceCacheSize() --cacheSize_; } -template -__aicore__ inline void SetTPipe(MatmulImpl &mm, +template +__aicore__ inline void SetTPipe(MatmulImpl &mm, TPipe* tpipe) { mm.var.tpipe_ = tpipe; @@ -6184,21 +6180,21 @@ __aicore__ inline void MatmulImplbaseN]; MatmulInstr::biasType_ = IsSameType::value ? 
2 : 1; // 2:f32, 1:f16 MatmulInstr::sL1BiasOffset_ = 0; - MatmulInstr::template Compute(a1, b1, - var.cMatrix_, bias, 0, 0, var.sMadMStep_, var.sMadNStep_); + MatmulInstr::template Compute(a1, b1, var.cMatrix_, bias, + 0, 0, var.sMadMStep_, var.sMadNStep_); if constexpr (A_TYPE::layout == LayoutMode::NONE || MM_CFG.batchMode == BatchMode::SINGLE_LARGE_THAN_L1) { var.qidBias_.FreeTensor(bias); } } else { MatmulInstr::biasType_ = 0; - MatmulInstr::template Compute(a1, b1, - var.cMatrix_, bias, 0, 0, var.sMadMStep_, var.sMadNStep_); + MatmulInstr::template Compute(a1, b1, var.cMatrix_, bias, + 0, 0, var.sMadMStep_, var.sMadNStep_); } } else { MatmulInstr::biasType_ = 0; - MatmulInstr::template Compute(a1, b1, - var.cMatrix_, bias, 0, 0, var.sMadMStep_, var.sMadNStep_); + MatmulInstr::template Compute(a1, b1, var.cMatrix_, bias, + 0, 0, var.sMadMStep_, var.sMadNStep_); } } @@ -6394,20 +6390,20 @@ __aicore__ inline void MatmulImpl::value ? 2 : 1; // 2:f32, 1:f16 MatmulInstr::sL1BiasOffset_ = 0; - MatmulInstr::template Compute(a1, b1, - var.cMatrix_, bias, 0, 0, var.sMadMStep_, var.sMadNStep_); + MatmulInstr::template Compute(a1, b1, var.cMatrix_, bias, + 0, 0, var.sMadMStep_, var.sMadNStep_); if constexpr (A_TYPE::layout == LayoutMode::NONE || MM_CFG.batchMode == BatchMode::SINGLE_LARGE_THAN_L1) { var.qidBias_.FreeTensor(bias); } } else { MatmulInstr::biasType_ = 0; - MatmulInstr::template Compute(a1, b1, - var.cMatrix_, bias, 0, 0, var.sMadMStep_, var.sMadNStep_); + MatmulInstr::template Compute(a1, b1, var.cMatrix_, bias, + 0, 0, var.sMadMStep_, var.sMadNStep_); } } else { MatmulInstr::biasType_ = 0; - MatmulInstr::template Compute(a1, b1, - var.cMatrix_, bias, 0, 0, var.sMadMStep_, var.sMadNStep_); + MatmulInstr::template Compute(a1, b1, var.cMatrix_, bias, + 0, 0, var.sMadMStep_, var.sMadNStep_); } #elif __CCE_AICORE__ == 200 if (var.enableBias_) { @@ -9722,7 +9718,8 @@ __aicore__ inline void MatmulImpl -__aicore__ inline void 
MatmulImpl::UpdateDataCopyParamForQuant( +__aicore__ inline +void MatmulImpl::UpdateDataCopyParamForQuant( DataCopyEnhancedParams& enhancedParams) { if constexpr (IsSameType::value) { @@ -10127,10 +10124,11 @@ template ::CopyCo22GMNZ2NDOnTheFly( const GlobalTensor& gmC, const LocalTensor& src, bool enSequentialWrite) { + uint32_t dimN = (Kc_ != 0) ? Kc_ : N_; const int blockCount = sizeof(DstT) == B32_BYTE_SIZE ? BLOCK_CUBE : ONE_BLK_SIZE / sizeof(DstT); const int oneBlockCount = ONE_BLK_SIZE / sizeof(DstT); int calcWidth = var.baseUseN_ / blockCount; - int dstOffset = var.curM_ * var.tiling_->baseM * N_ + var.curN_ * var.tiling_->baseN; + int dstOffset = var.curM_ * var.tiling_->baseM * dimN + var.curN_ * var.tiling_->baseN; int blockLen = blockCount * sizeof(DstT) / ONE_BLK_SIZE; int srcRepeatGap = (var.blockUseM_ * BLOCK_CUBE * blockCount - blockCount) * sizeof(DstT) / ONE_BLK_SIZE; int tail = var.baseUseN_ % blockCount; @@ -10142,7 +10140,7 @@ __aicore__ inline void MatmulImpl::CopyCo22GMNZ2ND( const GlobalTensor& gmC, LocalTensor& src, bool enSequentialWrite) { + uint32_t dimN = (Kc_ != 0) ? Kc_ : N_; const int blockCount = sizeof(DstT) == B32_BYTE_SIZE ? 
BLOCK_CUBE : ONE_BLK_SIZE / sizeof(DstT); int width = var.blockUseN_ * blockCount; if constexpr (IsSameType::value || IsSameType::value) { @@ -10427,15 +10426,15 @@ __aicore__ inline void MatmulImpl= width), - { KERNEL_LOG(KERNEL_ERROR, "N_ is %d, width is %d, N_ should be no less than width", N_, width); }); - int dstStride = (N_ - width) * sizeof(DstT) / ONE_BLK_SIZE; - int dstOffset = var.curM_ * var.tiling_->baseM * N_ + var.curN_ * var.tiling_->baseN; - int offset = N_; + ASCENDC_ASSERT((dimN >= width), + { KERNEL_LOG(KERNEL_ERROR, "dimN is %d, width is %d, dimN should be no less than width", dimN, width); }); + int dstStride = (dimN - width) * sizeof(DstT) / ONE_BLK_SIZE; + int dstOffset = var.curM_ * var.tiling_->baseM * dimN + var.curN_ * var.tiling_->baseN; + int offset = dimN; if (enSequentialWrite) { isGmAligned = (var.baseUseN_ % blockCount) == 0; dstStride = 0; @@ -10451,7 +10450,7 @@ __aicore__ inline void MatmulImpl::value) { CopyToGMForNotAligned(gmC, trans, blocklen, enSequentialWrite, isTragetAligned); } else { @@ -10522,9 +10521,10 @@ template ::CopyCo22UBNZ2ND( const LocalTensor& dst, const LocalTensor& src, bool enSequentialWrite) { + uint32_t dimN = (Kc_ != 0) ? Kc_ : N_; const int blockCount = sizeof(DstT) == B32_BYTE_SIZE ? BLOCK_CUBE : ONE_BLK_SIZE / sizeof(DstT); - int dstOffset = var.curM_ * var.tiling_->baseM * N_ + var.curN_ * var.tiling_->baseN; - int offset = Ceil(N_, blockCount) * blockCount; + int dstOffset = var.curM_ * var.tiling_->baseM * dimN + var.curN_ * var.tiling_->baseN; + int offset = Ceil(dimN, blockCount) * blockCount; if (enSequentialWrite) { dstOffset = 0; offset = var.tiling_->baseN; @@ -10924,8 +10924,8 @@ __aicore__ inline MatmulImpl 16 or m,n<16 - const int32_t m0 = min(minMNSize, min(coreStatus.m, minTotalSize / n0)); - const int32_t k0 = min(min(minKSize / m0, minKSize / n0), coreStatus.k); + const int32_t m0 = (n0 == 0) ? 
0 : min(minMNSize, min(coreStatus.m, minTotalSize / n0)); + const int32_t k0 = (m0 != 0 && n0 != 0) ? min(min(minKSize / m0, minKSize / n0), coreStatus.k) : coreStatus.k; const int32_t dbBuffer = 2; // A/B fullload or A fullload + B Kdim fullload or B fullload + A Kdim fullload(1/2/4) diff --git a/impl/matmul/matmul_tiling_base.cpp b/impl/matmul/matmul_tiling_base.cpp index 3a9d37d6..07d197bb 100644 --- a/impl/matmul/matmul_tiling_base.cpp +++ b/impl/matmul/matmul_tiling_base.cpp @@ -581,6 +581,18 @@ void MatmulApiTilingBase::SetMatmulConfigParams(int32_t mmConfigTypeIn, bool ena this->enableL1CacheUB = enableL1CacheUBIn; } +void MatmulApiTilingBase::SetMatmulConfigParams(const MatmulConfigParams& configParams) +{ + TILING_LOG_DEBUG("Set MatmulConfigType: %d", static_cast(configParams.mmConfigType)); + TILING_LOG_DEBUG("Set EnableL1CacheUB: %d", static_cast(configParams.enableL1CacheUB)); + TILING_LOG_DEBUG("Set ScheduleType: %d", static_cast(configParams.scheduleType)); + TILING_LOG_DEBUG("Set Traverse: %d", static_cast(configParams.traverse)); + this->mmConfigType = configParams.mmConfigType; + this->enableL1CacheUB = configParams.enableL1CacheUB; + this->scheduleType = configParams.scheduleType; + this->traverse_ = configParams.traverse; +} + bool MatmulApiTilingBase::CheckSetParam() { if (socVersion == platform_ascendc::SocVersion::ASCEND910 || diff --git a/impl/matmul/modules/dfx/dfx_config.h b/impl/matmul/modules/dfx/dfx_config.h new file mode 100644 index 00000000..83d0470e --- /dev/null +++ b/impl/matmul/modules/dfx/dfx_config.h @@ -0,0 +1,28 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. 
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + + /*! + * \file dfx_config.h + * \brief + */ + +#ifndef MATMUL_DFX_CONFIG_H +#define MATMUL_DFX_CONFIG_H + +#include "handlers/dfx_chain_handler.h" +#include "dfx_func_info.h" + +namespace matmul { +struct DfxConfig { + static constexpr bool ENABLE = false; + using EnabledHandlers = DfxChainHandler <>; +}; +} +#endif \ No newline at end of file diff --git a/impl/matmul/modules/dfx/dfx_func_info.h b/impl/matmul/modules/dfx/dfx_func_info.h new file mode 100644 index 00000000..02e4fdd1 --- /dev/null +++ b/impl/matmul/modules/dfx/dfx_func_info.h @@ -0,0 +1,29 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +/*! 
+ * \file dfx_func_info.h + * \brief + */ + + #ifndef MATMUL_DFX_FUNC_INFO_H +#define MATMUL_DFX_FUNC_INFO_H + +namespace matmul { +struct DfxFuncInfo { + __aicore__ inline DfxFuncInfo(__gm__ const char* module, __gm__ const char* func, uint32_t funcId) + :module(module), func(func), funcId(funcId) { + } + __gm__ const char* module; + __gm__ const char* func; + uint32_t funcId; +}; +} +#endif \ No newline at end of file diff --git a/impl/matmul/modules/dfx/dfx_handler.h b/impl/matmul/modules/dfx/dfx_handler.h new file mode 100644 index 00000000..c176cfdd --- /dev/null +++ b/impl/matmul/modules/dfx/dfx_handler.h @@ -0,0 +1,39 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +/*! + * \file dfx_handler.h + * \brief + */ + + #ifndef MATMUL_DFX_HANDLER_H +#define MATMUL_DFX_HANDLER_H + +#include "dfx_config.h" + +namespace matmul { + +struct DfxHandler { + template <typename... Args> + __aicore__ inline void PreCall(const DfxFuncInfo& info, Args&&... 
args) { + DfxConfig::EnabledHandlers::PreCall(info, std::forward<Args>(args)...); + } + + template <typename RT> + __aicore__ inline void PostCall(const DfxFuncInfo& info, const RT& ret) { + DfxConfig::EnabledHandlers::PostCall(info, ret); + } + + __aicore__ inline void PostCall(const DfxFuncInfo& info) { + DfxConfig::EnabledHandlers::PostCall(info); + } +}; +} +#endif \ No newline at end of file diff --git a/impl/matmul/modules/dfx/dfx_proxy.h b/impl/matmul/modules/dfx/dfx_proxy.h new file mode 100644 index 00000000..e6ad92c1 --- /dev/null +++ b/impl/matmul/modules/dfx/dfx_proxy.h @@ -0,0 +1,172 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +/*! + * \file dfx_proxy.h + * \brief + */ + +#ifndef MATMUL_DFX_PROXY_H +#define MATMUL_DFX_PROXY_H + +#include +#include "dfx_handler.h" + +namespace matmul { + +template +using enable_if_t = typename std::enable_if::type; + +template +constexpr bool is_void_v = std::is_void::value; + +/////////////////////////////////////////////////////////////////////////////// +template +struct DfxProxy : MODULE { + __aicore__ inline auto operator->() { return this; } + __aicore__ inline operator MODULE*() { return this; } +}; + +/////////////////////////////////////////////////////////////////////////////// +#define MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC) \ +template \ +__aicore__ inline auto FUNC(Args&&... 
args) -> enable_if_t(args)...))>, \ +decltype(MODULE().MODULE::FUNC(std::forward(args)...))>{ \ + DfxFuncInfo info{#MODULE, #FUNC, __COUNTER__}; \ + DfxHandler::PreCall(info, std::forward(args)...); \ + auto ret = M_.MODULE::FUNC(std::forward(args)...); \ + DfxHandler::PostCall(info, ret); \ + return ret; \ +} \ +template \ +__aicore__ inline auto FUNC(Args&&... args) -> enable_if_t(args)...))>> { \ + DfxFuncInfo info{#MODULE, #FUNC, __COUNTER__}; \ + DfxHandler::PreCall(info, std::forward(args)...); \ + M_.MODULE::FUNC(std::forward(args)...); \ + DfxHandler::PostCall(info); \ +} + +/////////////////////////////////////////////////////////////////////////////// +#define MATMUL_COUNT_ARGS_IMPL(_1, _2, _3, _4, _5, _6, _7, _8, _9, N, ...) N +#define MATMUL_COUNT_ARGS(...) \ +MATMUL_COUNT_ARGS_IMPL(__VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1) + +#define MATMUL_DEF_PROXY_FUNC_1(M_, MODULE, FUNC1) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC1) + +#define MATMUL_DEF_PROXY_FUNC_2(M_, MODULE, FUNC1, FUNC2) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC1) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC2) + +#define MATMUL_DEF_PROXY_FUNC_3(M_, MODULE, FUNC1, FUNC2, FUNC3) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC1) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC2) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC3) + +#define MATMUL_DEF_PROXY_FUNC_4(M_, MODULE, FUNC1, FUNC2, FUNC3, FUNC4) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC1) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC2) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC3) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC4) + +#define MATMUL_DEF_PROXY_FUNC_5(M_, MODULE, FUNC1, FUNC2, FUNC3, FUNC4, FUNC5) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC1) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC2) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC3) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC4) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC5) + +#define MATMUL_DEF_PROXY_FUNC_6(M_, MODULE, FUNC1, FUNC2, FUNC3, FUNC4, FUNC5, FUNC6) \ 
+MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC1) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC2) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC3) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC4) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC5) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC6) + +#define MATMUL_DEF_PROXY_FUNC_7(M_, MODULE, FUNC1, FUNC2, FUNC3, FUNC4, FUNC5, FUNC6, FUNC7) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC1) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC2) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC3) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC4) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC5) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC6) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC7) + +#define MATMUL_DEF_PROXY_FUNC_8(M_, MODULE, FUNC1, FUNC2, FUNC3, FUNC4, FUNC5, FUNC6, FUNC7, FUNC8) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC1) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC2) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC3) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC4) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC5) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC6) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC7) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC8) + +#define MATMUL_DEF_PROXY_FUNC_9(M_, MODULE, FUNC1, FUNC2, FUNC3, FUNC4, FUNC5, FUNC6, FUNC7, FUNC8, FUNC9) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC1) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC2) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC3) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC4) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC5) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC6) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC7) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC8) \ +MATMUL_DEF_DFX_PROXY_FUNC(M_, MODULE, FUNC9) + +#define MATMUL_DEF_DFX_FUNCS_IMPL2(N, M_, MODULE, ...) \ +MATMUL_DEF_PROXY_FUNC_##N(M_, MODULE, __VA_ARGS__) + +#define MATMUL_DEF_DFX_FUNCS_IMPL(N, M_, MODULE, ...) 
\ +MATMUL_DEF_DFX_FUNCS_IMPL2(N, M_, MODULE, __VA_ARGS__) + +#define MATMUL_DEF_DFX_FUNCS(M_, MODULE, ...) \ +MATMUL_DEF_DFX_FUNCS_IMPL(MATMUL_COUNT_ARGS(__VA_ARGS__), M_, MODULE, __VA_ARGS__) + +/////////////////////////////////////////////////////////////////////////////// +#define MATMUL_DFX_PROXY_REGISTER(MODULE, ...) \ +template \ +struct DfxProxy { \ + using MODULE = typename IMPL::MODULE; \ + __aicore__ inline DfxProxy(MODULE& module) : proxy{module} {} \ + struct FuncProxy { \ + __aicore__ inline FuncProxy(MODULE& module) : m_{module} {} \ + __aicore__ inline auto& operator*() { return m_; } \ + MATMUL_DEF_DFX_FUNCS(m_, MODULE, __VA_ARGS__) \ + private: \ + MODULE& m_; \ + }; \ + __aicore__ inline auto operator->() { return &proxy; } \ + __aicore__ inline operator MODULE*() { return &(*proxy); } \ +private: \ + FuncProxy proxy; \ +}; \ +template \ +struct DfxProxy { \ + using MODULE = typename IMPL::MODULE; \ + __aicore__ inline DfxProxy(const MODULE& module) : proxy{module} {} \ + struct FuncProxy { \ + __aicore__ inline FuncProxy(const MODULE& module) : m_{module} {} \ + __aicore__ inline const auto& operator*() { return m_; } \ + MATMUL_DEF_DFX_FUNCS(m_, MODULE, __VA_ARGS__) \ + private: \ + const MODULE& m_; \ + }; \ + __aicore__ inline auto operator->() { return &proxy; } \ + __aicore__ inline operator MODULE*() { return &(*proxy); } \ +private: \ + FuncProxy proxy; \ +} + +} + +#endif \ No newline at end of file diff --git a/impl/matmul/modules/dfx/dfx_registry.h b/impl/matmul/modules/dfx/dfx_registry.h new file mode 100644 index 00000000..400891ab --- /dev/null +++ b/impl/matmul/modules/dfx/dfx_registry.h @@ -0,0 +1,26 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. 
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +/*! + * \file dfx_registry.h + * \brief + */ + + +#ifndef MATMUL_DFX_REGISTRY_H +#define MATMUL_DFX_REGISTRY_H + +#include "dfx_proxy.h" + +namespace matmul { + MATMUL_DFX_PROXY_REGISTER(InputL1Cache, ClearAL1Cache, ClearBL1Cache); +} + +#endif \ No newline at end of file diff --git a/impl/matmul/modules/matmul_module.h b/impl/matmul/modules/matmul_module.h index eed5071e..884dba27 100644 --- a/impl/matmul/modules/matmul_module.h +++ b/impl/matmul/modules/matmul_module.h @@ -15,6 +15,9 @@ #ifndef IMPL_MATMUL_MODULES_MATMUL_MODULE_H #define IMPL_MATMUL_MODULES_MATMUL_MODULE_H +#include "dfx/dfx_registry.h" +#include "dfx/dfx_config.h" + /* MatmulModuleBase */ namespace matmul { template @@ -38,6 +41,7 @@ struct MatmulModuleBase> { /* MatmulImpl */ #define MATMUL_IMPL__ IMPL +#define MATMUL_POLICY__ POLICY #define MATMUL_CAST_TO_IMPL() static_cast(this) #define MATMUL_CAST_TO_CONST_IMPL() static_cast(this) @@ -48,33 +52,61 @@ struct MatmulModuleBase> { #define MATMUL_CAST_TO_CONST_IMPL_OF(...) 
\ (static_cast(MATMUL_CAST_TO_CONST_IMPL())) +#define MATMUL_CAST_TO_PROXY_OF(NAME) \ +typename matmul::DfxProxy (*MATMUL_CAST_TO_IMPL_OF(NAME)) + +#define MATMUL_CAST_TO_CONST_PROXY_OF(NAME) \ +typename matmul::DfxProxy (*MATMUL_CAST_TO_CONST_IMPL_OF(NAME)) + #define MATMUL_MODULE(NAME) cast_to_##NAME() -#define MATMUL_USE_MODULE(NAME) \ -__aicore__ inline constexpr decltype(auto) MATMUL_MODULE(NAME) { \ - return MATMUL_CAST_TO_IMPL_OF(NAME); \ +#define MATMUL_USE_MODULE(NAME) \ +__aicore__ inline constexpr decltype(auto) MATMUL_MODULE(NAME) { \ + if constexpr (DfxConfig::ENABLE) { \ + return MATMUL_CAST_TO_PROXY_OF(NAME); \ + } else { \ + return MATMUL_CAST_TO_IMPL_OF(NAME); \ + } \ +} \ +__aicore__ inline constexpr decltype(auto) MATMUL_MODULE(NAME) const { \ + if constexpr (DfxConfig::ENABLE) { \ + return MATMUL_CAST_TO_CONST_PROXY_OF(NAME); \ + } else { \ + return MATMUL_CAST_TO_CONST_IMPL_OF(NAME); \ + } \ } -#define MATMUL_USE_MODULE_ON(NAME, ...) \ -__aicore__ inline constexpr decltype(auto) MATMUL_MODULE(NAME) { \ - return MATMUL_CAST_TO_IMPL_OF(template NAME<__VA_ARGS__>); \ +#define MATMUL_USE_MODULE_ON(NAME, ...) \ +__aicore__ inline constexpr decltype(auto) MATMUL_MODULE(NAME) { \ + if constexpr (DfxConfig::ENABLE) { \ + return MATMUL_CAST_TO_PROXY_OF(template NAME<__VA_ARGS__>); \ + } else { \ + return MATMUL_CAST_TO_IMPL_OF(template NAME<__VA_ARGS__>); \ + } \ +} \ +__aicore__ inline constexpr decltype(auto) MATMUL_MODULE(NAME) const { \ + if constexpr (DfxConfig::ENABLE) { \ + return MATMUL_CAST_TO_CONST_PROXY_OF(template NAME<__VA_ARGS__>);\ + } else { \ + return MATMUL_CAST_TO_CONST_IMPL_OF(template NAME<__VA_ARGS__>); \ + } \ } /* MatmulPolicy */ #define MATMUL_POLICY_TEMPLATE MATMUL_POLICY #define MATMUL_POLICY_DEFAULT_OF(DEFAULT) \ -template \ +template \ class MATMUL_POLICY = DEFAULT #define MATMUL_POLICY_TEMPLATE_OF(NAME) \ -template class NAME +template class NAME #define MATMUL_IMPL_TYPE \ MatmulImpl #define MATMUL_MODULE_IN_POLICY(...) 
\ -MATMUL_POLICY_TEMPLATE::__VA_ARGS__ +MATMUL_POLICY_TEMPLATE::__VA_ARGS__ #define MATMUL_IMPORT_MODULE(...) private MATMUL_MODULE_IN_POLICY(__VA_ARGS__) diff --git a/impl/matmul/modules/matmul_policy.h b/impl/matmul/modules/matmul_policy.h index 0184d5ad..1db8dca5 100644 --- a/impl/matmul/modules/matmul_policy.h +++ b/impl/matmul/modules/matmul_policy.h @@ -21,7 +21,7 @@ namespace matmul { -template struct MatmulPolicy { diff --git a/lib/matmul/matmul.h b/lib/matmul/matmul.h index fcfc558a..ce0be5fd 100644 --- a/lib/matmul/matmul.h +++ b/lib/matmul/matmul.h @@ -212,6 +212,8 @@ public: MATMUL_ALLOW_USING(InputL1Cache); MATMUL_ALLOW_USING(Co1Buffer); +private: + template friend struct DfxProxy; private: template diff --git a/lib/matmul/matmul_client.h b/lib/matmul/matmul_client.h index 1b1ebb9e..94f310fe 100644 --- a/lib/matmul/matmul_client.h +++ b/lib/matmul/matmul_client.h @@ -129,7 +129,7 @@ public: ASSERT(addr.GetSize() > 0); SetWorkspace(addr.GetPhyAddr(), addr.GetSize() * sizeof(T)); } - template __aicore__ inline void SetWorkspace(__gm__ const T* addr, int size) + template __aicore__ inline void SetWorkspace(__gm__ const T* addr, int len) { ASSERT(addr != nullptr); ASSERT(this->cubeTiling != nullptr); @@ -155,6 +155,11 @@ public: PostMessage(); } + __aicore__ inline void SetSingleShape(int singleM, int singleN, int singleK) + { + SetTail(singleM, singleN, singleK); + } + __aicore__ inline void SetTail(int tailM = -1, int tailN = -1, int tailK = -1) { if (tailM != -1) { @@ -337,6 +342,7 @@ public: template __aicore__ inline bool Iterate(bool enPartialSum = false) { + ASSERT(!(A_TYPE::ibShare && B_TYPE::ibShare) && "Iterate not support when samebab is enabled"); TRACE_START(TraceId::KFC_CLIENT_POST_MSG); if (unlikely(kfcMsg_.body.isFirstIter)) { cntIter_ = 0; @@ -369,6 +375,7 @@ public: kfcMsg_.body.sync = sync; kfcMsg_.body.cAddr = reinterpret_cast(cacheWorkspaceAddr); PostMessage(); + SyncCubeWithVec(); TRACE_STOP(TraceId::KFC_CLIENT_POST_MSG); return true; 
} @@ -386,7 +393,13 @@ public: __aicore__ inline void WaitIterateBatch() { ASSERT(!isSyncGetC); // Must be asynchronous mode - WaitEvent(this->devEvtID); + auto intraId = this->devEvtID; + if constexpr (A_TYPE::ibShare && B_TYPE::ibShare) { + if (GetSubBlockIdxImpl() == 1) { + intraId = this->devEvtID - 1; + } + } + WaitEvent(intraId); } template @@ -395,6 +408,7 @@ public: { TRACE_START(TraceId::KFC_CLIENT_POST_MSG); ASSERT(kfcMsg_.body.isFirstIter == 1); + ASSERT(!(A_TYPE::ibShare && B_TYPE::ibShare) && "Iterate not support when samebab is enabled"); kfcMsg_.body.iterateFakeMsg = fakeMsg; kfcMsg_.body.cAddr = reinterpret_cast(gm.GetPhyAddr()); kfcMsg_.body.enAtomic = (uint8_t)(enAtomic); @@ -423,6 +437,7 @@ public: ASSERT(sync == true); ASSERT(enAtomic == 0); ASSERT(kfcMsg_.body.isFirstIter == 1); + ASSERT(!(A_TYPE::ibShare && B_TYPE::ibShare) && "Iterate not support when samebab is enabled"); ASSERT((PhyPosIsL1(C_TYPE::pos)) && "IterateAll LocalTensor only support QuePosition A1 or B1"); ASSERT(!(A_TYPE::ibShare && B_TYPE::ibShare) && "IterateAll LocalTensor not support when sameab" " is enabled"); @@ -457,6 +472,7 @@ public: { TRACE_START(TraceId::KFC_CLIENT_POST_MSG); ASSERT(kfcMsg_.body.isFirstIter == 1); + ASSERT(!(A_TYPE::ibShare && B_TYPE::ibShare) && "Iterate not support when samebab is enabled"); kfcMsg_.body.cAddr = reinterpret_cast(gm.GetPhyAddr()); kfcMsg_.body.enSequentialWrite = enSequentialWrite; kfcMsg_.body.sync = sync; @@ -785,6 +801,8 @@ public: BIAS_TYPE, MM_CFG, MM_CB>::MATMUL mm; + constexpr static bool aIbShare = A_TYPE::ibShare; + constexpr static bool bIbShare = B_TYPE::ibShare; #endif @@ -795,7 +813,7 @@ private: // Use shared memory to get the queue. 
KfcCommClient* client; TPipe* tpipe; - TCubeTiling* cubeTiling; + const TCubeTiling* cubeTiling; KfcMsg kfcMsg_; bool isSyncGetC; @@ -809,7 +827,7 @@ private: uint32_t mnIter_; uint64_t cOffset_; template - friend __aicore__ inline void InitKfcClient(T& mm, U* cubeTiling, TPipe* tpipe, KfcCommClient* client, int instIdx, + friend __aicore__ inline void InitKfcClient(T& mm, U* tiling, TPipe* tpipe, KfcCommClient* client, int instIdx, GM_ADDR workspace); private: @@ -819,7 +837,7 @@ private: ASSERT(cubeTiling != nullptr && "cubeTiling cannot be nullptr when init matmul client"); ASSERT(sizeof(TCubeTiling) % sizeof(uint64_t) == 0); - this->cubeTiling = const_cast(cubeTiling); + this->cubeTiling = cubeTiling; *((uint64_t*)&kfcMsg_) = 0; *((uint64_t*)&(kfcMsg_.body)) = 0; @@ -869,6 +887,14 @@ private: } template __aicore__ inline void PostMessage() { + if constexpr (A_TYPE::ibShare && B_TYPE::ibShare) { + ASSERT(DoMatmulNorm(MM_CFG) && "MM_CFG should use norm config when sameabb is enabled"); + if (GetSubBlockIdxImpl() == 1) { + *((uint32_t *)&kfcMsg_.body) = 0; + kfcMsg_.ubAddr = -1; + return; + } + } kfcMsg_.head = KfcMsgMakeFlag(funID, this->instIdx); auto msg = client->AllocMessage(); diff --git a/lib/matmul/matmul_tiling_base.h b/lib/matmul/matmul_tiling_base.h index 33319d47..c537af99 100644 --- a/lib/matmul/matmul_tiling_base.h +++ b/lib/matmul/matmul_tiling_base.h @@ -132,6 +132,11 @@ enum class DequantType : int32_t { TENSOR = 1, }; +enum class ScheduleType : int32_t { + INNER_PRODUCT = 0, + OUTER_PRODUCT = 1, +}; + struct SysTilingTempBufSize { int32_t ubSize = 0; int32_t l1Size = 0; @@ -170,6 +175,13 @@ struct PlatformInfo { uint64_t l0BSize = 0; }; +struct MatmulConfigParams { + int32_t mmConfigType = 1; + bool enableL1CacheUB = false; + ScheduleType scheduleType = ScheduleType::INNER_PRODUCT; + MatrixTraverse traverse = MatrixTraverse::NOSET; +}; + class MatmulApiTilingBase { public: MatmulApiTilingBase(); @@ -207,6 +219,7 @@ public: int32_t 
SetDoubleBuffer(bool a, bool b, bool c, bool bias, bool transND2NZ = true, bool transNZ2ND = true); void SetMatmulConfigParams(int32_t mmConfigTypeIn = 1, bool enableL1CacheUBIn = false); + void SetMatmulConfigParams(const MatmulConfigParams& configParams); int32_t GetBaseM() const { @@ -288,6 +301,7 @@ public: BufferPool bufferPool_; MatrixTraverse traverse_ = MatrixTraverse::FIRSTM; MatrixMadType madType_ = MatrixMadType::NORMAL; + ScheduleType scheduleType = ScheduleType::INNER_PRODUCT; bool transND2NZ_ = false; bool transNZ2ND_ = false; diff --git a/tests/tiling/test_tiling.cpp b/tests/tiling/test_tiling.cpp index 6d235841..b0e98872 100644 --- a/tests/tiling/test_tiling.cpp +++ b/tests/tiling/test_tiling.cpp @@ -38,11 +38,12 @@ TEST_F(TestTiling, MultiCoreSmallMN) rnnMatmul3.SetBType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, matmul_tiling::DataType ::DT_FLOAT); rnnMatmul3.SetCType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::NZ, matmul_tiling::DataType ::DT_FLOAT); rnnMatmul3.SetBiasType(matmul_tiling::TPosition::VECCALC, matmul_tiling::CubeFormat::ND, matmul_tiling::DataType ::DT_FLOAT); + rnnMatmul3.SetSingleRange(-1,-1,-1,-1,-1,-1); auto ret = rnnMatmul3.SetBias(true); ret = rnnMatmul3.SetDim(24); ret = rnnMatmul3.SetOrgShape(5, 40, 986); ret = rnnMatmul3.SetShape(5, 10, 986); - ret = rnnMatmul3.SetBufferSpace(); // will use all buffer space if not explicitly specified + ret = rnnMatmul3.SetBufferSpace(-1, -1, -1); // will use all buffer space if not explicitly specified optiling::TCubeTiling tilingData; ret = rnnMatmul3.GetTiling(tilingData); rnnMatmul3.PrintTilingData(); @@ -74,9 +75,51 @@ TEST_F(TestTiling, PlatformConstructor) optiling::TCubeTiling tilingData; int ret = tiling.GetTiling(tilingData); tiling.PrintTilingData(); + tiling.PrintTilingDataInfo(tilingData); EXPECT_EQ(ret, 0); } +TEST_F(TestTiling, TestMatmulApiTilingL0DB) +{ + matmul_tiling::PlatformInfo plat {.socVersion = 
platform_ascendc::SocVersion::ASCEND910B, .l1Size 
= 524288, + .l0CSize = 131072, .ubSize = 196608, .l0ASize = 65536, .l0BSize = 65536}; + MatmulApiTiling tiling(plat); + tiling.SetAType(TPosition::TSCM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); + tiling.SetBType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); + tiling.SetCType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); + tiling.SetBiasType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); + tiling.SetShape(2048, 20480, 16); + tiling.SetOrgShape(2048, 20480, 16); + tiling.SetBias(false); + tiling.SetBufferSpace(-1, -1, -1, -1); + tiling.SetMatmulConfigParams({1, false, ScheduleType::OUTER_PRODUCT, MatrixTraverse::FIRSTM}); + optiling::TCubeTiling tilingData; + int ret = tiling.GetTiling(tilingData); + tiling.PrintTilingData(); + EXPECT_EQ(ret, 0); +} + +TEST_F(TestTiling, TestMatmulApiTilingL0DBError) +{ + matmul_tiling::PlatformInfo plat {.socVersion = platform_ascendc::SocVersion::ASCEND910B, .l1Size = 524288, + .l0CSize = 131072, .ubSize = 196608, .l0ASize = 65536, .l0BSize = 65536}; + MatmulApiTiling tiling(plat); + tiling.SetAType(TPosition::TSCM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); + tiling.SetBType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); + tiling.SetCType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); + tiling.SetBiasType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); + tiling.SetShape(16, 16, 2048); + tiling.SetOrgShape(16, 16, 2048); + tiling.SetFixSplit(16, 16, -1); + tiling.SetBias(false); + tiling.SetBufferSpace(-1, -1, -1, -1); + tiling.SetMatmulConfigParams({1, false, ScheduleType::OUTER_PRODUCT, MatrixTraverse::FIRSTN}); + optiling::TCubeTiling tilingData; + int ret = tiling.GetTiling(tilingData); + tiling.PrintTilingData(); + EXPECT_EQ(ret, -1); +} + TEST_F(TestTiling, TestInt4BaseK) { matmul_tiling::PlatformInfo plat {.socVersion = platform_ascendc::SocVersion::ASCEND910B, .l1Size = 524288, 
@@ -171,6 +214,8 @@ TEST_F(TestTiling, Tiling_BatchMatmul) bmm.biasType_.pos = TPosition::TSCM; retParam = bmm.CheckSetParam(); EXPECT_EQ(retParam, false); + + EXPECT_EQ(bmm.SetSingleBatch(2, 2), 0); } TEST_F(TestTiling, ATscmCase) @@ -796,6 +841,105 @@ TEST_F(TestTiling, TestSetBufferSpace) EXPECT_EQ(tiling.bufferPool_.l1Size, 1024); } +TEST_F(TestTiling, TestCosTilingFloat) +{ + std::vector shapeDims = { 128, 128 }; + auto cosShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + AscendC::GetCosMaxMinTmpSize(cosShape, 4, false, maxValue, minValue); + EXPECT_EQ(maxValue, 128 * 128 * 4 * 3); + AscendC::GetCosMaxMinTmpSize(cosShape, 4, true, maxValue, minValue); + EXPECT_EQ(maxValue, 128 * 128 * 4 * 2); + uint32_t maxLiveNodeCnt = 0; + uint32_t extraBuf = 0; + GetCosTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 3); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestCosTilingFloat512) +{ + std::vector shapeDims = { 512 }; + auto cosShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + AscendC::GetCosMaxMinTmpSize(cosShape, 4, false, maxValue, minValue); + EXPECT_EQ(minValue, 256 * 3); + AscendC::GetCosMaxMinTmpSize(cosShape, 4, true, maxValue, minValue); + EXPECT_EQ(minValue, 256 * 2); +} + +TEST_F(TestTiling, TestCosTilingHalf) +{ + std::vector shapeDims = { 128, 128 }; + auto cosShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + AscendC::GetCosMaxMinTmpSize(cosShape, 2, false, maxValue, minValue); + EXPECT_EQ(maxValue, 128 * 128 * 8 * 2); + EXPECT_EQ(minValue, 256 * 8); + uint32_t maxLiveNodeCnt = 0; + uint32_t extraBuf = 0; + GetCosTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 8); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestAtanTilingFloat) +{ + std::vector shapeDims = { 128, 128 }; + auto atanShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + 
GetAtanMaxMinTmpSize(atanShape, 4, false, maxValue, minValue); + EXPECT_EQ(maxValue, 128 * 128 * 4 * 5); + EXPECT_EQ(minValue, 256 * 5); +} + +TEST_F(TestTiling, TestAtanTilingHalf) +{ + std::vector shapeDims = { 128, 128 }; + auto atanShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetAtanMaxMinTmpSize(atanShape, 2, false, maxValue, minValue); + EXPECT_EQ(minValue, 256 * 12); + EXPECT_EQ(maxValue, 128 * 128 * 2 * 12); + uint32_t maxLiveNodeCnt = 0; + uint32_t extraBuf = 0; + GetAtanTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 12); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestClampTilingFloat) +{ + std::vector shapeDims = { 128, 128 }; + auto atanShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetClampMaxMinTmpSize(atanShape, 4, false, maxValue, minValue); + EXPECT_EQ(maxValue, 128 * 128 * 1); + EXPECT_EQ(minValue, 64 * 1); +} + +TEST_F(TestTiling, TestClampTilingHalf) +{ + std::vector shapeDims = { 128, 128 }; + auto atanShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetClampMaxMinTmpSize(atanShape, 2, false, maxValue, minValue); + EXPECT_EQ(minValue, 128 * 1); + EXPECT_EQ(maxValue, 128 * 128 * 1); + uint32_t maxLiveNodeCnt = 0; + uint32_t extraBuf = 0; + GetClampTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 1); + EXPECT_EQ(extraBuf, 0); +} + TEST_F(TestTiling, TestSoftMaxTiling) { std::vector shapeDims = { 128, 128 }; @@ -909,7 +1053,6 @@ TEST_F(TestTiling, TestSoftMaxFlashV2Tiling) SoftMaxFlashV2TilingFunc(softmaxShape, inputTypeSize, maxSumTypeSize, workLength, tilingData, true, true); EXPECT_EQ(tilingData.get_reduceM(), 64); } - TEST_F(TestTiling, TestSoftMaxFlashV2TilingBasicBlock) { std::vector shapeDims = { 8, 1024 }; @@ -938,6 +1081,163 @@ TEST_F(TestTiling, TestSoftMaxFlashV2TilingBasicBlock) EXPECT_EQ(tilingData.get_reduceM(), 8); } +TEST_F(TestTiling, 
TestAsinTmpBufferFacotrHalfWithoutBasicBlock) { + uint32_t maxLivedNodes = 0xffff; + uint32_t extraBuffer = 0xffff; + GetAsinTmpBufferFactorSize(2, maxLivedNodes, extraBuffer); + EXPECT_EQ(maxLivedNodes, 6); + EXPECT_EQ(extraBuffer, 0); +} + +TEST_F(TestTiling, TestAsinTmpBufferFacotrFloatWithoutBasicBlock) { + uint32_t maxLivedNodes = 0xffff; + uint32_t extraBuffer = 0xffff; + GetAsinTmpBufferFactorSize(4, maxLivedNodes, extraBuffer); + EXPECT_EQ(maxLivedNodes, 2); + EXPECT_EQ(extraBuffer, 0); +} + +TEST_F(TestTiling, TestAsinTilingHalf128) +{ + std::vector shapeDims = { 128 }; + auto asinShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetAsinMaxMinTmpSize(asinShape, 2, false, maxValue, minValue); + EXPECT_EQ(maxValue, 256 * 6); + EXPECT_EQ(minValue, 256 * 6); +} + +TEST_F(TestTiling, TestAsinTilingFloat) +{ + std::vector shapeDims = { 32 }; + auto asinShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetAsinMaxMinTmpSize(asinShape, 4, false, maxValue, minValue); + EXPECT_EQ(maxValue, 256 * 2); + EXPECT_EQ(minValue, 256 * 2); +} + +TEST_F(TestTiling, TestAsinTilingHalf16K) +{ + std::vector shapeDims = { 128, 128 }; + auto asinShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetAsinMaxMinTmpSize(asinShape, 2, false, maxValue, minValue); + EXPECT_EQ(maxValue, 128 * 128 * 6 * 2); + EXPECT_EQ(minValue, 256 * 6); +} + +TEST_F(TestTiling, TestAsinTilingFloat16K) +{ + std::vector shapeDims = { 128, 128 }; + auto asinShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetAsinMaxMinTmpSize(asinShape, 4, false, maxValue, minValue); + EXPECT_EQ(maxValue, 128 * 128 * 2 * 4); + EXPECT_EQ(minValue, 256 * 2); +} + +TEST_F(TestTiling, TestSinhTilingFloat) +{ + std::vector shapeDims = { 128, 128 }; + auto sinhShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetSinhMaxMinTmpSize(sinhShape, 4, true, maxValue, minValue); + 
EXPECT_EQ(minValue, 256 * 1); + EXPECT_EQ(maxValue, 128 * 128 * 1 * 4); + + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetSinhTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 1); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestSinhTilingHalf) +{ + std::vector shapeDims = { 128, 128 }; + auto sinhShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetSinhMaxMinTmpSize(sinhShape, 2, true, maxValue, minValue); + EXPECT_EQ(minValue, 256 * 4); + EXPECT_EQ(maxValue, 128 * 128 * 4 * 2); + + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetSinhTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 4); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestRoundTiling) +{ + fe::PlatFormInfos platform_info; + auto plat = platform_ascendc::PlatformAscendC(&platform_info); + std::vector shapeDims = { 128, 128 }; + auto tanShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + uint32_t maxLiveNodeCnt = 0; + uint32_t extraBuf = 0; + platform_ascendc::SocVersion socVersion = plat.GetSocVersion(); + GetRoundMaxMinTmpSize(plat, tanShape, 4, false, maxValue, minValue); + GetRoundTmpBufferFactorSize(plat, 4, maxLiveNodeCnt, extraBuf); + GetRoundMaxMinTmpSize(plat, tanShape, 2, false, maxValue, minValue); + GetRoundTmpBufferFactorSize(plat, 2, maxLiveNodeCnt, extraBuf); +} + +TEST_F(TestTiling, TestTanTilingFloat) +{ + std::vector shapeDims = { 128, 128 }; + auto tanShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetTanMaxMinTmpSize(tanShape, 4, false, maxValue, minValue); + EXPECT_EQ(maxValue, 128 * 128 * 4 * 4); + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetTanTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 4); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestTanTilingFloat512) +{ + std::vector shapeDims = { 512 }; + auto tanShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + 
uint32_t minValue = 0; + GetTanMaxMinTmpSize(tanShape, 4, false, maxValue, minValue); + EXPECT_EQ(minValue, 256 * 4); + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetTanTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 4); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestTanTilingHalf) +{ + std::vector shapeDims = { 128, 128 }; + auto tanShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetTanMaxMinTmpSize(tanShape, 2, false, maxValue, minValue); + EXPECT_EQ(maxValue, 128 * 128 * 10 * 2); + EXPECT_EQ(minValue, 256 * 10); + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetTanTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 10); + EXPECT_EQ(extraBuf, 0); +} + TEST_F(TestTiling, TEstSwiGLUTilingHalf) { std::vector shapeDims = {10, 512}; @@ -993,72 +1293,447 @@ TEST_F(TestTiling, TestSwiGLUFactorHalf) EXPECT_EQ(extraBuf, 0); } -TEST_F(TestTiling, TestSigmoidTiling) +TEST_F(TestTiling, TestFmodTilingFloat) { - std::vector shapeDims = { 128 }; - auto sigmoidShape = ge::Shape(shapeDims); - uint32_t maxVal; - uint32_t minVal; - GetSigmoidMaxMinTmpSize(sigmoidShape, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 128 * 4); - EXPECT_EQ(minVal, 256); + std::vector shapeDims = { 128, 128 }; + auto fmodShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetFmodMaxMinTmpSize(fmodShape, 4, false, maxValue, minValue); + EXPECT_EQ(minValue, 256); + EXPECT_EQ(maxValue, 128 * 128 * 1 * 4); } -TEST_F(TestTiling, TestLayernormTiling) +TEST_F(TestTiling, TestFmodTilingHalf) { - const uint32_t stackBufferSize = 100 * 1024; - const uint32_t typeSize = 4; + std::vector shapeDims = { 128, 128 }; + auto fmodShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetFmodMaxMinTmpSize(fmodShape, 2, false, maxValue, minValue); + EXPECT_EQ(minValue, 128 * 128 * 3 * 4); + EXPECT_EQ(maxValue, 128 * 128 * 3 * 4); +} - std::vector shapeDims = { 
128, 128, 128, 128, 128, 128 }; - auto layernormShape = ge::Shape(shapeDims); - const bool isReuseSource = false; - optiling::LayerNormTiling tilling; +TEST_F(TestTiling, TestTruncTilingFloat) +{ + std::vector shapeDims = { 128, 128 }; + auto truncShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetTruncMaxMinTmpSize(truncShape, 4, false, maxValue, minValue); + EXPECT_EQ(minValue, 256 * 1); + EXPECT_EQ(maxValue, 128 * 128 * 1 * 4); + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetTruncTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 1); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestTruncTilingHalf) +{ + std::vector shapeDims = { 128, 128 }; + auto truncShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; uint32_t minValue = 0; + GetTruncMaxMinTmpSize(truncShape, 2, false, maxValue, minValue); + EXPECT_EQ(minValue, 256 * 2); + EXPECT_EQ(maxValue, 128 * 128 * 2 * 2); + + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetTruncTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 2); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestTruncTilingHalf512) +{ + std::vector shapeDims = { 512 }; + auto truncShape = ge::Shape(shapeDims); uint32_t maxValue = 0; + uint32_t minValue = 0; + GetTruncMaxMinTmpSize(truncShape, 2, false, maxValue, minValue); + EXPECT_EQ(maxValue, 512 * 2 * 2); + EXPECT_EQ(minValue, 256 * 2); +} - AscendC::GetLayerNormMaxMinTmpSize(layernormShape, typeSize, isReuseSource, maxValue, minValue); - EXPECT_EQ(maxValue, 3 * (128 * 128 * 128) * typeSize + 2 * (128 * 128) * typeSize); - EXPECT_EQ(minValue, 3 * 128 * typeSize + 2 * (128 * 128) * typeSize); +TEST_F(TestTiling, TestAcosTmpBufferFacotrHalfWithoutBasicBlock) { + uint32_t maxLivedNodes = 0xffff; + uint32_t extraBuffer = 0xffff; + GetAcosTmpBufferFactorSize(2, maxLivedNodes, extraBuffer); + EXPECT_EQ(maxLivedNodes, 6); + EXPECT_EQ(extraBuffer, 0); +} - 
AscendC::GetLayerNormNDTillingInfo(layernormShape, stackBufferSize, typeSize, isReuseSource, tilling); - EXPECT_EQ(tilling.get_tmpBufSize(), stackBufferSize / sizeof(float)); + +TEST_F(TestTiling, TestAcosTmpBufferFacotrFloatWithoutBasicBlock) { + uint32_t maxLivedNodes = 0xffff; + uint32_t extraBuffer = 0xffff; + GetAcosTmpBufferFactorSize(4, maxLivedNodes, extraBuffer); + EXPECT_EQ(maxLivedNodes, 2); + EXPECT_EQ(extraBuffer, 0); } -TEST_F(TestTiling, TestRmsnormTiling) +TEST_F(TestTiling, TestAcosTilingHalf128) { - constexpr uint32_t bLength = 4; - constexpr uint32_t sLength = 32; - constexpr uint32_t hLength = 16; - constexpr uint32_t bsLength = bLength * sLength; - constexpr uint32_t bshLength = bLength * sLength * hLength; - std::vector shapeDims = {bLength, sLength, hLength}; - auto shape = ge::Shape(shapeDims); - constexpr uint32_t typeSize = 4; - constexpr uint32_t ONE_BLK_FLOAT = 8; + std::vector shapeDims = { 128 }; + auto acosShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetAcosMaxMinTmpSize(acosShape, 2, false, maxValue, minValue); + EXPECT_EQ(minValue, 256 * 6); + EXPECT_EQ(maxValue, 256 * 6); +} +TEST_F(TestTiling, TestAcosTilingFloat) +{ + std::vector shapeDims = { 32 }; + auto acosShape = ge::Shape(shapeDims); uint32_t maxValue = 0; uint32_t minValue = 0; - // common scene - bool res = AscendC::GetRmsNormMaxMinTmpSize(shape, typeSize, maxValue, minValue); - const uint32_t goldenMax = (bshLength + bsLength) * typeSize; - uint32_t goldenMin = (hLength + ONE_BLK_FLOAT) * typeSize; - EXPECT_EQ(res, true); - EXPECT_EQ(maxValue, goldenMax); - EXPECT_EQ(minValue, goldenMin); + GetAcosMaxMinTmpSize(acosShape, 4, false, maxValue, minValue); + EXPECT_EQ(minValue, 256 * 2); + EXPECT_EQ(maxValue, 256 * 2); +} - // basic block scene 1: input shape is illegal, fail to get minSize - res = AscendC::GetRmsNormMaxMinTmpSize(shape, typeSize, maxValue, minValue, true); - EXPECT_EQ(res, false); +TEST_F(TestTiling, TestTanhTiling) +{ + 
uint32_t maxVal = 0; + uint32_t minVal = 0; + GetTanhMaxMinTmpSize(ge::Shape({128}), 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 128 * 4 * 1); + EXPECT_EQ(minVal, 256 * 1); + GetTanhMaxMinTmpSize(ge::Shape({32}), 2, false, maxVal, minVal); + EXPECT_EQ(maxVal, 256 * 4); + EXPECT_EQ(minVal, 256 * 4); + uint32_t extraBuf = 123; + uint32_t maxLivedNodesCnt = 123; + GetTanhTmpBufferFactorSize(4, maxLivedNodesCnt, extraBuf); + EXPECT_EQ(extraBuf, 0); + EXPECT_EQ(maxLivedNodesCnt, 1); + GetTanhTmpBufferFactorSize(2, maxLivedNodesCnt, extraBuf); + EXPECT_EQ(extraBuf, 0); + EXPECT_EQ(maxLivedNodesCnt, 4); +} - constexpr uint32_t BASIC_BLK_HLENGTH = 64; - constexpr uint32_t BASIC_BLK_BSLENGTH = 8; - shapeDims[2] = BASIC_BLK_HLENGTH; - auto shape_basic_blk = ge::Shape(shapeDims);// 4,32,64 - // basic block scene 2: get minSize successfully - res = AscendC::GetRmsNormMaxMinTmpSize(shape_basic_blk, typeSize, maxValue, minValue, true); - goldenMin = (64 + 8) * typeSize; - EXPECT_EQ(res, true); +TEST_F(TestTiling, TestSigmoidTiling) +{ + std::vector shapeDims = { 128 }; + auto sigmoidShape = ge::Shape(shapeDims); + uint32_t maxVal; + uint32_t minVal; + GetSigmoidMaxMinTmpSize(sigmoidShape, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 128 * 4); + EXPECT_EQ(minVal, 256); +} + +TEST_F(TestTiling, TestLogTilingMaxMin) +{ + std::vector shapeDims = { 128 }; + auto logShape = ge::Shape(shapeDims); + uint32_t maxVal; + uint32_t minVal; + GetLogMaxMinTmpSize(logShape, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 0); + EXPECT_EQ(minVal, 0); + GetLog2MaxMinTmpSize(logShape, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 0); + EXPECT_EQ(minVal, 0); + GetLog2MaxMinTmpSize(logShape, 2, false, maxVal, minVal); + EXPECT_EQ(maxVal, 4 * 128); + EXPECT_EQ(minVal, 256); + GetLog10MaxMinTmpSize(logShape, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 0); + EXPECT_EQ(minVal, 0); +} + +TEST_F(TestTiling, TestLogTilingFactor) +{ + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + 
GetLogTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 0); + EXPECT_EQ(extraBuf, 0); + GetLog10TmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 0); + EXPECT_EQ(extraBuf, 0); + GetLog2TmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 2); + EXPECT_EQ(extraBuf, 0); + GetLog2TmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 0); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestAcosTilingHalf16K) +{ + std::vector shapeDims = { 128, 128 }; + auto acosShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetAcosMaxMinTmpSize(acosShape, 2, false, maxValue, minValue); + EXPECT_EQ(maxValue, 128 * 128 * 6 * 2); + EXPECT_EQ(minValue, 256 * 6); +} + +TEST_F(TestTiling, TestAcosTilingFloat16K) +{ + std::vector shapeDims = { 128, 128 }; + auto acosShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetAcosMaxMinTmpSize(acosShape, 4, false, maxValue, minValue); + EXPECT_EQ(maxValue, 128 * 128 * 2 * 4); + EXPECT_EQ(minValue, 256 * 2); +} + +TEST_F(TestTiling, TestAsinhTilingFloat) +{ + std::vector shapeDims = { 128, 128 }; + auto asinhShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + AscendC::GetAsinhMaxMinTmpSize(asinhShape, 4, true, maxValue, minValue); + EXPECT_EQ(minValue, 256 * 3); + EXPECT_EQ(maxValue, 128 * 128 * 3 * 4); + + AscendC::GetAsinhMaxMinTmpSize(ge::Shape({32}), 4, true, maxValue, minValue); + EXPECT_EQ(minValue, 256 * 3); + EXPECT_EQ(maxValue, 256 * 3); + + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + AscendC::GetAsinhTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 3); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestAsinhTilingHalf) +{ + std::vector shapeDims = { 128, 128 }; + auto asinhShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + AscendC::GetAsinhMaxMinTmpSize(asinhShape, 2, true, 
maxValue, minValue); + EXPECT_EQ(minValue, 256 * 3); + EXPECT_EQ(maxValue, 128 * 128 * 3 * 2); + + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + AscendC::GetAsinhTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 3); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestAcoshTilingHalf) +{ + std::vector shapeDims = { 128, 128 }; + auto acoshShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + AscendC::GetAcoshMaxMinTmpSize(acoshShape, 2, true, maxValue, minValue); + EXPECT_EQ(minValue, 256 * 2); + EXPECT_EQ(maxValue, 128 * 128 * 2 * 2); + + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetAcoshTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 2); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestAcoshTilingFloat) +{ + std::vector shapeDims = { 128, 128 }; + auto acoshShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + AscendC::GetAcoshMaxMinTmpSize(acoshShape, 4, true, maxValue, minValue); + EXPECT_EQ(minValue, 256 * 1); + EXPECT_EQ(maxValue, 128 * 128 * 1 * 4); + + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + AscendC::GetAcoshTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 1); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestSelectWithBytesMaskTilingSameAxis) +{ + const auto shape = ge::Shape({ 8, 128 }); + const auto scalarShape = ge::Shape({1}); + uint32_t maxValue; + uint32_t minValue; + GetSelectWithBytesMaskMaxMinTmpSize(shape, scalarShape, 2, shape, 1, true, maxValue, minValue); + EXPECT_EQ(minValue, 128 * 8 * 2 + 512); + EXPECT_EQ(maxValue, 128 * 8 * 2 + 512); + GetSelectWithBytesMaskMaxMinTmpSize(scalarShape, shape, 2, shape, 1, false, maxValue, minValue); + EXPECT_EQ(minValue, 128 * 8 * 2 + 512); + EXPECT_EQ(maxValue, 128 * 8 * 2 + 512); +} + +TEST_F(TestTiling, TestSelectWithBytesMaskTilingSameAxisLargeShape) +{ + const auto shape = ge::Shape({ 128, 128 }); + const auto scalarShape = 
ge::Shape({1}); + uint32_t maxValue; + uint32_t minValue; + GetSelectWithBytesMaskMaxMinTmpSize(shape, scalarShape, 2, shape, 1, true, maxValue, minValue); + EXPECT_EQ(minValue, 4096 * 2 + 512); + EXPECT_EQ(maxValue, 128 * 128 * 2 + 512); + GetSelectWithBytesMaskMaxMinTmpSize(scalarShape, shape, 2, shape, 1, false, maxValue, minValue); + EXPECT_EQ(minValue, 4096 * 2 + 512); + EXPECT_EQ(maxValue, 128 * 128 * 2 + 512); +} + +TEST_F(TestTiling, TestSelectWithBytesMaskTilingSameAxisSmallShape) +{ + const auto shape = ge::Shape({ 1, 16 }); + const auto scalarShape = ge::Shape({1}); + uint32_t maxValue; + uint32_t minValue; + GetSelectWithBytesMaskMaxMinTmpSize(shape, scalarShape, 2, shape, 1, true, maxValue, minValue); + EXPECT_EQ(minValue, 1024); + EXPECT_EQ(maxValue, 1024); + GetSelectWithBytesMaskMaxMinTmpSize(scalarShape, shape, 2, shape, 1, false, maxValue, minValue); + EXPECT_EQ(minValue, 1024); + EXPECT_EQ(maxValue, 1024); +} + +TEST_F(TestTiling, TestSelectWithBytesMaskTilingDiffAxis) +{ + const auto srcShape = ge::Shape({ 8, 128 }); + const auto scalarShape = ge::Shape({1}); + const auto maskShape = ge::Shape({ 8, 160 }); + uint32_t maxValue; + uint32_t minValue; + GetSelectWithBytesMaskMaxMinTmpSize(srcShape, scalarShape, 2, maskShape, 1, true, maxValue, minValue); + EXPECT_EQ(minValue, 128 * 8 * 2 + 512); + EXPECT_EQ(maxValue, 128 * 8 * 2 + 512); + GetSelectWithBytesMaskMaxMinTmpSize(srcShape, scalarShape, 2, maskShape, 1, false, maxValue, minValue); + EXPECT_EQ(minValue, 128 * 8 * 2 + 512 + 8 * 128); + EXPECT_EQ(maxValue, 128 * 8 * 2 + 512 + 8 * 128); + GetSelectWithBytesMaskMaxMinTmpSize(scalarShape, srcShape, 2, maskShape, 1, true, maxValue, minValue); + EXPECT_EQ(minValue, 128 * 8 * 2 + 512); + EXPECT_EQ(maxValue, 128 * 8 * 2 + 512); +} + +TEST_F(TestTiling, TestSelectWithBytesMaskTilingDiffAxisLargeShape) +{ + const auto srcShape = ge::Shape({ 128, 128 }); + const auto scalarShape = ge::Shape({1}); + const auto maskShape = ge::Shape({ 128, 160 }); + 
uint32_t maxValue; + uint32_t minValue; + GetSelectWithBytesMaskMaxMinTmpSize(srcShape, scalarShape, 2, maskShape, 1, true, maxValue, minValue); + EXPECT_EQ(minValue, 4096 * 2 + 512); + EXPECT_EQ(maxValue, 128 * 128 * 2 + 512); + GetSelectWithBytesMaskMaxMinTmpSize(srcShape, scalarShape, 2, maskShape, 1, false, maxValue, minValue); + EXPECT_EQ(minValue, 4096 * 2 + 512 + 128 * 128); + EXPECT_EQ(maxValue, 128 * 128 * 2 + 512 + 128 * 128); + GetSelectWithBytesMaskMaxMinTmpSize(scalarShape, srcShape, 2, maskShape, 1, true, maxValue, minValue); + EXPECT_EQ(minValue, 4096 * 2 + 512); + EXPECT_EQ(maxValue, 128 * 128 * 2 + 512); +} + +TEST_F(TestTiling, TestSelectWithBytesMaskTilingDiffAxisSmallShape) +{ + const auto srcShape = ge::Shape({ 1, 16 }); + const auto scalarShape = ge::Shape({1}); + const auto maskShape = ge::Shape({ 1, 32 }); + uint32_t maxValue; + uint32_t minValue; + GetSelectWithBytesMaskMaxMinTmpSize(srcShape, scalarShape, 2, maskShape, 1, true, maxValue, minValue); + EXPECT_EQ(minValue, 1024); + EXPECT_EQ(maxValue, 1024); + GetSelectWithBytesMaskMaxMinTmpSize(srcShape, scalarShape, 2, maskShape, 1, false, maxValue, minValue); + EXPECT_EQ(minValue, 1024 + 32); + EXPECT_EQ(maxValue, 1024 + 32); + GetSelectWithBytesMaskMaxMinTmpSize(scalarShape, srcShape, 2, maskShape, 1, true, maxValue, minValue); + EXPECT_EQ(minValue, 1024); + EXPECT_EQ(maxValue, 1024); +} + +TEST_F(TestTiling, TestLayernormTiling) +{ + const uint32_t stackBufferSize = 100 * 1024; + const uint32_t typeSize = 4; + + std::vector shapeDims = { 128, 128, 128, 128, 128, 128 }; + auto layernormShape = ge::Shape(shapeDims); + const bool isReuseSource = false; + optiling::LayerNormTiling tilling; + + uint32_t minValue = 0; + uint32_t maxValue = 0; + + AscendC::GetLayerNormMaxMinTmpSize(layernormShape, typeSize, isReuseSource, maxValue, minValue); + EXPECT_EQ(maxValue, 3 * (128 * 128 * 128) * typeSize + 2 * (128 * 128) * typeSize); + EXPECT_EQ(minValue, 3 * 128 * typeSize + 2 * (128 * 128) * 
typeSize); + + AscendC::GetLayerNormNDTillingInfo(layernormShape, stackBufferSize, typeSize, isReuseSource, tilling); + EXPECT_EQ(tilling.get_tmpBufSize(), stackBufferSize / sizeof(float)); +} + +TEST_F(TestTiling, TestGroupnormTiling) +{ + const uint32_t stackBufferSize = 100 * 1024; + const uint32_t typeSize = 4; + const uint32_t groupNum = 4; + + std::vector shapeDims = { 16, 16, 8, 8}; + auto groupnormShape = ge::Shape(shapeDims); + const bool isReuseSource = false; + optiling::GroupNormTiling tilling; + + uint32_t minValue = 0; + uint32_t maxValue = 0; + + AscendC::GetGroupNormMaxMinTmpSize(groupnormShape, typeSize, isReuseSource, groupNum, maxValue, minValue); + EXPECT_EQ(maxValue, 3 * (16 * 16 * 8 * 8) * typeSize + 2 * groupNum * 16 * typeSize); + EXPECT_EQ(minValue, 3 * (16 / 4 * 8 * 8) * typeSize + 2 * groupNum * 16 * typeSize); + + AscendC::GetGroupNormNDTilingInfo(groupnormShape, stackBufferSize, typeSize, isReuseSource, groupNum, tilling); + EXPECT_EQ(tilling.get_tmpBufSize(), stackBufferSize / sizeof(float)); +} +TEST_F(TestTiling, TestRmsnormTiling) +{ + constexpr uint32_t bLength = 4; + constexpr uint32_t sLength = 32; + constexpr uint32_t hLength = 16; + constexpr uint32_t bsLength = bLength * sLength; + constexpr uint32_t bshLength = bLength * sLength * hLength; + std::vector shapeDims = {bLength, sLength, hLength}; + auto shape = ge::Shape(shapeDims); + constexpr uint32_t typeSize = 4; + constexpr uint32_t ONE_BLK_FLOAT = 8; + + uint32_t maxValue = 0; + uint32_t minValue = 0; + // common scene + bool res = AscendC::GetRmsNormMaxMinTmpSize(shape, typeSize, maxValue, minValue); + const uint32_t goldenMax = (bshLength + bsLength) * typeSize; + uint32_t goldenMin = (hLength + ONE_BLK_FLOAT) * typeSize; + EXPECT_EQ(res, true); + EXPECT_EQ(maxValue, goldenMax); + EXPECT_EQ(minValue, goldenMin); + + // basic block scene 1: input shape is illegal, fail to get minSize + res = AscendC::GetRmsNormMaxMinTmpSize(shape, typeSize, maxValue, minValue, true); + 
EXPECT_EQ(res, false); + + constexpr uint32_t BASIC_BLK_HLENGTH = 64; + constexpr uint32_t BASIC_BLK_BSLENGTH = 8; + shapeDims[2] = BASIC_BLK_HLENGTH; + auto shape_basic_blk = ge::Shape(shapeDims);// 4,32,64 + // basic block scene 2: get minSize successfully + res = AscendC::GetRmsNormMaxMinTmpSize(shape_basic_blk, typeSize, maxValue, minValue, true); + goldenMin = (64 + 8) * typeSize; + EXPECT_EQ(res, true); EXPECT_EQ(minValue, goldenMin); // basic block scene: get basic block using minTmpSize @@ -1098,8 +1773,8 @@ TEST_F(TestTiling, TestRmsnormTiling) auto shape2 = ge::Shape({1,8,128}); res = AscendC::GetRmsNormTilingInfo(shape2, shape2, stackBufferSize, typeSize, tiling, true); EXPECT_EQ(res, true); - EXPECT_EQ(tiling.get_mainBshLength(), 128); - EXPECT_EQ(tiling.get_mainBsLength(), 1); + EXPECT_EQ(tiling.get_mainBshLength(), 896); + EXPECT_EQ(tiling.get_mainBsLength(), 7); stackBufferSize = (8*128 + 8)*4; // shape: 1,8,128 res = AscendC::GetRmsNormTilingInfo(shape2, shape2, stackBufferSize, typeSize, tiling, true); @@ -1127,12 +1802,30 @@ TEST_F(TestTiling, TestRmsnormTiling) res = AscendC::GetRmsNormTilingInfo(shape2, shape3, stackBufferSize, typeSize, tiling); EXPECT_EQ(res, false); - // abnormal case: basic block doesnot support h >= 2048 stackBufferSize = 16*2048*4; auto shape4 = ge::Shape({1,8,2048}); res = AscendC::GetRmsNormTilingInfo(shape4, shape4, stackBufferSize, typeSize, tiling, true); EXPECT_EQ(res, false); + + stackBufferSize = 2048; + shape4 = ge::Shape({14,1,56}); + res = AscendC::GetRmsNormTilingInfo(shape4, shape4, stackBufferSize, typeSize, tiling); + EXPECT_EQ(res, true); + EXPECT_EQ(tiling.get_mainBshLength(), 448); + EXPECT_EQ(tiling.get_mainBsLength(), 8); + EXPECT_EQ(tiling.get_tailBshLength(), 336); + EXPECT_EQ(tiling.get_tailBsLength(), 6); + EXPECT_EQ(tiling.get_loopRound(), 1); + + stackBufferSize = 2080; + res = AscendC::GetRmsNormTilingInfo(shape4, shape4, stackBufferSize, typeSize, tiling); + EXPECT_EQ(res, true); + 
EXPECT_EQ(tiling.get_mainBshLength(),504); + EXPECT_EQ(tiling.get_mainBsLength(), 9); + EXPECT_EQ(tiling.get_tailBshLength(), 280); + EXPECT_EQ(tiling.get_tailBsLength(), 5); + EXPECT_EQ(tiling.get_loopRound(), 1); } TEST_F(TestTiling, TestBatchnormTiling) @@ -1305,6 +1998,39 @@ TEST_F(TestTiling, TestDeepnormTiling) EXPECT_EQ(tiling.get_oneTmpSize(), 512); } +TEST_F(TestTiling, TestExpTiling) +{ + std::vector shapeDims = {128, 128}; + auto expShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + + // float isReuseSrc = false 3 tmpBuffer + AscendC::GetExpMaxMinTmpSize(expShape, 4, false, maxValue, minValue); + EXPECT_EQ(minValue, 3 * 256); + EXPECT_EQ(maxValue, 3 * 128 * 128 * 4); + // float isReuseSrc = true 2 tmpBuffer + AscendC::GetExpMaxMinTmpSize(expShape, 4, true, maxValue, minValue); + EXPECT_EQ(minValue, 2 * 256); + EXPECT_EQ(maxValue, 2 * 128 * 128 * 4); + // half 4 tmpBuffer + AscendC::GetExpMaxMinTmpSize(expShape, 2, false, maxValue, minValue); + EXPECT_EQ(minValue, 4 * 256); + EXPECT_EQ(maxValue, 4 * 128 * 128 * 4); + AscendC::GetExpMaxMinTmpSize(expShape, 2, true, maxValue, minValue); + EXPECT_EQ(minValue, 4 * 256); + EXPECT_EQ(maxValue, 4 * 128 * 128 * 4); + + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + AscendC::GetExpTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 3); + EXPECT_EQ(extraBuf, 0); + AscendC::GetExpTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 8); + EXPECT_EQ(extraBuf, 0); +} + TEST_F(TestTiling, TestMatmulApiTilngFactorSplit1) { MatmulApiTiling tiling; @@ -1646,6 +2372,7 @@ TEST_F(TestTiling, TestMatmulApiTilngMultiCoreBTSCM4) EXPECT_EQ(res, 0); } + TEST_F(TestTiling, TestMatmulApiTilngSingleCoreFullLoadCase) { optiling::TCubeTiling tilingData; @@ -1687,6 +2414,59 @@ TEST_F(TestTiling, TestMatmulApiTilngMultiCoreBTSCM5) EXPECT_EQ(res, 0); } +TEST_F(TestTiling, TestConcatTiling) +{ + fe::PlatFormInfos platform_info; + auto plat = 
platform_ascendc::PlatformAscendC(&platform_info); + const uint32_t elemCount = 128; + + AscendC::GetConcatTmpSize(plat, elemCount, 2); +} + +TEST_F(TestTiling, TestSortTiling) +{ + fe::PlatFormInfos platform_info; + auto plat = platform_ascendc::PlatformAscendC(&platform_info); + const uint32_t elemCount = 128; + + AscendC::GetSortTmpSize(plat, elemCount, 4); +} + +TEST_F(TestTiling, TestUnPadTiling) +{ + const uint32_t stackBufferSize = 100 * 1024; + const uint32_t typeSize = 4; + + std::vector shapeDims = { 32, 32 }; + auto srcShape = ge::Shape(shapeDims); + optiling::UnPadTiling tiling; + + AscendC::UnPadTilingFunc(srcShape, 0, typeSize, tiling); + AscendC::UnPadTilingFunc(srcShape, stackBufferSize, typeSize, tiling); + fe::PlatFormInfos platform_info; + auto plat = platform_ascendc::PlatformAscendC(&platform_info); + uint32_t maxValue = 0; + uint32_t minValue = 0; + AscendC::GetUnPadMaxMinTmpSize(plat, srcShape, typeSize, maxValue, minValue); +} + +TEST_F(TestTiling, TestPadTiling) +{ + const uint32_t stackBufferSize = 100 * 1024; + const uint32_t typeSize = 4; + + std::vector shapeDims = { 32, 32}; + std::vector ori_shape_dims = { 32, 31 }; + auto srcShape = ge::Shape(shapeDims); + auto oriSrcShape = ge::Shape(ori_shape_dims); + optiling::PadTiling tiling; + + AscendC::PadTilingFunc(srcShape, oriSrcShape, stackBufferSize, typeSize, tiling); + uint32_t maxValue = 0; + uint32_t minValue = 0; + AscendC::GetPadMaxMinTmpSize(srcShape, typeSize, maxValue, minValue); +} + TEST_F(TestTiling, TestLayernormGradTiling) { const uint32_t stackBufferSize = 100 * 1024; @@ -1738,6 +2518,23 @@ TEST_F(TestTiling, TestLayernormGradBetaTiling) EXPECT_EQ(tiling.get_stackBufferSize(), stackBufferSize / sizeof(float)); } +TEST_F(TestTiling, TestConfusionTransposeTiling) +{ + const uint32_t stackBufferSize = 100 * 1024; + const uint32_t typeSize = 2; + + std::vector shapeDims = { 1, 2, 64, 32 }; + auto srcShape = ge::Shape(shapeDims); + optiling::ConfusionTransposeTiling tiling; + 
AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 1, tiling); + AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 2, tiling); + AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 3, tiling); + AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 4, tiling); + AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 5, tiling); + AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 6, tiling); + AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 7, tiling); +} + TEST_F(TestTiling, TestMatmulApiTilngL0BNoDB) { MatmulApiTiling tiling; @@ -1864,7 +2661,7 @@ TEST_P(RnnTilingbTestSuite, TestMatmulApiTilngRnnRealCase) matmul_tiling::DataType ::DT_FLOAT); rnnMatmul2.SetBiasType(matmul_tiling::TPosition::VECCALC, matmul_tiling::CubeFormat::ND, (matmul_tiling::DataType)dataType); - // full load + // full loaded auto ret = rnnMatmul.SetBias(true); ret = rnnMatmul.SetDim(rnnParams.sysAivCoreNum / 4); int32_t input_align = MathUtil::CeilDivision(rnnParams.inputSize, 16) * 16; @@ -1922,7 +2719,7 @@ TEST_P(RnnTilingbTestSuite, TestMatmulApiTilngRnnRealCase) sizeof(float); EXPECT_LT(l1UsedSize, 512 * 1024 - 64); rnnParams.usedCoreNum = dim * 4; - } else { // part of full load + } else { // part of full loaded // two matmul time sharing auto ret = rnnMatmul.SetBias(true); ret = rnnMatmul.SetDim(rnnParams.sysAivCoreNum / 4); @@ -2018,7 +2815,7 @@ TEST_P(RnnTilingbTestSuite, TestMatmulApiTilngRnnRealCase) rnnParams.baseN = rnnMatmul.GetBaseN(); rnnParams.baseK = rnnMatmul.GetBaseK(); // get output info ret = rnnMatmul.GetSingleShape(rnnParams.singleM, rnnParams.singleN, - rnnParams.singleK); // get single core data + rnnParams.singleK); // get single core data ret = rnnMatmul.GetCoreNum(dim, mDim, nDim); // get used blockdim after multi-cores cut, carried by user to kernel, contrl Kernel business // input 
mm @@ -2051,7 +2848,6 @@ TEST_P(RnnTilingbTestSuite, TestMatmulApiTilngRnnRealCase) tilingData.hiddenMMParam.get_depthB1()) * sizeof(float); EXPECT_LT(l1UsedSize, 512 * 1024 - 64); - // mm basic property rnnParams.usedCoreNum = dim; } } @@ -2074,6 +2870,129 @@ TEST_F(TestTiling, TestMatmulApiTilngSetShapeZero) EXPECT_EQ(ret, -1); } +// #if __CCE_AICORE__ == 200 +// TEST_F(TestTiling, TestPlatformAscendC) +// { +// fe::PlatFormInfos platform_info; +// auto plat = platform_ascendc::PlatformAscendC(&platform_info); +// EXPECT_EQ(plat.GetCoreNumVector(), 8); +// EXPECT_EQ(plat.GetCoreNumVector() + plat.GetCoreNumAic() , 18); +// } +// #endif + +// #if __CCE_AICORE__ == 220 +// extern void platfrom_stub_set_num_aic(const char *num); +// extern void platfrom_stub_set_num_aiv(const char *num); +// extern void platfrom_stub_set_num_cub(const char *num); +// extern void platfrom_stub_set_ctl(const char *num); +// extern void platfrom_stub_set_chip_version(const char *num); +// extern void platfrom_stub_set_num(uint32_t num); +// TEST_F(TestTiling, TestPlatformAscendC) +// { +// fe::PlatFormInfos platform_info; +// auto plat = platform_ascendc::PlatformAscendC(&platform_info); +// uint64_t ub_size, l1_size, l0; +// uint64_t l2_bw, hbm_bw, bw; +// plat.GetCoreMemSize(platform_ascendc::CoreMemType::UB, ub_size); +// plat.GetCoreMemSize(platform_ascendc::CoreMemType::L1, l1_size); +// EXPECT_EQ(ub_size, 196352); +// EXPECT_EQ(l1_size, 524032); +// plat.GetCoreMemSize(platform_ascendc::CoreMemType::L0_A, l0); +// EXPECT_EQ(l0, 65536); +// plat.GetCoreMemSize(platform_ascendc::CoreMemType::L0_B, l0); +// EXPECT_EQ(l0, 65536); +// plat.GetCoreMemSize(platform_ascendc::CoreMemType::L0_C, l0); +// EXPECT_EQ(l0, 65536 * 2); +// plat.GetCoreMemBw(platform_ascendc::CoreMemType::L2, l2_bw); +// plat.GetCoreMemBw(platform_ascendc::CoreMemType::HBM, hbm_bw); +// EXPECT_EQ(l2_bw, 110); +// EXPECT_EQ(hbm_bw, 32); +// plat.GetCoreMemBw(platform_ascendc::CoreMemType::UB, bw); +// 
EXPECT_EQ(plat.GetCoreNum(), 48); +// EXPECT_EQ(plat.GetCoreNumAic(), 24); +// EXPECT_EQ(plat.GetCoreNumAiv(), 48); +// platfrom_stub_set_num_cub("20"); +// EXPECT_EQ(plat.GetCoreNumAic(), 20); +// platfrom_stub_set_num_aiv("40"); +// EXPECT_EQ(plat.GetCoreNumAiv(), 40); +// platfrom_stub_set_ctl("AICore"); +// EXPECT_EQ(plat.GetCoreNumAic(), 24); +// EXPECT_EQ(plat.GetCoreNumAiv(), 24); +// platfrom_stub_set_num_aic("20"); +// EXPECT_EQ(plat.GetCoreNumAic(), 20); +// EXPECT_EQ(plat.GetCoreNumAiv(), 20); +// EXPECT_EQ(bw, 0); +// EXPECT_EQ(plat.CalcTschBlockDim(1, 0, 1), 1); +// EXPECT_EQ(plat.CalcTschBlockDim(1, 1, 0), 1); +// EXPECT_EQ(plat.CalcTschBlockDim(2, 1, 1), 2); +// EXPECT_EQ(plat.CalcTschBlockDim(2, 2, 1), 2); +// EXPECT_EQ(plat.CalcTschBlockDim(2, 1, 2), 1); +// EXPECT_EQ(plat.CalcTschBlockDim(3, 1, 2), 2); +// EXPECT_EQ(plat.CalcTschBlockDim(6, 1, 3), 2); + +// EXPECT_EQ(plat.GetLibApiWorkSpaceSize(), 16 * 1024 * 1024); +// platfrom_stub_set_chip_version("Ascend910"); +// EXPECT_EQ(plat.GetLibApiWorkSpaceSize(), 2 * 1024 * 1024); +// EXPECT_EQ(plat.GetSocVersion(), platform_ascendc::SocVersion::ASCEND910); +// EXPECT_EQ(plat.GetCoreNumVector(), 0); +// } +// #endif + +// #if __CCE_AICORE__ == 300 +// extern void platfrom_stub_set_num_aic(const char *num); +// extern void platfrom_stub_set_num_aiv(const char *num); +// extern void platfrom_stub_set_num_cub(const char *num); +// extern void platfrom_stub_set_ctl(const char *num); +// extern void platfrom_stub_set_chip_version(const char *num); +// extern void platfrom_stub_set_num(uint32_t num); +// TEST_F(TestTiling, TestPlatformAscendC) +// { +// fe::PlatFormInfos platform_info; +// auto plat = platform_ascendc::PlatformAscendC(&platform_info); +// uint64_t ub_size, l1_size, l0; +// uint64_t l2_bw, hbm_bw, bw; +// plat.GetCoreMemSize(platform_ascendc::CoreMemType::UB, ub_size); +// plat.GetCoreMemSize(platform_ascendc::CoreMemType::L1, l1_size); +// EXPECT_EQ(ub_size, 248 * 1024); +// 
EXPECT_EQ(l1_size, 1024 * 1024); +// plat.GetCoreMemSize(platform_ascendc::CoreMemType::L0_A, l0); +// EXPECT_EQ(l0, 65536); +// plat.GetCoreMemSize(platform_ascendc::CoreMemType::L0_B, l0); +// EXPECT_EQ(l0, 65536); +// plat.GetCoreMemSize(platform_ascendc::CoreMemType::L0_C, l0); +// EXPECT_EQ(l0, 65536 * 2); +// plat.GetCoreMemBw(platform_ascendc::CoreMemType::L2, l2_bw); +// plat.GetCoreMemBw(platform_ascendc::CoreMemType::HBM, hbm_bw); +// EXPECT_EQ(l2_bw, 256); +// EXPECT_EQ(hbm_bw, 17); +// plat.GetCoreMemBw(platform_ascendc::CoreMemType::UB, bw); +// EXPECT_EQ(plat.GetCoreNum(), 1); +// EXPECT_EQ(plat.GetCoreNumAic(), 1); +// EXPECT_EQ(plat.GetCoreNumAiv(), 1); +// platfrom_stub_set_num_cub("1"); +// EXPECT_EQ(plat.GetCoreNumAic(), 1); +// platfrom_stub_set_num_aiv("1"); +// EXPECT_EQ(plat.GetCoreNumAiv(), 1); +// platfrom_stub_set_ctl("AICore"); +// EXPECT_EQ(plat.GetCoreNumAic(), 1); +// EXPECT_EQ(plat.GetCoreNumAiv(), 1); +// platfrom_stub_set_num_aic("2"); +// EXPECT_EQ(plat.GetCoreNumAic(), 2); +// EXPECT_EQ(plat.GetCoreNumAiv(), 2); +// EXPECT_EQ(bw, 0); +// EXPECT_EQ(plat.CalcTschBlockDim(1, 0, 1), 1); +// EXPECT_EQ(plat.CalcTschBlockDim(1, 1, 0), 1); +// EXPECT_EQ(plat.CalcTschBlockDim(2, 1, 1), 2); +// EXPECT_EQ(plat.CalcTschBlockDim(2, 2, 1), 2); +// EXPECT_EQ(plat.CalcTschBlockDim(2, 1, 2), 1); +// EXPECT_EQ(plat.CalcTschBlockDim(3, 1, 2), 2); +// EXPECT_EQ(plat.CalcTschBlockDim(6, 1, 3), 2); + +// EXPECT_EQ(plat.GetLibApiWorkSpaceSize(), 2097152); +// EXPECT_EQ(plat.GetCoreNumVector(), 0); +// } +// #endif + TEST_F(TestTiling, TestMatmulApiTilngInt8Case1) { MatmulApiTiling tiling; @@ -2352,14 +3271,68 @@ TEST_F(TestTiling, TestMatmulApiTilngInt8Case9) EXPECT_EQ(ret, 0); } -TEST_F(TestTiling, TestMatmulApiTilngInt8Case10) +TEST_F(TestTiling, TestErfTilingFloat) { - MultiCoreMatmulTiling tiling; - tiling.SetDim(2); - tiling.SetAType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_INT8); - tiling.SetBType(TPosition::GM, CubeFormat::ND, 
matmul_tiling::DataType::DT_INT8); - tiling.SetCType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_INT32); - tiling.SetBiasType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_INT32); + std::vector shapeDims = { 128, 128 }; + auto erfShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetErfMaxMinTmpSize(erfShape, 4, false, maxValue, minValue); + EXPECT_EQ(maxValue, 128 * 128 * 3 * 4); + EXPECT_EQ(minValue, 256 * 3); +} + +TEST_F(TestTiling, TestErfTilingHalf) +{ + std::vector shapeDims = { 128, 128 }; + auto erfShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetErfMaxMinTmpSize(erfShape, 2, false, maxValue, minValue); + EXPECT_EQ(maxValue, 128 * 128 * 2 * 8); + EXPECT_EQ(minValue, 256 * 8); + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetErfTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 8); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestErfcTilingFloat) +{ + std::vector shapeDims = { 128, 128 }; + auto erfcShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetErfcMaxMinTmpSize(erfcShape, 4, false, maxValue, minValue); + EXPECT_EQ(maxValue, 128 * 128 * 7 * 4); + EXPECT_EQ(minValue, 256 * 7); +} + +TEST_F(TestTiling, TestErfcTilingHalf) +{ + std::vector shapeDims = { 128, 128 }; + auto erfcShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetErfcMaxMinTmpSize(erfcShape, 2, false, maxValue, minValue); + EXPECT_EQ(maxValue, 128 * 128 * 2 * 16); + EXPECT_EQ(minValue, 256 * 16); + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetErfcTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 16); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestMatmulApiTilngInt8Case10) +{ + MultiCoreMatmulTiling tiling; + tiling.SetDim(2); + tiling.SetAType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_INT8); + tiling.SetBType(TPosition::GM, 
CubeFormat::ND, matmul_tiling::DataType::DT_INT8); + tiling.SetCType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_INT32); + tiling.SetBiasType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_INT32); tiling.SetOrgShape(65536, 64, 32); tiling.SetShape(65536, 64, 32); tiling.SetBias(false); @@ -2430,6 +3403,95 @@ TEST_F(TestTiling, TestMatmulApiTilngInt8Case13) EXPECT_EQ(ret, 0); } +TEST_F(TestTiling, TestCoshTilingFloat) +{ + std::vector shapeDims = { 128, 128 }; + auto coshShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetCoshMaxMinTmpSize(coshShape, 4, false, maxValue, minValue); + EXPECT_EQ(maxValue, 128 * 128 * 2 * 4); + EXPECT_EQ(minValue, 256 * 2); +} + +TEST_F(TestTiling, TestCoshTilingFloat512) +{ + std::vector shapeDims = { 512 }; + auto coshShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetCoshMaxMinTmpSize(coshShape, 4, false, maxValue, minValue); + EXPECT_EQ(maxValue, 512 * 4 * 2); + EXPECT_EQ(minValue, 256 * 2); +} + +TEST_F(TestTiling, TestCoshTilingHalf) +{ + std::vector shapeDims = { 128, 128 }; + auto coshShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetCoshMaxMinTmpSize(coshShape, 2, false, maxValue, minValue); + EXPECT_EQ(maxValue, 128 * 128 * 2 * 6); + EXPECT_EQ(minValue, 256 * 6); + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetCoshTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 6); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestSinTilingFloat) +{ + std::vector shapeDims = { 128, 128 }; + auto sinShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetSinMaxMinTmpSize(sinShape, 4, true, maxValue, minValue); + EXPECT_EQ(minValue, 2 * 256); + EXPECT_EQ(maxValue, 128 * 128 * 2 * 4); + GetSinMaxMinTmpSize(sinShape, 4, false, maxValue, minValue); + EXPECT_EQ(minValue, 3 * 256); + EXPECT_EQ(maxValue, 128 * 128 * 3 * 4); + uint32_t maxLiveNodeCnt; + 
uint32_t extraBuf; + GetSinTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 3); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestSinTilingHalf) +{ + std::vector shapeDims = { 128, 128 }; + auto sinShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetSinMaxMinTmpSize(sinShape, 2, false, maxValue, minValue); + EXPECT_EQ(maxValue, 128 * 128 * 8 * 2); + EXPECT_EQ(minValue, 8 * 256); + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetSinTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 8); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestAscendSumTiling) +{ + uint32_t n = 8; + uint32_t maxValue; + uint32_t minValue; + GetSumMaxMinTmpSize(n, 2, true, maxValue, minValue); + EXPECT_EQ(minValue, 32); + EXPECT_EQ(maxValue, 32); + + maxValue = 0; + minValue = 0; + GetSumMaxMinTmpSize(n, 4, false, maxValue, minValue); + EXPECT_EQ(minValue, 32); + EXPECT_EQ(maxValue, 32); +} + TEST_F(TestTiling, TestAscendSiluTiling) { std::vector shapeDims = { 512 }; @@ -2452,6 +3514,54 @@ TEST_F(TestTiling, TestAscendSwishTiling) EXPECT_EQ(maxValue, 0); } +TEST_F(TestTiling, TestAscendXorTiling) +{ + std::vector shapeDims = { 128, 128 }; + auto xorShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetXorMaxMinTmpSize(xorShape, 2, true, maxValue, minValue); + EXPECT_EQ(maxValue, 128 * 128 * 1 * 2); + EXPECT_EQ(minValue, 1 * 256); + GetXorTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 1); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestFracTilingFloat) +{ + std::vector shapeDims = { 128, 128 }; + auto fracShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetFracMaxMinTmpSize(fracShape, 4, true, maxValue, minValue); + EXPECT_EQ(minValue, 0); + EXPECT_EQ(maxValue, 0); + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetFracTmpBufferFactorSize(4, 
maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 0); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestFracTilingHalf) +{ + std::vector shapeDims = { 128, 128 }; + auto fracShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetFracMaxMinTmpSize(fracShape, 2, true, maxValue, minValue); + EXPECT_EQ(minValue, 1024); + EXPECT_EQ(maxValue, 131072); + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetFracTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 4); + EXPECT_EQ(extraBuf, 0); +} + #if __CCE_AICORE__ == 220 extern void platfrom_stub_set_chip_version(const char *num); TEST_F(TestTiling, TestTopkTiling_TopKModeNomal_isInitIndexTrue_Float_Inner64) @@ -2934,6 +4044,15 @@ TEST_F(TestTiling, TestTopkTiling_TopKModeSmall310P_HALF) } #endif +TEST_F(TestTiling, TestArithProgression) +{ + uint32_t maxValue; + uint32_t minValue; + GetArithProgressionMaxMinTmpSize(maxValue, minValue); + EXPECT_EQ(maxValue, 0); + EXPECT_EQ(minValue, 0); +} + TEST_F(TestTiling, TestGeGLUTilingFloat) { std::vector shapeDims = { 128, 128 }; @@ -2968,6 +4087,566 @@ TEST_F(TestTiling, TestGeGLUTilingHalf) EXPECT_EQ(extraBuf, 0); } +TEST_F(TestTiling, TestLgammaTilingFp32) +{ + std::vector shapeDims = { 128, 128 }; + auto shape = ge::Shape(shapeDims); + uint32_t maxSize; + uint32_t minSize; + GetLgammaMaxMinTmpSize(shape, 4, true, maxSize, minSize); + EXPECT_EQ(maxSize, 458752); + EXPECT_EQ(minSize, 1792); + + GetLgammaMaxMinTmpSize(shape, 4, false, maxSize, minSize); + EXPECT_EQ(maxSize, 524288); + EXPECT_EQ(minSize, 2048); + + shapeDims = { 8 }; + shape = ge::Shape(shapeDims); + GetLgammaMaxMinTmpSize(shape, 4, false, maxSize, minSize); + EXPECT_EQ(maxSize, 2048); + EXPECT_EQ(minSize, 2048); + + GetLgammaMaxMinTmpSize(shape, 4, true,maxSize, minSize); + EXPECT_EQ(maxSize, 1792); + EXPECT_EQ(minSize, 1792); + + uint32_t maxLiveNodeCnt = 0xffff; + uint32_t extraBuf = 0xffff; + GetLgammaTmpBufferFactorSize(4, maxLiveNodeCnt, 
extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 8); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestLgammaTilingHalf) +{ + std::vector shapeDims = { 128, 128 }; + auto shape = ge::Shape(shapeDims); + uint32_t maxSize; + uint32_t minSize; + + GetLgammaMaxMinTmpSize(shape, 2, false, maxSize, minSize); + EXPECT_EQ(maxSize, 128 * 128 * 2 * 13 * 2); + EXPECT_EQ(minSize, 13 * 2 * 256); + + shapeDims = { 8 }; + shape = ge::Shape(shapeDims); + GetLgammaMaxMinTmpSize(shape, 2, false, maxSize, minSize); + EXPECT_EQ(maxSize, 256 * 13 * 2); + EXPECT_EQ(minSize, 256 * 13 * 2); + + uint32_t maxLiveNodeCnt = 0xffff; + uint32_t extraBuf = 0xffff; + GetLgammaTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 13); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestDigammaTilingFp32) +{ + std::vector shapeDims = { 128, 128 }; + auto shape = ge::Shape(shapeDims); + uint32_t maxSize; + uint32_t minSize; + GetDigammaMaxMinTmpSize(shape, 4, true, maxSize, minSize); + EXPECT_EQ(maxSize, 393216); + EXPECT_EQ(minSize, 1536); + + GetDigammaMaxMinTmpSize(shape, 4, false, maxSize, minSize); + EXPECT_EQ(maxSize, 458752); + EXPECT_EQ(minSize, 1792); + + shapeDims = { 8 }; + shape = ge::Shape(shapeDims); + GetDigammaMaxMinTmpSize(shape, 4, false, maxSize, minSize); + EXPECT_EQ(maxSize, 1792); + EXPECT_EQ(minSize, 1792); + + GetDigammaMaxMinTmpSize(shape, 4, true,maxSize, minSize); + EXPECT_EQ(maxSize, 1536); + EXPECT_EQ(minSize, 1536); + + uint32_t maxLiveNodeCnt = 0xffff; + uint32_t extraBuf = 0xffff; + GetDigammaTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 7); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestDigammaTilingHalf) +{ + std::vector shapeDims = { 128, 128 }; + auto shape = ge::Shape(shapeDims); + uint32_t maxSize; + uint32_t minSize; + + GetDigammaMaxMinTmpSize(shape, 2, false, maxSize, minSize); + EXPECT_EQ(maxSize, 128 * 128 * 2 * 8 * 2); + EXPECT_EQ(minSize, 8 * 2 * 256); + + shapeDims = { 8 }; + shape = 
ge::Shape(shapeDims); + GetDigammaMaxMinTmpSize(shape, 2, false, maxSize, minSize); + EXPECT_EQ(maxSize, 256 * 8 * 2); + EXPECT_EQ(minSize, 256 * 8 * 2); + + uint32_t maxLiveNodeCnt = 0xffff; + uint32_t extraBuf = 0xffff; + GetDigammaTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 16); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestAtanhTilingFloat) +{ + std::vector shapeDims = { 128, 128 }; + auto aTanhShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + AscendC::GetAtanhMaxMinTmpSize(aTanhShape, 4, true, maxValue, minValue); + EXPECT_EQ(minValue, 256 * 1); + EXPECT_EQ(maxValue, 128 * 128 * 4 * 1); + + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + AscendC::GetAtanhTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 1); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestAtanhTilingHalf) +{ + std::vector shapeDims = { 128, 128 }; + auto aTanhShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + AscendC::GetAtanhMaxMinTmpSize(aTanhShape, 2, true, maxValue, minValue); + EXPECT_EQ(minValue, 256 * 4); + EXPECT_EQ(maxValue, 128 * 128 * 2 * 4); + + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + AscendC::GetAtanhTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 4); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestSignTiling) +{ + std::vector shapeDims = { 128, 128 }; + auto signShape = ge::Shape(shapeDims); + uint32_t signNeedMaxSize; + uint32_t signNeedMinSize; + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetSignMaxMinTmpSize(signShape, 2, false, signNeedMaxSize, signNeedMinSize); + EXPECT_EQ(signNeedMaxSize, 3 * 128 * 128 * 2); + EXPECT_EQ(signNeedMinSize, 3 * 256); + + GetSignMaxMinTmpSize(signShape, 4, false, signNeedMaxSize, signNeedMinSize); + EXPECT_EQ(signNeedMaxSize, 3 * 128 * 128 * 4); + EXPECT_EQ(signNeedMinSize, 3 * 256); + + GetSignTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); + 
EXPECT_EQ(maxLiveNodeCnt, 3); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestAscendMeanTiling) +{ + uint32_t n = 8; + uint32_t maxValue; + uint32_t minValue; + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + + GetMeanMaxMinTmpSize(n, 2, 2, true, maxValue, minValue); + EXPECT_EQ(minValue, 32); + EXPECT_EQ(maxValue, 32); + + maxValue = 0; + minValue = 0; + GetMeanMaxMinTmpSize(n, 4, 4, true, maxValue, minValue); + EXPECT_EQ(minValue, 32); + EXPECT_EQ(maxValue, 32); + + GetMeanMaxMinTmpSize(n, 2, 4, true, maxValue, minValue); + EXPECT_EQ(minValue, 96); + EXPECT_EQ(maxValue, 96); + + GetMeanTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 1); + EXPECT_EQ(extraBuf, 0); +} + +// TEST_F(TestTiling, TestKernelContextBuildBase) +// { +// auto builder = context_ascendc::BuildKernelRunContext(2, 2); +// EXPECT_EQ(builder.kernelInputNum, 2); +// } + + +// TEST_F(TestTiling, TestKernelContextBuild) +// { +// gert::Shape input1_shape = {2, 1, 1, 1, 1, 1, 1, 2, 2}; +// int32_t input1_tensor_buffer[] = {0, 2, 3, 3, 1, 0, 0, 1}; +// gert::TensorData input1_tensor_data{(void*)input1_tensor_buffer, nullptr}; +// gert::Shape output_shape = {5, 3}; +// int64_t output_tensor_buffer[15]; +// gert::TensorData output_tensor_data{(void*)output_tensor_buffer, nullptr}; +// auto kernelHolder = +// context_ascendc::KernelRunContextBuilder() +// .KernelIONum(2, 2) +// .Inputs({reinterpret_cast(&input1_shape), +// reinterpret_cast(&input1_tensor_data)}) +// .Outputs({reinterpret_cast(&output_shape), reinterpret_cast(&output_tensor_data)}) +// .NodeIoNum(1, 1) +// .IrInputNum(1) +// .NodeInputTd(0, ge::DT_INT32, ge::FORMAT_ND, ge::FORMAT_ND) +// .NodeOutputTd(0, ge::DT_INT64, ge::FORMAT_ND, ge::FORMAT_ND) +// .Build(); +// auto context = kernelHolder.GetContext(); +// EXPECT_NE(context, nullptr); +// } + +// TEST_F(TestTiling, TestTilingContextBuildWithConstValue) +// { +// string active_type = "gelu"; +// gert::StorageShape x_shape = {{1024, 5120}, {1024, 
5120}}; +// gert::StorageShape expert_tokens_shape = {{16}, {16}}; +// gert::StorageShape weight1_shape = {{16, 5120, 0}, {16, 5120, 0}}; +// gert::StorageShape bias1_shape = {{16, 0}, {16, 0}}; +// gert::StorageShape weight2_shape = {{16, 0, 5120}, {16, 0, 5120}}; +// gert::StorageShape bias2_shape = {{16, 5120}, {16, 5120}}; + +// gert::StorageShape output_shape = {{1024, 5210}, {1024, 5210}}; + +// std::vector expert_tokens_const_value (16, 1); +// std::vector x_const_value (1024 * 5120, 2.f); +// std::vector bias2_value (16 * 5120, 3.f); +// auto param = gert::TilingData::CreateCap(4096); +// auto workspace_size_holer = gert::ContinuousVector::Create(4096); +// auto ws_size = reinterpret_cast(workspace_size_holer.get()); +// auto holder = context_ascendc::TilingContextBuilder() +// .SetOpNameType("name", "tpye") +// .NodeIoNum(6, 1) +// .IrInstanceNum({1, 1, 1, 1, 1, 1}) +// .AddInputTd(0, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &x_shape, reinterpret_cast(x_const_value.data())) +// .AddInputTd(1, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &weight1_shape) +// .AddInputTd(2, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &weight2_shape) +// .AddInputTd(3, ge::DT_INT64, ge::FORMAT_ND, ge::FORMAT_ND, &expert_tokens_shape, reinterpret_cast(expert_tokens_const_value.data())) +// .AddInputTd(4, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &bias1_shape) +// .AddInputTd(5, ge::DT_BF16, ge::FORMAT_ND, ge::FORMAT_ND, &bias2_shape, reinterpret_cast(bias2_value.data())) +// .AddOutputTd(0, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &output_shape) +// .AddAttrs({ +// {"activation", ge::AnyValue::CreateFrom(active_type)}, +// {"inner_precise", ge::AnyValue::CreateFrom(1)} +// }) +// .TilingData(param.get()) +// .Workspace(ws_size) +// .Build(); + +// gert::TilingContext* tiling_context = holder.GetContext(); +// EXPECT_NE(tiling_context, nullptr); + +// } + +// TEST_F(TestTiling, TestTilingContextBuildAddInputs) +// { +// string active_type = "gelu"; +// 
gert::StorageShape x_shape = {{1024, 5120}, {1024, 5120}}; +// std::vector inputs; +// std::vector outputs; +// context_ascendc::TensorInfo input; +// input.shape = x_shape; +// input.dType = ge::DT_FLOAT16; +// input.oriFormat = ge::FORMAT_ND; +// input.format = ge::FORMAT_ND; +// input.dataPath = "1111"; +// inputs.push_back(input); +// context_ascendc::TensorInfo output; +// output.shape = x_shape; +// output.dType = ge::DT_FLOAT16; +// output.oriFormat = ge::FORMAT_ND; +// output.format = ge::FORMAT_ND; +// output.dataPath = "222"; +// outputs.push_back(output); + +// auto param = gert::TilingData::CreateCap(4096); +// auto workspace_size_holer = gert::ContinuousVector::Create(4096); +// auto ws_size = reinterpret_cast(workspace_size_holer.get()); +// auto holder = context_ascendc::TilingContextBuilder() +// .SetOpNameType("name", "tpye") +// .NodeIoNum(1, 1) +// .IrInstanceNum({1}) +// .AddInputs(inputs) +// .AddOutputs(outputs) +// .AddAttrs({ +// {"activation", ge::AnyValue::CreateFrom(active_type)}, +// {"inner_precise", ge::AnyValue::CreateFrom(1)} +// }) +// .TilingData(param.get()) +// .Workspace(ws_size) +// .Build(); + +// gert::TilingContext* tiling_context = holder.GetContext(); +// EXPECT_NE(tiling_context, nullptr); +// } + +// TEST_F(TestTiling, TestTilingContextBuildFailed) +// { +// string active_type = "gelu"; +// gert::StorageShape x_shape = {{-1, 5120}, {-1, 5120}}; +// std::vector x_const_value (1024 * 5120, 2.f); +// auto param = gert::TilingData::CreateCap(4096); +// auto workspace_size_holer = gert::ContinuousVector::Create(4096); +// auto ws_size = reinterpret_cast(workspace_size_holer.get()); +// auto holder = context_ascendc::TilingContextBuilder() +// .NodeIoNum(1, 1) +// .IrInstanceNum({1, 1}) +// .CompileInfo(nullptr) +// .PlatformInfo(nullptr) +// .AddInputTd(0, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &x_shape, reinterpret_cast(x_const_value.data())) +// .Workspace(ws_size) +// .Build(); + +// gert::TilingContext* 
tiling_context = holder.GetContext(); +// EXPECT_EQ(tiling_context, nullptr); +// } + +// TEST_F(TestTiling, TestTilingContextBuildWithBinFile) +// { +// string active_type = "gelu"; +// gert::StorageShape x_shape = {{1024, 5120}, {1024, 5120}}; +// gert::StorageShape expert_tokens_shape = {{16}, {16}}; +// gert::StorageShape weight1_shape = {{16, 5120, 0}, {16, 5120, 0}}; +// gert::StorageShape bias1_shape = {{16, 0}, {16, 0}}; +// gert::StorageShape weight2_shape = {{16, 0, 5120}, {16, 0, 5120}}; +// gert::StorageShape bias2_shape = {{16, 5120}, {16, 5120}}; +// gert::StorageShape output_shape = {{1024, 5210}, {1024, 5210}}; + +// std::vector expert_tokens_const_value (16, 1); + +// std::vector x_const_value (1024 * 5120, 2.f); +// std::vector bias2_value (16 * 5120, 3.f); +// auto param = gert::TilingData::CreateCap(4096); +// auto workspace_size_holer = gert::ContinuousVector::Create(4096); +// auto ws_size = reinterpret_cast(workspace_size_holer.get()); +// auto holder = context_ascendc::TilingContextBuilder() +// .SetOpNameType("name", "tpye") +// .NodeIoNum(6, 1) +// .IrInstanceNum({1, 1, 1, 1, 1, 1}) +// .AddInputTd(0, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &x_shape, reinterpret_cast(x_const_value.data())) +// .AddInputTd(1, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &weight1_shape) +// .AddInputTd(2, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &weight2_shape) +// .AddInputTd(3, ge::DT_INT64, ge::FORMAT_ND, ge::FORMAT_ND, &expert_tokens_shape, "./expert_tokens_data.bin") +// .AddInputTd(4, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &bias1_shape) +// .AddInputTd(5, ge::DT_BF16, ge::FORMAT_ND, ge::FORMAT_ND, &bias2_shape, reinterpret_cast(bias2_value.data())) +// .AddOutputTd(0, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &output_shape) +// .AddAttrs({ +// {"activation", ge::AnyValue::CreateFrom(active_type)}, +// {"inner_precise", ge::AnyValue::CreateFrom(1)} +// }) +// .TilingData(param.get()) +// .Workspace(ws_size) +// .Build(); +// 
gert::TilingContext* tiling_context = holder.GetContext(); +// EXPECT_EQ(tiling_context, nullptr); +// } + +TEST_F(TestTiling, TestAxpyTiling) +{ + uint32_t maxVal = 0; + uint32_t minVal = 0; + GetAxpyMaxMinTmpSize(ge::Shape({128}), 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 0); + EXPECT_EQ(minVal, 0); + GetAxpyMaxMinTmpSize(ge::Shape({256}), 2, false, maxVal, minVal); + EXPECT_EQ(maxVal, 256 * 4 * 2); + EXPECT_EQ(minVal, 256 * 4); + GetAxpyMaxMinTmpSize(ge::Shape({32}), 2, false, maxVal, minVal); + EXPECT_EQ(maxVal, 256 * 4); + EXPECT_EQ(minVal, 256 * 4); + uint32_t extraBuf = 123; + uint32_t maxLivedNodesCnt = 123; + GetAxpyTmpBufferFactorSize(4, maxLivedNodesCnt, extraBuf); + EXPECT_EQ(extraBuf, 0); + EXPECT_EQ(maxLivedNodesCnt, 1); + GetAxpyTmpBufferFactorSize(2, maxLivedNodesCnt, extraBuf); + EXPECT_EQ(extraBuf, 0); + EXPECT_EQ(maxLivedNodesCnt, 4); +} + +TEST_F(TestTiling, TestCeilTilingFloat) +{ + std::vector shapeDims = { 128, 128 }; + auto ceilShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetCeilMaxMinTmpSize(ceilShape, sizeof(float), false, maxValue, minValue); + EXPECT_EQ(minValue, 256 * 1); + EXPECT_EQ(maxValue, 128 * 128 * 1 * 4); + + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetCeilTmpBufferFactorSize(sizeof(float), maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 1); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestCeilTilingHalf) +{ + std::vector shapeDims = { 128, 128 }; + auto ceilShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetCeilMaxMinTmpSize(ceilShape, 2, false, maxValue, minValue); + EXPECT_EQ(minValue, 256 * 2); + EXPECT_EQ(maxValue, 128 * 128 * 2 * 2); + + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetCeilTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 2); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestCeilTilingHalf512) +{ + std::vector shapeDims = { 512 }; + auto ceilShape = ge::Shape(shapeDims); + 
uint32_t maxValue = 0; + uint32_t minValue = 0; + GetCeilMaxMinTmpSize(ceilShape, 2, false, maxValue, minValue); + EXPECT_EQ(maxValue, 512 * 2 * 2); + EXPECT_EQ(minValue, 256 * 2); +} + +TEST_F(TestTiling, TestFloorTilingFloat) +{ + std::vector shapeDims = { 128, 128 }; + auto floorShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetFloorMaxMinTmpSize(floorShape, sizeof(float), false, maxValue, minValue); + EXPECT_EQ(minValue, 0); + EXPECT_EQ(maxValue, 0); + + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetFloorTmpBufferFactorSize(sizeof(float), maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 0); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestFloorTilingHalf) +{ + std::vector shapeDims = { 128, 128 }; + auto floorShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetFloorMaxMinTmpSize(floorShape, 2, false, maxValue, minValue); + EXPECT_EQ(minValue, 256 * 2); + EXPECT_EQ(maxValue, 128 * 128 * 2 * 2); + + uint32_t maxLiveNodeCnt; + uint32_t extraBuf; + GetFloorTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 2); + EXPECT_EQ(extraBuf, 0); +} + +TEST_F(TestTiling, TestFloorTilingHalf512) +{ + std::vector shapeDims = { 512 }; + auto floorShape = ge::Shape(shapeDims); + uint32_t maxValue = 0; + uint32_t minValue = 0; + GetFloorMaxMinTmpSize(floorShape, 2, false, maxValue, minValue); + EXPECT_EQ(maxValue, 512 * 2 * 2); + EXPECT_EQ(minValue, 256 * 2); +} + +// TEST_F(TestTiling, TestGetSocVersion) +// { +// fe::PlatFormInfos platform_info; +// auto plat = platform_ascendc::PlatformAscendC(&platform_info); + +// MOCKER_CPP(&fe::PlatFormInfos::GetPlatformResWithLock, +// bool(fe::PlatFormInfos::*)(const std::string &, const std::string &, std::string &)) +// .stubs() +// .will(returnValue(false)); + +// platform_ascendc::SocVersion ret = plat.GetSocVersion(); +// EXPECT_EQ(ret, platform_ascendc::SocVersion::RESERVED_VERSION); +// } + +// TEST_F(TestTiling, 
TestCoreNum) +// { +// fe::PlatFormInfos platform_info; +// auto plat = platform_ascendc::PlatformAscendC(&platform_info); + +// MOCKER_CPP(&fe::PlatFormInfos::GetPlatformResWithLock, +// bool(fe::PlatFormInfos::*)(const std::string &, const std::string &, std::string &)) +// .stubs() +// .will(returnValue(false)); + +// uint32_t ret1 = plat.GetCoreNumAic(); +// uint32_t ret2 = plat.GetCoreNumAiv(); +// EXPECT_EQ(ret1, 0); +// EXPECT_EQ(ret2, 0); +// } + +// TEST_F(TestTiling, TestGetLibApiWorkSpaceSize) +// { +// fe::PlatFormInfos platform_info; +// auto plat = platform_ascendc::PlatformAscendC(&platform_info); + +// MOCKER_CPP(&fe::PlatFormInfos::GetPlatformResWithLock, +// bool(fe::PlatFormInfos::*)(const std::string &, const std::string &, std::string &)) +// .stubs() +// .will(returnValue(false)); + +// uint32_t ret1 = plat.GetLibApiWorkSpaceSize(); +// EXPECT_EQ(ret1, static_cast(-1)); +// } +// TEST_F(TestTiling, TestPlatformAscendCManager) +// { +// void *handle; +// int a = 7; +// handle = &a; + +// MOCKER_CPP(&fe::PlatFormInfos::GetPlatformResWithLock, +// bool(fe::PlatFormInfos::*)(const std::string &, const std::string &, std::string &)) +// .stubs() +// .will(returnValue(false)); + +// auto ret2 = platform_ascendc::PlatformAscendCManager::GetInstance(); +// } + +// TEST_F(TestTiling, TestGetVectorCoreNum) +// { +// fe::PlatFormInfos platform_info; +// auto plat = platform_ascendc::PlatformAscendC(&platform_info); + +// MOCKER_CPP(&fe::PlatFormInfos::GetPlatformResWithLock, +// bool(fe::PlatFormInfos::*)(const std::string &, const std::string &, std::string &)) +// .stubs() +// .will(returnValue(false)); +// MOCKER_CPP(&platform_ascendc::PlatformAscendC::GetSocVersion, +// platform_ascendc::SocVersion(platform_ascendc::PlatformAscendC::*)(void) const) +// .stubs() +// .will(returnValue(platform_ascendc::SocVersion::ASCEND310P)); + +// uint32_t ret1 = plat.GetCoreNumVector(); +// EXPECT_EQ(ret1, static_cast(0)); +// 
MOCKER_CPP(&platform_ascendc::PlatformAscendCManager::PlatformAscendCInit) +// .stubs() +// .will(returnValue(platform_info)); +// auto ret2 = platform_ascendc::PlatformAscendCManager::GetInstance(); + +// } + TEST_F(TestTiling, TestReGluFloat16OrBf16) { const std::vector srcShapeDims = { 8, 128 }; @@ -2990,6 +4669,390 @@ TEST_F(TestTiling, TestReGluFloat32) EXPECT_EQ(maxValue, 256); } +#if __CCE_AICORE__ == 220 +extern void platfrom_stub_set_chip_version(const char *num); +TEST_F(TestTiling, TestBroadCast220) +{ + fe::PlatFormInfos platform_info; + auto plat = platform_ascendc::PlatformAscendC(&platform_info); + platfrom_stub_set_chip_version("Ascend910B"); + uint32_t firstDim = 32; + uint32_t lastDim = 32; + std::vector srcShapeDims = {firstDim, 1}; + auto srcShape = ge::Shape(srcShapeDims); + std::vector dstShapeDims = {firstDim, lastDim}; + auto dstShape = ge::Shape(dstShapeDims); + uint32_t maxValue{0}; + uint32_t minValue{0}; + constexpr uint32_t halfSize = 2; + constexpr uint32_t halfOneBlockElementNum = 16; + constexpr uint32_t minHalfAlignSize = halfOneBlockElementNum * halfOneBlockElementNum * halfSize; + constexpr uint32_t BRCB_ONE_SIZE = 8; + uint32_t firstDimAlignNum = (firstDim + BRCB_ONE_SIZE - 1) / BRCB_ONE_SIZE * BRCB_ONE_SIZE; + uint32_t maxHalfAlignSize = firstDimAlignNum * halfOneBlockElementNum * halfSize; + GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, halfSize, false, maxValue, minValue); + EXPECT_EQ(minValue, minHalfAlignSize); + EXPECT_EQ(maxValue, maxHalfAlignSize); + + srcShapeDims = {firstDim, 1}; + srcShape = ge::Shape(srcShapeDims); + uint32_t lastDimNotAlign = 31; + dstShapeDims = {firstDim, lastDimNotAlign}; + dstShape = ge::Shape(dstShapeDims); + + uint32_t blockDimAlignBlockNum = (lastDimNotAlign + halfOneBlockElementNum - 1) / halfOneBlockElementNum; + uint32_t blockDimAlign = blockDimAlignBlockNum * halfOneBlockElementNum; + uint32_t minCopyTempBufferSize = halfOneBlockElementNum * blockDimAlign * halfSize; + auto 
minHalfNotAlignSize = minHalfAlignSize + minCopyTempBufferSize; + + uint32_t maxCopyTempBufferSize = firstDim * blockDimAlign * halfSize; + uint32_t maxHalfNotAlignValue = maxHalfAlignSize + maxCopyTempBufferSize; + + GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, halfSize, false, maxValue, minValue); + EXPECT_EQ(minValue, minHalfNotAlignSize); + EXPECT_EQ(maxValue, maxHalfNotAlignValue); + + constexpr uint32_t int8Size = 1; + srcShapeDims = {firstDim, 1}; + srcShape = ge::Shape(srcShapeDims); + dstShapeDims = {firstDim, lastDim}; + dstShape = ge::Shape(dstShapeDims); + const uint32_t alignSrcSize = + ((firstDim + halfOneBlockElementNum - 1) / halfOneBlockElementNum) * halfOneBlockElementNum; + uint32_t alignDstSize = + ((firstDim * lastDim + halfOneBlockElementNum - 1) / halfOneBlockElementNum) * halfOneBlockElementNum; + uint32_t castTempBufferSize = (alignSrcSize + alignDstSize) * halfSize; + GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, int8Size, false, maxValue, minValue); + EXPECT_EQ(minValue, minHalfAlignSize + castTempBufferSize); + EXPECT_EQ(maxValue, maxHalfAlignSize + castTempBufferSize); + + srcShapeDims = {firstDim, 1}; + srcShape = ge::Shape(srcShapeDims); + dstShapeDims = {firstDim, lastDimNotAlign}; + dstShape = ge::Shape(dstShapeDims); + alignDstSize = + ((firstDim * lastDimNotAlign + halfOneBlockElementNum - 1) / halfOneBlockElementNum) * halfOneBlockElementNum; + castTempBufferSize = (alignSrcSize + alignDstSize) * halfSize; + GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, int8Size, false, maxValue, minValue); + EXPECT_EQ(minValue, minHalfNotAlignSize + castTempBufferSize); + EXPECT_EQ(maxValue, maxHalfNotAlignValue + castTempBufferSize); +} + +TEST_F(TestTiling, TestPowerTiling) +{ + platfrom_stub_set_chip_version("Ascend910B"); + auto platformPtr = platform_ascendc::PlatformAscendCManager::GetInstance("Ascend910B"); + std::vector shapeDims = { 512 }; + auto powerShape = ge::Shape(shapeDims); + uint32_t maxVal; + uint32_t minVal; 
+ GetPowerMaxMinTmpSize(powerShape, powerShape, false, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 512 * 4 * 4 + 256); + EXPECT_EQ(minVal, 256 * 4 + 256); + GetPowerMaxMinTmpSize(powerShape, powerShape, true, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 512 * 4 * 6); + EXPECT_EQ(minVal, 256 * 6); + GetPowerMaxMinTmpSize(powerShape, powerShape, false, 2, false, maxVal, minVal); + EXPECT_EQ(maxVal, 512 * 2 * 14 + 256); + EXPECT_EQ(minVal, 256 * 7 + 256); + std::vector scalar_shape = { 1 }; + auto scalarShape = ge::Shape(scalar_shape); + GetPowerMaxMinTmpSize(powerShape, scalarShape, false, 2, false, maxVal, minVal); + EXPECT_EQ(maxVal, 512 * 2 * 14 + 256); + EXPECT_EQ(minVal, 256 * 7 + 256); + GetPowerMaxMinTmpSize(powerShape, scalarShape, true, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 512 * 4 * 7); + EXPECT_EQ(minVal, 256 * 7); + GetPowerMaxMinTmpSize(powerShape, scalarShape, false, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 512 * 4 * 5 + 256); + EXPECT_EQ(minVal, 256 * 5 + 256); + + std::vector shape1 = { 16 }; + auto powerShape1 = ge::Shape( shape1 ); + GetPowerMaxMinTmpSize(powerShape1, scalarShape, false, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 256 * 5 + 256); + EXPECT_EQ(minVal, 256 * 5 + 256); + GetPowerMaxMinTmpSize(powerShape1, scalarShape, false, 2, false, maxVal, minVal); + EXPECT_EQ(maxVal, 256 * 7 + 256); + EXPECT_EQ(minVal, 256 * 7 + 256); + GetPowerMaxMinTmpSize(powerShape1, scalarShape, true, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 256 * 7); + EXPECT_EQ(minVal, 256 * 7); + GetPowerMaxMinTmpSize(powerShape1, powerShape1, false, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 256 * 4 + 256); + EXPECT_EQ(minVal, 256 * 4 + 256); + GetPowerMaxMinTmpSize(powerShape1, powerShape1, false, 2, false, maxVal, minVal); + EXPECT_EQ(maxVal, 256 * 7 + 256); + EXPECT_EQ(minVal, 256 * 7 + 256); + GetPowerMaxMinTmpSize(powerShape1, powerShape1, true, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 256 * 6); + EXPECT_EQ(minVal, 256 * 6); +} + 
+TEST_F(TestTiling, TestPowerTilingFactorSize) +{ + platfrom_stub_set_chip_version("Ascend910B"); + auto platformPtr = platform_ascendc::PlatformAscendCManager::GetInstance("Ascend910B"); + uint32_t maxLiveNodeCnt = 0xffff; + uint32_t extraBuf = 0xffff; + GetPowerTmpBufferFactorSize(false, true, false, 4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 5); + EXPECT_EQ(extraBuf, 256); + GetPowerTmpBufferFactorSize(false, true, true, 4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 7); + EXPECT_EQ(extraBuf, 0); + GetPowerTmpBufferFactorSize(false, true, false, 2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 14); + EXPECT_EQ(extraBuf, 256); + GetPowerTmpBufferFactorSize(true, true, false, 4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 4); + EXPECT_EQ(extraBuf, 256); + GetPowerTmpBufferFactorSize(true, true, true, 4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 6); + EXPECT_EQ(extraBuf, 0); + GetPowerTmpBufferFactorSize(true, true, false, 2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 14); + EXPECT_EQ(extraBuf, 256); +} + +#endif + +#if __CCE_AICORE__ == 200 +extern void platfrom_stub_set_chip_version(const char *num); +TEST_F(TestTiling, TestPowerTilingV200) +{ + platfrom_stub_set_chip_version("Ascend310P"); + auto platformPtr = platform_ascendc::PlatformAscendCManager::GetInstance("Ascend310P"); + std::vector shapeDims = { 512 }; + auto powerShape = ge::Shape(shapeDims); + uint32_t maxVal; + uint32_t minVal; + GetPowerMaxMinTmpSize(powerShape, powerShape, false, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 512 * 4 * 5 + 256); + EXPECT_EQ(minVal, 256 * 5 + 256); + GetPowerMaxMinTmpSize(powerShape, powerShape, true, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 512 * 4 * 7); + EXPECT_EQ(minVal, 256 * 7); + GetPowerMaxMinTmpSize(powerShape, powerShape, false, 2, false, maxVal, minVal); + EXPECT_EQ(maxVal, 512 * 2 * 16 + 256); + EXPECT_EQ(minVal, 256 * 8 + 256); + std::vector scalar_shape = { 1 }; + auto 
scalarShape = ge::Shape(scalar_shape); + GetPowerMaxMinTmpSize(powerShape, scalarShape, false, 2, false, maxVal, minVal); + EXPECT_EQ(maxVal, 512 * 2 * 16 + 256); + EXPECT_EQ(minVal, 256 * 8 + 256); + GetPowerMaxMinTmpSize(powerShape, scalarShape, true, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 512 * 4 * 8); + EXPECT_EQ(minVal, 256 * 8); + GetPowerMaxMinTmpSize(powerShape, scalarShape, false, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 512 * 4 * 6 + 256); + EXPECT_EQ(minVal, 256 * 6 + 256); + + std::vector shape1 = { 16 }; + auto powerShape1 = ge::Shape( shape1 ); + GetPowerMaxMinTmpSize(powerShape1, scalarShape, false, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 256 * 6 + 256); + EXPECT_EQ(minVal, 256 * 6 + 256); + GetPowerMaxMinTmpSize(powerShape1, scalarShape, false, 2, false, maxVal, minVal); + EXPECT_EQ(maxVal, 256 * 8 + 256); + EXPECT_EQ(minVal, 256 * 8 + 256); + GetPowerMaxMinTmpSize(powerShape1, scalarShape, true, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 256 * 8); + EXPECT_EQ(minVal, 256 * 8); + GetPowerMaxMinTmpSize(powerShape1, powerShape1, false, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 256 * 5 + 256); + EXPECT_EQ(minVal, 256 * 5 + 256); + GetPowerMaxMinTmpSize(powerShape1, powerShape1, false, 2, false, maxVal, minVal); + EXPECT_EQ(maxVal, 256 * 8 + 256); + EXPECT_EQ(minVal, 256 * 8 + 256); + GetPowerMaxMinTmpSize(powerShape1, powerShape1, true, 4, false, maxVal, minVal); + EXPECT_EQ(maxVal, 256 * 7); + EXPECT_EQ(minVal, 256 * 7); +} + +TEST_F(TestTiling, TestPowerTilingFactorSizeV200) +{ + platfrom_stub_set_chip_version("Ascend310P"); + auto platformPtr = platform_ascendc::PlatformAscendCManager::GetInstance("Ascend310P"); + uint32_t maxLiveNodeCnt = 0xffff; + uint32_t extraBuf = 0xffff; + GetPowerTmpBufferFactorSize(false, true, false, 4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 6); + EXPECT_EQ(extraBuf, 256); + GetPowerTmpBufferFactorSize(false, true, true, 4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 
8); + EXPECT_EQ(extraBuf, 0); + GetPowerTmpBufferFactorSize(false, true, false, 2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 16); + EXPECT_EQ(extraBuf, 256); + GetPowerTmpBufferFactorSize(true, true, false, 4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 5); + EXPECT_EQ(extraBuf, 256); + GetPowerTmpBufferFactorSize(true, true, true, 4, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 7); + EXPECT_EQ(extraBuf, 0); + GetPowerTmpBufferFactorSize(true, true, false, 2, maxLiveNodeCnt, extraBuf); + EXPECT_EQ(maxLiveNodeCnt, 16); + EXPECT_EQ(extraBuf, 256); +} + +TEST_F(TestTiling, TestLastBroadCast200) +{ + fe::PlatFormInfos platform_info; + auto plat = platform_ascendc::PlatformAscendC(&platform_info); + platfrom_stub_set_chip_version("Ascend310P"); + uint32_t firstDim = 32; + uint32_t lastDim = 32; + std::vector srcShapeDims = {firstDim, 1}; + auto srcShape = ge::Shape(srcShapeDims); + std::vector dstShapeDims = {firstDim, lastDim}; + auto dstShape = ge::Shape(dstShapeDims); + uint32_t maxValue{0}; + uint32_t minValue{0}; + constexpr uint32_t halfSize = 2; + constexpr uint32_t halfOneBlockElementNum = 16; + constexpr uint32_t MAX_BLOCK_NUM = 8; + constexpr uint32_t ONE_BLOCK_SIZE = 32; + uint32_t minTmpBufferSize = + halfOneBlockElementNum * ((lastDim + MAX_BLOCK_NUM - 1) / MAX_BLOCK_NUM) * halfSize; + uint32_t minHalfAlignSize = ONE_BLOCK_SIZE + + minTmpBufferSize; + uint32_t maxHalfAlignSize = ONE_BLOCK_SIZE + firstDim * lastDim * halfSize; + GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, halfSize, false, maxValue, minValue); + EXPECT_EQ(minValue, minHalfAlignSize); + EXPECT_EQ(maxValue, maxHalfAlignSize); + + constexpr uint32_t int8Size = 1; + const uint32_t alignSrcSize = + ((firstDim + halfOneBlockElementNum - 1) / halfOneBlockElementNum) * halfOneBlockElementNum; + const uint32_t alignDstSize = + ((firstDim * lastDim + halfOneBlockElementNum - 1) / halfOneBlockElementNum) * halfOneBlockElementNum; + const uint32_t 
castTempBufferSize = (alignSrcSize + alignDstSize) * halfSize; + GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, int8Size, false, maxValue, minValue); + EXPECT_EQ(minValue, minHalfAlignSize + castTempBufferSize); + EXPECT_EQ(maxValue, maxHalfAlignSize + castTempBufferSize); +} + +TEST_F(TestTiling, TestFirstBroadCast200) +{ + fe::PlatFormInfos platform_info; + auto plat = platform_ascendc::PlatformAscendC(&platform_info); + platfrom_stub_set_chip_version("Ascend310P"); + uint32_t firstDim = 32; + uint32_t lastDim = 32; + std::vector srcShapeDims = {1, lastDim}; + auto srcShape = ge::Shape(srcShapeDims); + std::vector dstShapeDims = {firstDim, lastDim}; + auto dstShape = ge::Shape(dstShapeDims); + uint32_t maxValue{0}; + uint32_t minValue{0}; + constexpr uint32_t halfSize = 2; + constexpr uint32_t ONE_BLOCK_SIZE = 32; + GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, halfSize, false, maxValue, minValue); + EXPECT_EQ(minValue, ONE_BLOCK_SIZE); + EXPECT_EQ(maxValue, ONE_BLOCK_SIZE); + + constexpr uint32_t int8Size = 1; + constexpr uint32_t HALF_ONE_BLK_SIZE = 16; + const uint32_t alignSrcSize = ((lastDim + HALF_ONE_BLK_SIZE - 1) / HALF_ONE_BLK_SIZE) * HALF_ONE_BLK_SIZE; + const uint32_t alignDstSize = + ((firstDim * lastDim + HALF_ONE_BLK_SIZE - 1) / HALF_ONE_BLK_SIZE) * HALF_ONE_BLK_SIZE; + const uint32_t castTempBufferSize = (alignSrcSize + alignDstSize) * halfSize; + GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, int8Size, false, maxValue, minValue); + EXPECT_EQ(minValue, ONE_BLOCK_SIZE + castTempBufferSize); + EXPECT_EQ(maxValue, ONE_BLOCK_SIZE + castTempBufferSize); +} + +TEST_F(TestTiling, TestOneElementBroadCast200) +{ + fe::PlatFormInfos platform_info; + auto plat = platform_ascendc::PlatformAscendC(&platform_info); + platfrom_stub_set_chip_version("Ascend310P"); + uint32_t srcDim = 1; + uint32_t dstDim = 32; + std::vector srcShapeDims = {srcDim}; + auto srcShape = ge::Shape(srcShapeDims); + std::vector dstShapeDims = {dstDim}; + auto dstShape = 
ge::Shape(dstShapeDims); + uint32_t maxValue{0}; + uint32_t minValue{0}; + constexpr uint32_t halfSize = 2; + GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, halfSize, false, maxValue, minValue); + EXPECT_EQ(minValue, 0); + EXPECT_EQ(maxValue, 0); + + constexpr uint32_t int8Size = 1; + constexpr uint32_t HALF_ONE_BLK_SIZE = 16; + constexpr uint32_t ONE_BLOCK_SIZE = 32; + const uint32_t alignSrcSize = ((srcDim + HALF_ONE_BLK_SIZE - 1) / HALF_ONE_BLK_SIZE) * HALF_ONE_BLK_SIZE; + const uint32_t alignDstSize = ((dstDim + HALF_ONE_BLK_SIZE - 1) / HALF_ONE_BLK_SIZE) * HALF_ONE_BLK_SIZE; + const uint32_t castTempBufferSize = (alignSrcSize + alignDstSize) * halfSize; + GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, int8Size, false, maxValue, minValue); + EXPECT_EQ(minValue, castTempBufferSize + ONE_BLOCK_SIZE); + EXPECT_EQ(maxValue, castTempBufferSize + ONE_BLOCK_SIZE); +} +#endif + +TEST_F(TestTiling, TestReduceXorSumTilingInt16) +{ + std::vector shapeDims = { 128, 128 }; + auto shape = ge::Shape(shapeDims); + uint32_t maxSize; + uint32_t minSize; + GetReduceXorSumMaxMinTmpSize(shape, 2, true, maxSize, minSize); + EXPECT_EQ(maxSize, 65536); + EXPECT_EQ(minSize, 65536); + + GetReduceXorSumMaxMinTmpSize(shape, 2, false, maxSize, minSize); + EXPECT_EQ(maxSize, 98304); + EXPECT_EQ(minSize, 98304); + + shapeDims = { 8 }; + shape = ge::Shape(shapeDims); + GetReduceXorSumMaxMinTmpSize(shape, 2, false, maxSize, minSize); + EXPECT_EQ(maxSize, 768); + EXPECT_EQ(minSize, 768); + + GetReduceXorSumMaxMinTmpSize(shape, 2, true,maxSize, minSize); + EXPECT_EQ(maxSize, 512); + EXPECT_EQ(minSize, 512); +} + +TEST_F(TestTiling, TestCumSum) +{ + uint32_t firstDim = 32; + uint32_t lastDim = 16; + std::vector srcShapeDims = {firstDim, lastDim}; + auto srcShape = ge::Shape(srcShapeDims); + uint32_t maxValue{0}; + uint32_t minValue{0}; + constexpr uint32_t halfSize = 2; + constexpr uint32_t transDataTo5HDAddrListSize = 16; + uint32_t minHalfSize = transDataTo5HDAddrListSize * lastDim * 
3 * sizeof(uint16_t); + uint32_t alignOutter = (firstDim + transDataTo5HDAddrListSize - 1) / transDataTo5HDAddrListSize * transDataTo5HDAddrListSize; + uint32_t maxHalfSize = alignOutter * lastDim * 3 * sizeof(uint16_t); + + GetCumSumMaxMinTmpSize(srcShape, halfSize, true, false, maxValue, minValue); + EXPECT_EQ(minValue, minHalfSize); + EXPECT_EQ(maxValue, maxHalfSize); + + constexpr uint32_t floatSize = 4; + uint32_t minFloatSize = transDataTo5HDAddrListSize * lastDim * 2 * sizeof(float); + uint32_t maxFloatSize = alignOutter * lastDim * 2 * sizeof(float); + + GetCumSumMaxMinTmpSize(srcShape, floatSize, true, false, maxValue, minValue); + EXPECT_EQ(minValue, minFloatSize); + EXPECT_EQ(maxValue, maxFloatSize); + + maxHalfSize = minHalfSize = firstDim * lastDim * sizeof(float); + GetCumSumMaxMinTmpSize(srcShape, halfSize, false, false, maxValue, minValue); + EXPECT_EQ(minValue, minHalfSize); + EXPECT_EQ(maxValue, maxHalfSize); + + + GetCumSumMaxMinTmpSize(srcShape, floatSize, false, false, maxValue, minValue); + EXPECT_EQ(minValue, 0); + EXPECT_EQ(maxValue, 0); +} TEST_F(TestTiling, tiling_compute_error) { MultiCoreMatmulTiling tiling; @@ -3005,4 +5068,4 @@ TEST_F(TestTiling, tiling_compute_error) bmm_tiling.biasType_.pos = TPosition::TSCM; ret = bmm_tiling.Compute(); EXPECT_EQ(ret, -1); -} +} \ No newline at end of file -- Gitee From 9e64853723a12f42b2fb022075edf190fed9f151 Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Thu, 19 Sep 2024 09:55:05 +0000 Subject: [PATCH 02/17] fix error Signed-off-by: jiangchengcheng-on --- lib/matmul/matmul_tiling_base.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/matmul/matmul_tiling_base.h b/lib/matmul/matmul_tiling_base.h index c537af99..0f3f8a5c 100644 --- a/lib/matmul/matmul_tiling_base.h +++ b/lib/matmul/matmul_tiling_base.h @@ -133,8 +133,8 @@ enum class DequantType : int32_t { }; enum class ScheduleType : int32_t { - INNER_PRODUCT = 0; - OUTER_PRODUCT = 1; + INNER_PRODUCT = 0, + 
OUTER_PRODUCT = 1, }; struct SysTilingTempBufSize { -- Gitee From db2eed11c6833d8e189899e640843714a015d4e3 Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Thu, 19 Sep 2024 10:09:57 +0000 Subject: [PATCH 03/17] fix error Signed-off-by: jiangchengcheng-on --- lib/matmul/matmul_tiling_base.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/matmul/matmul_tiling_base.h b/lib/matmul/matmul_tiling_base.h index 0f3f8a5c..a724fdad 100644 --- a/lib/matmul/matmul_tiling_base.h +++ b/lib/matmul/matmul_tiling_base.h @@ -301,7 +301,7 @@ public: BufferPool bufferPool_; MatrixTraverse traverse_ = MatrixTraverse::FIRSTM; MatrixMadType madType_ = MatrixMadType::NORMAL; - ScheduleType scheduleType = ScheduleType::NOSET; + ScheduleType scheduleType = ScheduleType::INNER_PRODUCT; bool transND2NZ_ = false; bool transNZ2ND_ = false; -- Gitee From db4c8389b125975596953f0dd3fd46a312f86a15 Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Thu, 19 Sep 2024 10:20:43 +0000 Subject: [PATCH 04/17] new dfx handler file Signed-off-by: jiangchengcheng-on --- .../modules/dfx/handlers/dfx_chain_handler.h | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 impl/matmul/modules/dfx/handlers/dfx_chain_handler.h diff --git a/impl/matmul/modules/dfx/handlers/dfx_chain_handler.h b/impl/matmul/modules/dfx/handlers/dfx_chain_handler.h new file mode 100644 index 00000000..41120438 --- /dev/null +++ b/impl/matmul/modules/dfx/handlers/dfx_chain_handler.h @@ -0,0 +1,42 @@ +/** + * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * This file is a part of the CANN Open Software. + * Licensed under CANN Open Software License Agreement Version 1.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. 
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ + +/*! + * \file dfx_chain_handler.h + * \brief + */ + +#ifndef MATMUL_DFX_CHAIN_HANDLER_H +#define MATMUL_DFX_CHAIN_HANDLER_H + +namespace matmul { + +struct DfxFuncInfo; + +template +struct DfxChainHandler { + template + __aicore__ inline static void PreCall(const DfxFuncInfo& info, Args&&... args) { + (HANDLERS::PreCall(info, std::forward(args)...), ...); + } + + template + __aicore__ inline static void PostCall(const DfxFuncInfo& info, const RT& ret) { + (HANDLERS::PostCall(info, ret), ...); + } + + __aicore__ inline static void PostCall(const DfxFuncInfo& info) { + (HANDLERS::PostCall(info), ...); + } +}; + +} + +#endif \ No newline at end of file -- Gitee From 1c17522bae2383c27ac9944ede113bbe6de17cf6 Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Thu, 19 Sep 2024 10:34:33 +0000 Subject: [PATCH 05/17] fix error Signed-off-by: jiangchengcheng-on --- impl/matmul/modules/dfx/dfx_handler.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/impl/matmul/modules/dfx/dfx_handler.h b/impl/matmul/modules/dfx/dfx_handler.h index c176cfdd..ce6c0fc3 100644 --- a/impl/matmul/modules/dfx/dfx_handler.h +++ b/impl/matmul/modules/dfx/dfx_handler.h @@ -13,27 +13,29 @@ * \brief */ - #ifndef MATMUL_DFX_HANDLER_H +#ifndef MATMUL_DFX_HANDLER_H #define MATMUL_DFX_HANDLER_H #include "dfx_config.h" namespace matmul { -struct DfxHandler ( +struct DfxHandler { template - __aicore__ inline void PreCall(const DfxFuncInfo& info, Agrs&&... 
args) { - DfxConfig::EnableHandlers::PreCall(info, std::forward(args)...); } template - __aicore__ inline void PostCall(const DfxFuncInfo& info, const RT& ret) { - DfxConfig::EnableHandlers::PostCall(info, ret); + __aicore__ inline static void PostCall(const DfxFuncInfo& info, const RT& ret) { + DfxConfig::EnabledHandlers::PostCall(info, ret); } - __aicore__ inline void PostCall(const DfxFuncInfo& info) { - DfxConfig::EnableHandlers::PostCall(info); + __aicore__ inline static void PostCall(const DfxFuncInfo& info) { + DfxConfig::EnabledHandlers::PostCall(info); } -); +}; + } + #endif \ No newline at end of file -- Gitee From ec25883b7e7dd8e7890bc105ab284955fd92b756 Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Thu, 19 Sep 2024 10:52:52 +0000 Subject: [PATCH 06/17] del test case Signed-off-by: jiangchengcheng-on --- tests/tiling/test_tiling.cpp | 41 ------------------------------------ 1 file changed, 41 deletions(-) diff --git a/tests/tiling/test_tiling.cpp b/tests/tiling/test_tiling.cpp index b0e98872..93745e82 100644 --- a/tests/tiling/test_tiling.cpp +++ b/tests/tiling/test_tiling.cpp @@ -79,47 +79,6 @@ TEST_F(TestTiling, PlatformConstructor) EXPECT_EQ(ret, 0); } -TEST_F(TestTiling, TestMatmulApiTilingL0DB) -{ - matmul_tiling::PlatformInfo plat {.socVersion = platform_ascendc::SocVersion::ASCEND910B, .l1Size = 524288, - .l0CSize = 131072, .ubSize = 196608, .l0ASize = 65536, .l0BSize = 65536}; - MatmulApiTiling tiling(plat); - tiling.SetAType(TPosition::TSCM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); - tiling.SetBType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); - tiling.SetCType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); - tiling.SetBiasType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); - tiling.SetShape(2048, 20480, 16); - tiling.SetOrgShape(2048, 20480, 16); - tiling.SetBias(false); - tiling.SetBufferSpace(-1, -1, -1, -1); - tiling.SetMatmulConfigParams({1, false, 
ScheduleType::OUTER_PRODUCT, MatrixTraverse::FIRSTM}); - optiling::TCubeTiling tilingData; - int ret = tiling.GetTiling(tilingData); - tiling.PrintTilingData(); - EXPECT_EQ(ret, 0); -} - -TEST_F(TestTiling, TestMatmulApiTilingL0DBError) -{ - matmul_tiling::PlatformInfo plat {.socVersion = platform_ascendc::SocVersion::ASCEND910B, .l1Size = 524288, - .l0CSize = 131072, .ubSize = 196608, .l0ASize = 65536, .l0BSize = 65536}; - MatmulApiTiling tiling(plat); - tiling.SetAType(TPosition::TSCM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); - tiling.SetBType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); - tiling.SetCType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); - tiling.SetBiasType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); - tiling.SetShape(16, 16, 2048); - tiling.SetOrgShape(16, 16, 2048); - tiling.SetFixSplit(16, 16, -1); - tiling.SetBias(false); - tiling.SetBufferSpace(-1, -1, -1, -1); - tiling.SetMatmulConfigParams({1, false, ScheduleType::OUTER_PRODUCT, MatrixTraverse::FIRSTN}); - optiling::TCubeTiling tilingData; - int ret = tiling.GetTiling(tilingData); - tiling.PrintTilingData(); - EXPECT_EQ(ret, -1); -} - TEST_F(TestTiling, TestInt4BaseK) { matmul_tiling::PlatformInfo plat {.socVersion = platform_ascendc::SocVersion::ASCEND910B, .l1Size = 524288, -- Gitee From 090c6cedcf2fe3179d6b6c58e819c65d961627f9 Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Thu, 19 Sep 2024 11:16:01 +0000 Subject: [PATCH 07/17] REVERT Signed-off-by: jiangchengcheng-on --- tests/tiling/test_tiling.cpp | 2086 +--------------------------------- 1 file changed, 32 insertions(+), 2054 deletions(-) diff --git a/tests/tiling/test_tiling.cpp b/tests/tiling/test_tiling.cpp index 93745e82..61144c1b 100644 --- a/tests/tiling/test_tiling.cpp +++ b/tests/tiling/test_tiling.cpp @@ -38,12 +38,11 @@ TEST_F(TestTiling, MultiCoreSmallMN) rnnMatmul3.SetBType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, 
matmul_tiling::DataType ::DT_FLOAT); rnnMatmul3.SetCType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::NZ, matmul_tiling::DataType ::DT_FLOAT); rnnMatmul3.SetBiasType(matmul_tiling::TPosition::VECCALC, matmul_tiling::CubeFormat::ND, matmul_tiling::DataType ::DT_FLOAT); - rnnMatmul3.SetSingleRange(-1,-1,-1,-1,-1,-1); auto ret = rnnMatmul3.SetBias(true); ret = rnnMatmul3.SetDim(24); ret = rnnMatmul3.SetOrgShape(5, 40, 986); ret = rnnMatmul3.SetShape(5, 10, 986); - ret = rnnMatmul3.SetBufferSpace(-1, -1, -1); // will use all buffer space if not explicitly specified + ret = rnnMatmul3.SetBufferSpace(); // will use all buffer space if not explicitly specified optiling::TCubeTiling tilingData; ret = rnnMatmul3.GetTiling(tilingData); rnnMatmul3.PrintTilingData(); @@ -75,7 +74,6 @@ TEST_F(TestTiling, PlatformConstructor) optiling::TCubeTiling tilingData; int ret = tiling.GetTiling(tilingData); tiling.PrintTilingData(); - tiling.PrintTilingDataInfo(tilingData); EXPECT_EQ(ret, 0); } @@ -173,8 +171,6 @@ TEST_F(TestTiling, Tiling_BatchMatmul) bmm.biasType_.pos = TPosition::TSCM; retParam = bmm.CheckSetParam(); EXPECT_EQ(retParam, false); - - EXPECT_EQ(bmm.SetSingleBatch(2, 2), 0); } TEST_F(TestTiling, ATscmCase) @@ -800,105 +796,6 @@ TEST_F(TestTiling, TestSetBufferSpace) EXPECT_EQ(tiling.bufferPool_.l1Size, 1024); } -TEST_F(TestTiling, TestCosTilingFloat) -{ - std::vector shapeDims = { 128, 128 }; - auto cosShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - AscendC::GetCosMaxMinTmpSize(cosShape, 4, false, maxValue, minValue); - EXPECT_EQ(maxValue, 128 * 128 * 4 * 3); - AscendC::GetCosMaxMinTmpSize(cosShape, 4, true, maxValue, minValue); - EXPECT_EQ(maxValue, 128 * 128 * 4 * 2); - uint32_t maxLiveNodeCnt = 0; - uint32_t extraBuf = 0; - GetCosTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 3); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestCosTilingFloat512) -{ - std::vector shapeDims = { 512 }; - 
auto cosShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - AscendC::GetCosMaxMinTmpSize(cosShape, 4, false, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 3); - AscendC::GetCosMaxMinTmpSize(cosShape, 4, true, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 2); -} - -TEST_F(TestTiling, TestCosTilingHalf) -{ - std::vector shapeDims = { 128, 128 }; - auto cosShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - AscendC::GetCosMaxMinTmpSize(cosShape, 2, false, maxValue, minValue); - EXPECT_EQ(maxValue, 128 * 128 * 8 * 2); - EXPECT_EQ(minValue, 256 * 8); - uint32_t maxLiveNodeCnt = 0; - uint32_t extraBuf = 0; - GetCosTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 8); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestAtanTilingFloat) -{ - std::vector shapeDims = { 128, 128 }; - auto atanShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetAtanMaxMinTmpSize(atanShape, 4, false, maxValue, minValue); - EXPECT_EQ(maxValue, 128 * 128 * 4 * 5); - EXPECT_EQ(minValue, 256 * 5); -} - -TEST_F(TestTiling, TestAtanTilingHalf) -{ - std::vector shapeDims = { 128, 128 }; - auto atanShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetAtanMaxMinTmpSize(atanShape, 2, false, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 12); - EXPECT_EQ(maxValue, 128 * 128 * 2 * 12); - uint32_t maxLiveNodeCnt = 0; - uint32_t extraBuf = 0; - GetAtanTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 12); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestClampTilingFloat) -{ - std::vector shapeDims = { 128, 128 }; - auto atanShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetClampMaxMinTmpSize(atanShape, 4, false, maxValue, minValue); - EXPECT_EQ(maxValue, 128 * 128 * 1); - EXPECT_EQ(minValue, 64 * 1); -} - -TEST_F(TestTiling, TestClampTilingHalf) -{ - std::vector shapeDims = { 
128, 128 }; - auto atanShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetClampMaxMinTmpSize(atanShape, 2, false, maxValue, minValue); - EXPECT_EQ(minValue, 128 * 1); - EXPECT_EQ(maxValue, 128 * 128 * 1); - uint32_t maxLiveNodeCnt = 0; - uint32_t extraBuf = 0; - GetClampTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 1); - EXPECT_EQ(extraBuf, 0); -} - TEST_F(TestTiling, TestSoftMaxTiling) { std::vector shapeDims = { 128, 128 }; @@ -1012,6 +909,7 @@ TEST_F(TestTiling, TestSoftMaxFlashV2Tiling) SoftMaxFlashV2TilingFunc(softmaxShape, inputTypeSize, maxSumTypeSize, workLength, tilingData, true, true); EXPECT_EQ(tilingData.get_reduceM(), 64); } + TEST_F(TestTiling, TestSoftMaxFlashV2TilingBasicBlock) { std::vector shapeDims = { 8, 1024 }; @@ -1040,163 +938,6 @@ TEST_F(TestTiling, TestSoftMaxFlashV2TilingBasicBlock) EXPECT_EQ(tilingData.get_reduceM(), 8); } -TEST_F(TestTiling, TestAsinTmpBufferFacotrHalfWithoutBasicBlock) { - uint32_t maxLivedNodes = 0xffff; - uint32_t extraBuffer = 0xffff; - GetAsinTmpBufferFactorSize(2, maxLivedNodes, extraBuffer); - EXPECT_EQ(maxLivedNodes, 6); - EXPECT_EQ(extraBuffer, 0); -} - -TEST_F(TestTiling, TestAsinTmpBufferFacotrFloatWithoutBasicBlock) { - uint32_t maxLivedNodes = 0xffff; - uint32_t extraBuffer = 0xffff; - GetAsinTmpBufferFactorSize(4, maxLivedNodes, extraBuffer); - EXPECT_EQ(maxLivedNodes, 2); - EXPECT_EQ(extraBuffer, 0); -} - -TEST_F(TestTiling, TestAsinTilingHalf128) -{ - std::vector shapeDims = { 128 }; - auto asinShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetAsinMaxMinTmpSize(asinShape, 2, false, maxValue, minValue); - EXPECT_EQ(maxValue, 256 * 6); - EXPECT_EQ(minValue, 256 * 6); -} - -TEST_F(TestTiling, TestAsinTilingFloat) -{ - std::vector shapeDims = { 32 }; - auto asinShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetAsinMaxMinTmpSize(asinShape, 4, false, maxValue, minValue); - 
EXPECT_EQ(maxValue, 256 * 2); - EXPECT_EQ(minValue, 256 * 2); -} - -TEST_F(TestTiling, TestAsinTilingHalf16K) -{ - std::vector shapeDims = { 128, 128 }; - auto asinShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetAsinMaxMinTmpSize(asinShape, 2, false, maxValue, minValue); - EXPECT_EQ(maxValue, 128 * 128 * 6 * 2); - EXPECT_EQ(minValue, 256 * 6); -} - -TEST_F(TestTiling, TestAsinTilingFloat16K) -{ - std::vector shapeDims = { 128, 128 }; - auto asinShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetAsinMaxMinTmpSize(asinShape, 4, false, maxValue, minValue); - EXPECT_EQ(maxValue, 128 * 128 * 2 * 4); - EXPECT_EQ(minValue, 256 * 2); -} - -TEST_F(TestTiling, TestSinhTilingFloat) -{ - std::vector shapeDims = { 128, 128 }; - auto sinhShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetSinhMaxMinTmpSize(sinhShape, 4, true, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 1); - EXPECT_EQ(maxValue, 128 * 128 * 1 * 4); - - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetSinhTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 1); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestSinhTilingHalf) -{ - std::vector shapeDims = { 128, 128 }; - auto sinhShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetSinhMaxMinTmpSize(sinhShape, 2, true, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 4); - EXPECT_EQ(maxValue, 128 * 128 * 4 * 2); - - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetSinhTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 4); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestRoundTiling) -{ - fe::PlatFormInfos platform_info; - auto plat = platform_ascendc::PlatformAscendC(&platform_info); - std::vector shapeDims = { 128, 128 }; - auto tanShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - uint32_t maxLiveNodeCnt = 0; - uint32_t extraBuf 
= 0; - platform_ascendc::SocVersion socVersion = plat.GetSocVersion(); - GetRoundMaxMinTmpSize(plat, tanShape, 4, false, maxValue, minValue); - GetRoundTmpBufferFactorSize(plat, 4, maxLiveNodeCnt, extraBuf); - GetRoundMaxMinTmpSize(plat, tanShape, 2, false, maxValue, minValue); - GetRoundTmpBufferFactorSize(plat, 2, maxLiveNodeCnt, extraBuf); -} - -TEST_F(TestTiling, TestTanTilingFloat) -{ - std::vector shapeDims = { 128, 128 }; - auto tanShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetTanMaxMinTmpSize(tanShape, 4, false, maxValue, minValue); - EXPECT_EQ(maxValue, 128 * 128 * 4 * 4); - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetTanTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 4); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestTanTilingFloat512) -{ - std::vector shapeDims = { 512 }; - auto tanShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetTanMaxMinTmpSize(tanShape, 4, false, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 4); - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetTanTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 4); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestTanTilingHalf) -{ - std::vector shapeDims = { 128, 128 }; - auto tanShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetTanMaxMinTmpSize(tanShape, 2, false, maxValue, minValue); - EXPECT_EQ(maxValue, 128 * 128 * 10 * 2); - EXPECT_EQ(minValue, 256 * 10); - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetTanTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 10); - EXPECT_EQ(extraBuf, 0); -} - TEST_F(TestTiling, TEstSwiGLUTilingHalf) { std::vector shapeDims = {10, 512}; @@ -1252,132 +993,6 @@ TEST_F(TestTiling, TestSwiGLUFactorHalf) EXPECT_EQ(extraBuf, 0); } -TEST_F(TestTiling, TestFmodTilingFloat) -{ - std::vector shapeDims = { 128, 128 }; - auto fmodShape = 
ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetFmodMaxMinTmpSize(fmodShape, 4, false, maxValue, minValue); - EXPECT_EQ(minValue, 256); - EXPECT_EQ(maxValue, 128 * 128 * 1 * 4); -} - -TEST_F(TestTiling, TestFmodTilingHalf) -{ - std::vector shapeDims = { 128, 128 }; - auto fmodShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetFmodMaxMinTmpSize(fmodShape, 2, false, maxValue, minValue); - EXPECT_EQ(minValue, 128 * 128 * 3 * 4); - EXPECT_EQ(maxValue, 128 * 128 * 3 * 4); -} - -TEST_F(TestTiling, TestTruncTilingFloat) -{ - std::vector shapeDims = { 128, 128 }; - auto truncShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetTruncMaxMinTmpSize(truncShape, 4, false, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 1); - EXPECT_EQ(maxValue, 128 * 128 * 1 * 4); - - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetTruncTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 1); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestTruncTilingHalf) -{ - std::vector shapeDims = { 128, 128 }; - auto truncShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetTruncMaxMinTmpSize(truncShape, 2, false, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 2); - EXPECT_EQ(maxValue, 128 * 128 * 2 * 2); - - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetTruncTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 2); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestTruncTilingHalf512) -{ - std::vector shapeDims = { 512 }; - auto truncShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetTruncMaxMinTmpSize(truncShape, 2, false, maxValue, minValue); - EXPECT_EQ(maxValue, 512 * 2 * 2); - EXPECT_EQ(minValue, 256 * 2); -} - -TEST_F(TestTiling, TestAcosTmpBufferFacotrHalfWithoutBasicBlock) { - uint32_t maxLivedNodes = 0xffff; - uint32_t extraBuffer = 0xffff; - 
GetAcosTmpBufferFactorSize(2, maxLivedNodes, extraBuffer); - EXPECT_EQ(maxLivedNodes, 6); - EXPECT_EQ(extraBuffer, 0); -} - - -TEST_F(TestTiling, TestAcosTmpBufferFacotrFloatWithoutBasicBlock) { - uint32_t maxLivedNodes = 0xffff; - uint32_t extraBuffer = 0xffff; - GetAcosTmpBufferFactorSize(4, maxLivedNodes, extraBuffer); - EXPECT_EQ(maxLivedNodes, 2); - EXPECT_EQ(extraBuffer, 0); -} - -TEST_F(TestTiling, TestAcosTilingHalf128) -{ - std::vector shapeDims = { 128 }; - auto acosShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetAcosMaxMinTmpSize(acosShape, 2, false, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 6); - EXPECT_EQ(maxValue, 256 * 6); -} - -TEST_F(TestTiling, TestAcosTilingFloat) -{ - std::vector shapeDims = { 32 }; - auto acosShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetAcosMaxMinTmpSize(acosShape, 4, false, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 2); - EXPECT_EQ(maxValue, 256 * 2); -} - -TEST_F(TestTiling, TestTanhTiling) -{ - uint32_t maxVal = 0; - uint32_t minVal = 0; - GetTanhMaxMinTmpSize(ge::Shape({128}), 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 128 * 4 * 1); - EXPECT_EQ(minVal, 256 * 1); - GetTanhMaxMinTmpSize(ge::Shape({32}), 2, false, maxVal, minVal); - EXPECT_EQ(maxVal, 256 * 4); - EXPECT_EQ(minVal, 256 * 4); - uint32_t extraBuf = 123; - uint32_t maxLivedNodesCnt = 123; - GetTanhTmpBufferFactorSize(4, maxLivedNodesCnt, extraBuf); - EXPECT_EQ(extraBuf, 0); - EXPECT_EQ(maxLivedNodesCnt, 1); - GetTanhTmpBufferFactorSize(2, maxLivedNodesCnt, extraBuf); - EXPECT_EQ(extraBuf, 0); - EXPECT_EQ(maxLivedNodesCnt, 4); -} - TEST_F(TestTiling, TestSigmoidTiling) { std::vector shapeDims = { 128 }; @@ -1389,288 +1004,39 @@ TEST_F(TestTiling, TestSigmoidTiling) EXPECT_EQ(minVal, 256); } -TEST_F(TestTiling, TestLogTilingMaxMin) -{ - std::vector shapeDims = { 128 }; - auto logShape = ge::Shape(shapeDims); - uint32_t maxVal; - uint32_t minVal; - 
GetLogMaxMinTmpSize(logShape, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 0); - EXPECT_EQ(minVal, 0); - GetLog2MaxMinTmpSize(logShape, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 0); - EXPECT_EQ(minVal, 0); - GetLog2MaxMinTmpSize(logShape, 2, false, maxVal, minVal); - EXPECT_EQ(maxVal, 4 * 128); - EXPECT_EQ(minVal, 256); - GetLog10MaxMinTmpSize(logShape, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 0); - EXPECT_EQ(minVal, 0); -} - -TEST_F(TestTiling, TestLogTilingFactor) +TEST_F(TestTiling, TestLayernormTiling) { - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetLogTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 0); - EXPECT_EQ(extraBuf, 0); - GetLog10TmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 0); - EXPECT_EQ(extraBuf, 0); - GetLog2TmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 2); - EXPECT_EQ(extraBuf, 0); - GetLog2TmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 0); - EXPECT_EQ(extraBuf, 0); -} + const uint32_t stackBufferSize = 100 * 1024; + const uint32_t typeSize = 4; -TEST_F(TestTiling, TestAcosTilingHalf16K) -{ - std::vector shapeDims = { 128, 128 }; - auto acosShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetAcosMaxMinTmpSize(acosShape, 2, false, maxValue, minValue); - EXPECT_EQ(maxValue, 128 * 128 * 6 * 2); - EXPECT_EQ(minValue, 256 * 6); -} + std::vector shapeDims = { 128, 128, 128, 128, 128, 128 }; + auto layernormShape = ge::Shape(shapeDims); + const bool isReuseSource = false; + optiling::LayerNormTiling tilling; -TEST_F(TestTiling, TestAcosTilingFloat16K) -{ - std::vector shapeDims = { 128, 128 }; - auto acosShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; uint32_t minValue = 0; - GetAcosMaxMinTmpSize(acosShape, 4, false, maxValue, minValue); - EXPECT_EQ(maxValue, 128 * 128 * 2 * 4); - EXPECT_EQ(minValue, 256 * 2); -} - -TEST_F(TestTiling, TestAsinhTilingFloat) -{ - 
std::vector shapeDims = { 128, 128 }; - auto asinhShape = ge::Shape(shapeDims); uint32_t maxValue = 0; - uint32_t minValue = 0; - AscendC::GetAsinhMaxMinTmpSize(asinhShape, 4, true, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 3); - EXPECT_EQ(maxValue, 128 * 128 * 3 * 4); - AscendC::GetAsinhMaxMinTmpSize(ge::Shape({32}), 4, true, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 3); - EXPECT_EQ(maxValue, 256 * 3); + AscendC::GetLayerNormMaxMinTmpSize(layernormShape, typeSize, isReuseSource, maxValue, minValue); + EXPECT_EQ(maxValue, 3 * (128 * 128 * 128) * typeSize + 2 * (128 * 128) * typeSize); + EXPECT_EQ(minValue, 3 * 128 * typeSize + 2 * (128 * 128) * typeSize); - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - AscendC::GetAsinhTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 3); - EXPECT_EQ(extraBuf, 0); + AscendC::GetLayerNormNDTillingInfo(layernormShape, stackBufferSize, typeSize, isReuseSource, tilling); + EXPECT_EQ(tilling.get_tmpBufSize(), stackBufferSize / sizeof(float)); } -TEST_F(TestTiling, TestAsinhTilingHalf) +TEST_F(TestTiling, TestRmsnormTiling) { - std::vector shapeDims = { 128, 128 }; - auto asinhShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - AscendC::GetAsinhMaxMinTmpSize(asinhShape, 2, true, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 3); - EXPECT_EQ(maxValue, 128 * 128 * 3 * 2); - - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - AscendC::GetAsinhTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 3); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestAcoshTilingHalf) -{ - std::vector shapeDims = { 128, 128 }; - auto acoshShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - AscendC::GetAcoshMaxMinTmpSize(acoshShape, 2, true, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 2); - EXPECT_EQ(maxValue, 128 * 128 * 2 * 2); - - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetAcoshTmpBufferFactorSize(2, 
maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 2); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestAcoshTilingFloat) -{ - std::vector shapeDims = { 128, 128 }; - auto acoshShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - AscendC::GetAcoshMaxMinTmpSize(acoshShape, 4, true, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 1); - EXPECT_EQ(maxValue, 128 * 128 * 1 * 4); - - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - AscendC::GetAcoshTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 1); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestSelectWithBytesMaskTilingSameAxis) -{ - const auto shape = ge::Shape({ 8, 128 }); - const auto scalarShape = ge::Shape({1}); - uint32_t maxValue; - uint32_t minValue; - GetSelectWithBytesMaskMaxMinTmpSize(shape, scalarShape, 2, shape, 1, true, maxValue, minValue); - EXPECT_EQ(minValue, 128 * 8 * 2 + 512); - EXPECT_EQ(maxValue, 128 * 8 * 2 + 512); - GetSelectWithBytesMaskMaxMinTmpSize(scalarShape, shape, 2, shape, 1, false, maxValue, minValue); - EXPECT_EQ(minValue, 128 * 8 * 2 + 512); - EXPECT_EQ(maxValue, 128 * 8 * 2 + 512); -} - -TEST_F(TestTiling, TestSelectWithBytesMaskTilingSameAxisLargeShape) -{ - const auto shape = ge::Shape({ 128, 128 }); - const auto scalarShape = ge::Shape({1}); - uint32_t maxValue; - uint32_t minValue; - GetSelectWithBytesMaskMaxMinTmpSize(shape, scalarShape, 2, shape, 1, true, maxValue, minValue); - EXPECT_EQ(minValue, 4096 * 2 + 512); - EXPECT_EQ(maxValue, 128 * 128 * 2 + 512); - GetSelectWithBytesMaskMaxMinTmpSize(scalarShape, shape, 2, shape, 1, false, maxValue, minValue); - EXPECT_EQ(minValue, 4096 * 2 + 512); - EXPECT_EQ(maxValue, 128 * 128 * 2 + 512); -} - -TEST_F(TestTiling, TestSelectWithBytesMaskTilingSameAxisSmallShape) -{ - const auto shape = ge::Shape({ 1, 16 }); - const auto scalarShape = ge::Shape({1}); - uint32_t maxValue; - uint32_t minValue; - GetSelectWithBytesMaskMaxMinTmpSize(shape, scalarShape, 2, shape, 
1, true, maxValue, minValue); - EXPECT_EQ(minValue, 1024); - EXPECT_EQ(maxValue, 1024); - GetSelectWithBytesMaskMaxMinTmpSize(scalarShape, shape, 2, shape, 1, false, maxValue, minValue); - EXPECT_EQ(minValue, 1024); - EXPECT_EQ(maxValue, 1024); -} - -TEST_F(TestTiling, TestSelectWithBytesMaskTilingDiffAxis) -{ - const auto srcShape = ge::Shape({ 8, 128 }); - const auto scalarShape = ge::Shape({1}); - const auto maskShape = ge::Shape({ 8, 160 }); - uint32_t maxValue; - uint32_t minValue; - GetSelectWithBytesMaskMaxMinTmpSize(srcShape, scalarShape, 2, maskShape, 1, true, maxValue, minValue); - EXPECT_EQ(minValue, 128 * 8 * 2 + 512); - EXPECT_EQ(maxValue, 128 * 8 * 2 + 512); - GetSelectWithBytesMaskMaxMinTmpSize(srcShape, scalarShape, 2, maskShape, 1, false, maxValue, minValue); - EXPECT_EQ(minValue, 128 * 8 * 2 + 512 + 8 * 128); - EXPECT_EQ(maxValue, 128 * 8 * 2 + 512 + 8 * 128); - GetSelectWithBytesMaskMaxMinTmpSize(scalarShape, srcShape, 2, maskShape, 1, true, maxValue, minValue); - EXPECT_EQ(minValue, 128 * 8 * 2 + 512); - EXPECT_EQ(maxValue, 128 * 8 * 2 + 512); -} - -TEST_F(TestTiling, TestSelectWithBytesMaskTilingDiffAxisLargeShape) -{ - const auto srcShape = ge::Shape({ 128, 128 }); - const auto scalarShape = ge::Shape({1}); - const auto maskShape = ge::Shape({ 128, 160 }); - uint32_t maxValue; - uint32_t minValue; - GetSelectWithBytesMaskMaxMinTmpSize(srcShape, scalarShape, 2, maskShape, 1, true, maxValue, minValue); - EXPECT_EQ(minValue, 4096 * 2 + 512); - EXPECT_EQ(maxValue, 128 * 128 * 2 + 512); - GetSelectWithBytesMaskMaxMinTmpSize(srcShape, scalarShape, 2, maskShape, 1, false, maxValue, minValue); - EXPECT_EQ(minValue, 4096 * 2 + 512 + 128 * 128); - EXPECT_EQ(maxValue, 128 * 128 * 2 + 512 + 128 * 128); - GetSelectWithBytesMaskMaxMinTmpSize(scalarShape, srcShape, 2, maskShape, 1, true, maxValue, minValue); - EXPECT_EQ(minValue, 4096 * 2 + 512); - EXPECT_EQ(maxValue, 128 * 128 * 2 + 512); -} - -TEST_F(TestTiling, 
TestSelectWithBytesMaskTilingDiffAxisSmallShape) -{ - const auto srcShape = ge::Shape({ 1, 16 }); - const auto scalarShape = ge::Shape({1}); - const auto maskShape = ge::Shape({ 1, 32 }); - uint32_t maxValue; - uint32_t minValue; - GetSelectWithBytesMaskMaxMinTmpSize(srcShape, scalarShape, 2, maskShape, 1, true, maxValue, minValue); - EXPECT_EQ(minValue, 1024); - EXPECT_EQ(maxValue, 1024); - GetSelectWithBytesMaskMaxMinTmpSize(srcShape, scalarShape, 2, maskShape, 1, false, maxValue, minValue); - EXPECT_EQ(minValue, 1024 + 32); - EXPECT_EQ(maxValue, 1024 + 32); - GetSelectWithBytesMaskMaxMinTmpSize(scalarShape, srcShape, 2, maskShape, 1, true, maxValue, minValue); - EXPECT_EQ(minValue, 1024); - EXPECT_EQ(maxValue, 1024); -} - -TEST_F(TestTiling, TestLayernormTiling) -{ - const uint32_t stackBufferSize = 100 * 1024; - const uint32_t typeSize = 4; - - std::vector shapeDims = { 128, 128, 128, 128, 128, 128 }; - auto layernormShape = ge::Shape(shapeDims); - const bool isReuseSource = false; - optiling::LayerNormTiling tilling; - - uint32_t minValue = 0; - uint32_t maxValue = 0; - - AscendC::GetLayerNormMaxMinTmpSize(layernormShape, typeSize, isReuseSource, maxValue, minValue); - EXPECT_EQ(maxValue, 3 * (128 * 128 * 128) * typeSize + 2 * (128 * 128) * typeSize); - EXPECT_EQ(minValue, 3 * 128 * typeSize + 2 * (128 * 128) * typeSize); - - AscendC::GetLayerNormNDTillingInfo(layernormShape, stackBufferSize, typeSize, isReuseSource, tilling); - EXPECT_EQ(tilling.get_tmpBufSize(), stackBufferSize / sizeof(float)); -} - -TEST_F(TestTiling, TestGroupnormTiling) -{ - const uint32_t stackBufferSize = 100 * 1024; - const uint32_t typeSize = 4; - const uint32_t groupNum = 4; - - std::vector shapeDims = { 16, 16, 8, 8}; - auto groupnormShape = ge::Shape(shapeDims); - const bool isReuseSource = false; - optiling::GroupNormTiling tilling; - - uint32_t minValue = 0; - uint32_t maxValue = 0; - - AscendC::GetGroupNormMaxMinTmpSize(groupnormShape, typeSize, isReuseSource, groupNum, 
maxValue, minValue); - EXPECT_EQ(maxValue, 3 * (16 * 16 * 8 * 8) * typeSize + 2 * groupNum * 16 * typeSize); - EXPECT_EQ(minValue, 3 * (16 / 4 * 8 * 8) * typeSize + 2 * groupNum * 16 * typeSize); - - AscendC::GetGroupNormNDTilingInfo(groupnormShape, stackBufferSize, typeSize, isReuseSource, groupNum, tilling); - EXPECT_EQ(tilling.get_tmpBufSize(), stackBufferSize / sizeof(float)); -} -TEST_F(TestTiling, TestRmsnormTiling) -{ - constexpr uint32_t bLength = 4; - constexpr uint32_t sLength = 32; - constexpr uint32_t hLength = 16; - constexpr uint32_t bsLength = bLength * sLength; - constexpr uint32_t bshLength = bLength * sLength * hLength; - std::vector shapeDims = {bLength, sLength, hLength}; - auto shape = ge::Shape(shapeDims); - constexpr uint32_t typeSize = 4; - constexpr uint32_t ONE_BLK_FLOAT = 8; - + constexpr uint32_t bLength = 4; + constexpr uint32_t sLength = 32; + constexpr uint32_t hLength = 16; + constexpr uint32_t bsLength = bLength * sLength; + constexpr uint32_t bshLength = bLength * sLength * hLength; + std::vector shapeDims = {bLength, sLength, hLength}; + auto shape = ge::Shape(shapeDims); + constexpr uint32_t typeSize = 4; + constexpr uint32_t ONE_BLK_FLOAT = 8; + uint32_t maxValue = 0; uint32_t minValue = 0; // common scene @@ -1732,8 +1098,8 @@ TEST_F(TestTiling, TestRmsnormTiling) auto shape2 = ge::Shape({1,8,128}); res = AscendC::GetRmsNormTilingInfo(shape2, shape2, stackBufferSize, typeSize, tiling, true); EXPECT_EQ(res, true); - EXPECT_EQ(tiling.get_mainBshLength(), 896); - EXPECT_EQ(tiling.get_mainBsLength(), 7); + EXPECT_EQ(tiling.get_mainBshLength(), 128); + EXPECT_EQ(tiling.get_mainBsLength(), 1); stackBufferSize = (8*128 + 8)*4; // shape: 1,8,128 res = AscendC::GetRmsNormTilingInfo(shape2, shape2, stackBufferSize, typeSize, tiling, true); @@ -1761,30 +1127,12 @@ TEST_F(TestTiling, TestRmsnormTiling) res = AscendC::GetRmsNormTilingInfo(shape2, shape3, stackBufferSize, typeSize, tiling); EXPECT_EQ(res, false); + // abnormal case: basic 
block doesnot support h >= 2048 stackBufferSize = 16*2048*4; auto shape4 = ge::Shape({1,8,2048}); res = AscendC::GetRmsNormTilingInfo(shape4, shape4, stackBufferSize, typeSize, tiling, true); EXPECT_EQ(res, false); - - stackBufferSize = 2048; - shape4 = ge::Shape({14,1,56}); - res = AscendC::GetRmsNormTilingInfo(shape4, shape4, stackBufferSize, typeSize, tiling); - EXPECT_EQ(res, true); - EXPECT_EQ(tiling.get_mainBshLength(), 448); - EXPECT_EQ(tiling.get_mainBsLength(), 8); - EXPECT_EQ(tiling.get_tailBshLength(), 336); - EXPECT_EQ(tiling.get_tailBsLength(), 6); - EXPECT_EQ(tiling.get_loopRound(), 1); - - stackBufferSize = 2080; - res = AscendC::GetRmsNormTilingInfo(shape4, shape4, stackBufferSize, typeSize, tiling); - EXPECT_EQ(res, true); - EXPECT_EQ(tiling.get_mainBshLength(),504); - EXPECT_EQ(tiling.get_mainBsLength(), 9); - EXPECT_EQ(tiling.get_tailBshLength(), 280); - EXPECT_EQ(tiling.get_tailBsLength(), 5); - EXPECT_EQ(tiling.get_loopRound(), 1); } TEST_F(TestTiling, TestBatchnormTiling) @@ -1957,39 +1305,6 @@ TEST_F(TestTiling, TestDeepnormTiling) EXPECT_EQ(tiling.get_oneTmpSize(), 512); } -TEST_F(TestTiling, TestExpTiling) -{ - std::vector shapeDims = {128, 128}; - auto expShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - - // float isReuseSrc = false 3 tmpBuffer - AscendC::GetExpMaxMinTmpSize(expShape, 4, false, maxValue, minValue); - EXPECT_EQ(minValue, 3 * 256); - EXPECT_EQ(maxValue, 3 * 128 * 128 * 4); - // float isReuseSrc = true 2 tmpBuffer - AscendC::GetExpMaxMinTmpSize(expShape, 4, true, maxValue, minValue); - EXPECT_EQ(minValue, 2 * 256); - EXPECT_EQ(maxValue, 2 * 128 * 128 * 4); - // half 4 tmpBuffer - AscendC::GetExpMaxMinTmpSize(expShape, 2, false, maxValue, minValue); - EXPECT_EQ(minValue, 4 * 256); - EXPECT_EQ(maxValue, 4 * 128 * 128 * 4); - AscendC::GetExpMaxMinTmpSize(expShape, 2, true, maxValue, minValue); - EXPECT_EQ(minValue, 4 * 256); - EXPECT_EQ(maxValue, 4 * 128 * 128 * 4); - - uint32_t maxLiveNodeCnt; - 
uint32_t extraBuf; - AscendC::GetExpTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 3); - EXPECT_EQ(extraBuf, 0); - AscendC::GetExpTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 8); - EXPECT_EQ(extraBuf, 0); -} - TEST_F(TestTiling, TestMatmulApiTilngFactorSplit1) { MatmulApiTiling tiling; @@ -2331,7 +1646,6 @@ TEST_F(TestTiling, TestMatmulApiTilngMultiCoreBTSCM4) EXPECT_EQ(res, 0); } - TEST_F(TestTiling, TestMatmulApiTilngSingleCoreFullLoadCase) { optiling::TCubeTiling tilingData; @@ -2373,59 +1687,6 @@ TEST_F(TestTiling, TestMatmulApiTilngMultiCoreBTSCM5) EXPECT_EQ(res, 0); } -TEST_F(TestTiling, TestConcatTiling) -{ - fe::PlatFormInfos platform_info; - auto plat = platform_ascendc::PlatformAscendC(&platform_info); - const uint32_t elemCount = 128; - - AscendC::GetConcatTmpSize(plat, elemCount, 2); -} - -TEST_F(TestTiling, TestSortTiling) -{ - fe::PlatFormInfos platform_info; - auto plat = platform_ascendc::PlatformAscendC(&platform_info); - const uint32_t elemCount = 128; - - AscendC::GetSortTmpSize(plat, elemCount, 4); -} - -TEST_F(TestTiling, TestUnPadTiling) -{ - const uint32_t stackBufferSize = 100 * 1024; - const uint32_t typeSize = 4; - - std::vector shapeDims = { 32, 32 }; - auto srcShape = ge::Shape(shapeDims); - optiling::UnPadTiling tiling; - - AscendC::UnPadTilingFunc(srcShape, 0, typeSize, tiling); - AscendC::UnPadTilingFunc(srcShape, stackBufferSize, typeSize, tiling); - fe::PlatFormInfos platform_info; - auto plat = platform_ascendc::PlatformAscendC(&platform_info); - uint32_t maxValue = 0; - uint32_t minValue = 0; - AscendC::GetUnPadMaxMinTmpSize(plat, srcShape, typeSize, maxValue, minValue); -} - -TEST_F(TestTiling, TestPadTiling) -{ - const uint32_t stackBufferSize = 100 * 1024; - const uint32_t typeSize = 4; - - std::vector shapeDims = { 32, 32}; - std::vector ori_shape_dims = { 32, 31 }; - auto srcShape = ge::Shape(shapeDims); - auto oriSrcShape = ge::Shape(ori_shape_dims); - 
optiling::PadTiling tiling; - - AscendC::PadTilingFunc(srcShape, oriSrcShape, stackBufferSize, typeSize, tiling); - uint32_t maxValue = 0; - uint32_t minValue = 0; - AscendC::GetPadMaxMinTmpSize(srcShape, typeSize, maxValue, minValue); -} - TEST_F(TestTiling, TestLayernormGradTiling) { const uint32_t stackBufferSize = 100 * 1024; @@ -2477,23 +1738,6 @@ TEST_F(TestTiling, TestLayernormGradBetaTiling) EXPECT_EQ(tiling.get_stackBufferSize(), stackBufferSize / sizeof(float)); } -TEST_F(TestTiling, TestConfusionTransposeTiling) -{ - const uint32_t stackBufferSize = 100 * 1024; - const uint32_t typeSize = 2; - - std::vector shapeDims = { 1, 2, 64, 32 }; - auto srcShape = ge::Shape(shapeDims); - optiling::ConfusionTransposeTiling tiling; - AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 1, tiling); - AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 2, tiling); - AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 3, tiling); - AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 4, tiling); - AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 5, tiling); - AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 6, tiling); - AscendC::GetConfusionTransposeTilingInfo(srcShape, stackBufferSize, typeSize, 7, tiling); -} - TEST_F(TestTiling, TestMatmulApiTilngL0BNoDB) { MatmulApiTiling tiling; @@ -2620,7 +1864,7 @@ TEST_P(RnnTilingbTestSuite, TestMatmulApiTilngRnnRealCase) matmul_tiling::DataType ::DT_FLOAT); rnnMatmul2.SetBiasType(matmul_tiling::TPosition::VECCALC, matmul_tiling::CubeFormat::ND, (matmul_tiling::DataType)dataType); - // full loaded + // full load auto ret = rnnMatmul.SetBias(true); ret = rnnMatmul.SetDim(rnnParams.sysAivCoreNum / 4); int32_t input_align = MathUtil::CeilDivision(rnnParams.inputSize, 16) * 16; @@ -2678,7 +1922,7 @@ TEST_P(RnnTilingbTestSuite, TestMatmulApiTilngRnnRealCase) sizeof(float); 
EXPECT_LT(l1UsedSize, 512 * 1024 - 64); rnnParams.usedCoreNum = dim * 4; - } else { // part of full loaded + } else { // part of full load // two matmul time sharing auto ret = rnnMatmul.SetBias(true); ret = rnnMatmul.SetDim(rnnParams.sysAivCoreNum / 4); @@ -2774,7 +2018,7 @@ TEST_P(RnnTilingbTestSuite, TestMatmulApiTilngRnnRealCase) rnnParams.baseN = rnnMatmul.GetBaseN(); rnnParams.baseK = rnnMatmul.GetBaseK(); // get output info ret = rnnMatmul.GetSingleShape(rnnParams.singleM, rnnParams.singleN, - rnnParams.singleK); // get single core data + rnnParams.singleK); // get single core data ret = rnnMatmul.GetCoreNum(dim, mDim, nDim); // get used blockdim after multi-cores cut, carried by user to kernel, contrl Kernel business // input mm @@ -2807,6 +2051,7 @@ TEST_P(RnnTilingbTestSuite, TestMatmulApiTilngRnnRealCase) tilingData.hiddenMMParam.get_depthB1()) * sizeof(float); EXPECT_LT(l1UsedSize, 512 * 1024 - 64); + // mm basic property rnnParams.usedCoreNum = dim; } } @@ -2829,129 +2074,6 @@ TEST_F(TestTiling, TestMatmulApiTilngSetShapeZero) EXPECT_EQ(ret, -1); } -// #if __CCE_AICORE__ == 200 -// TEST_F(TestTiling, TestPlatformAscendC) -// { -// fe::PlatFormInfos platform_info; -// auto plat = platform_ascendc::PlatformAscendC(&platform_info); -// EXPECT_EQ(plat.GetCoreNumVector(), 8); -// EXPECT_EQ(plat.GetCoreNumVector() + plat.GetCoreNumAic() , 18); -// } -// #endif - -// #if __CCE_AICORE__ == 220 -// extern void platfrom_stub_set_num_aic(const char *num); -// extern void platfrom_stub_set_num_aiv(const char *num); -// extern void platfrom_stub_set_num_cub(const char *num); -// extern void platfrom_stub_set_ctl(const char *num); -// extern void platfrom_stub_set_chip_version(const char *num); -// extern void platfrom_stub_set_num(uint32_t num); -// TEST_F(TestTiling, TestPlatformAscendC) -// { -// fe::PlatFormInfos platform_info; -// auto plat = platform_ascendc::PlatformAscendC(&platform_info); -// uint64_t ub_size, l1_size, l0; -// uint64_t l2_bw, hbm_bw, bw; 
-// plat.GetCoreMemSize(platform_ascendc::CoreMemType::UB, ub_size); -// plat.GetCoreMemSize(platform_ascendc::CoreMemType::L1, l1_size); -// EXPECT_EQ(ub_size, 196352); -// EXPECT_EQ(l1_size, 524032); -// plat.GetCoreMemSize(platform_ascendc::CoreMemType::L0_A, l0); -// EXPECT_EQ(l0, 65536); -// plat.GetCoreMemSize(platform_ascendc::CoreMemType::L0_B, l0); -// EXPECT_EQ(l0, 65536); -// plat.GetCoreMemSize(platform_ascendc::CoreMemType::L0_C, l0); -// EXPECT_EQ(l0, 65536 * 2); -// plat.GetCoreMemBw(platform_ascendc::CoreMemType::L2, l2_bw); -// plat.GetCoreMemBw(platform_ascendc::CoreMemType::HBM, hbm_bw); -// EXPECT_EQ(l2_bw, 110); -// EXPECT_EQ(hbm_bw, 32); -// plat.GetCoreMemBw(platform_ascendc::CoreMemType::UB, bw); -// EXPECT_EQ(plat.GetCoreNum(), 48); -// EXPECT_EQ(plat.GetCoreNumAic(), 24); -// EXPECT_EQ(plat.GetCoreNumAiv(), 48); -// platfrom_stub_set_num_cub("20"); -// EXPECT_EQ(plat.GetCoreNumAic(), 20); -// platfrom_stub_set_num_aiv("40"); -// EXPECT_EQ(plat.GetCoreNumAiv(), 40); -// platfrom_stub_set_ctl("AICore"); -// EXPECT_EQ(plat.GetCoreNumAic(), 24); -// EXPECT_EQ(plat.GetCoreNumAiv(), 24); -// platfrom_stub_set_num_aic("20"); -// EXPECT_EQ(plat.GetCoreNumAic(), 20); -// EXPECT_EQ(plat.GetCoreNumAiv(), 20); -// EXPECT_EQ(bw, 0); -// EXPECT_EQ(plat.CalcTschBlockDim(1, 0, 1), 1); -// EXPECT_EQ(plat.CalcTschBlockDim(1, 1, 0), 1); -// EXPECT_EQ(plat.CalcTschBlockDim(2, 1, 1), 2); -// EXPECT_EQ(plat.CalcTschBlockDim(2, 2, 1), 2); -// EXPECT_EQ(plat.CalcTschBlockDim(2, 1, 2), 1); -// EXPECT_EQ(plat.CalcTschBlockDim(3, 1, 2), 2); -// EXPECT_EQ(plat.CalcTschBlockDim(6, 1, 3), 2); - -// EXPECT_EQ(plat.GetLibApiWorkSpaceSize(), 16 * 1024 * 1024); -// platfrom_stub_set_chip_version("Ascend910"); -// EXPECT_EQ(plat.GetLibApiWorkSpaceSize(), 2 * 1024 * 1024); -// EXPECT_EQ(plat.GetSocVersion(), platform_ascendc::SocVersion::ASCEND910); -// EXPECT_EQ(plat.GetCoreNumVector(), 0); -// } -// #endif - -// #if __CCE_AICORE__ == 300 -// extern void 
platfrom_stub_set_num_aic(const char *num); -// extern void platfrom_stub_set_num_aiv(const char *num); -// extern void platfrom_stub_set_num_cub(const char *num); -// extern void platfrom_stub_set_ctl(const char *num); -// extern void platfrom_stub_set_chip_version(const char *num); -// extern void platfrom_stub_set_num(uint32_t num); -// TEST_F(TestTiling, TestPlatformAscendC) -// { -// fe::PlatFormInfos platform_info; -// auto plat = platform_ascendc::PlatformAscendC(&platform_info); -// uint64_t ub_size, l1_size, l0; -// uint64_t l2_bw, hbm_bw, bw; -// plat.GetCoreMemSize(platform_ascendc::CoreMemType::UB, ub_size); -// plat.GetCoreMemSize(platform_ascendc::CoreMemType::L1, l1_size); -// EXPECT_EQ(ub_size, 248 * 1024); -// EXPECT_EQ(l1_size, 1024 * 1024); -// plat.GetCoreMemSize(platform_ascendc::CoreMemType::L0_A, l0); -// EXPECT_EQ(l0, 65536); -// plat.GetCoreMemSize(platform_ascendc::CoreMemType::L0_B, l0); -// EXPECT_EQ(l0, 65536); -// plat.GetCoreMemSize(platform_ascendc::CoreMemType::L0_C, l0); -// EXPECT_EQ(l0, 65536 * 2); -// plat.GetCoreMemBw(platform_ascendc::CoreMemType::L2, l2_bw); -// plat.GetCoreMemBw(platform_ascendc::CoreMemType::HBM, hbm_bw); -// EXPECT_EQ(l2_bw, 256); -// EXPECT_EQ(hbm_bw, 17); -// plat.GetCoreMemBw(platform_ascendc::CoreMemType::UB, bw); -// EXPECT_EQ(plat.GetCoreNum(), 1); -// EXPECT_EQ(plat.GetCoreNumAic(), 1); -// EXPECT_EQ(plat.GetCoreNumAiv(), 1); -// platfrom_stub_set_num_cub("1"); -// EXPECT_EQ(plat.GetCoreNumAic(), 1); -// platfrom_stub_set_num_aiv("1"); -// EXPECT_EQ(plat.GetCoreNumAiv(), 1); -// platfrom_stub_set_ctl("AICore"); -// EXPECT_EQ(plat.GetCoreNumAic(), 1); -// EXPECT_EQ(plat.GetCoreNumAiv(), 1); -// platfrom_stub_set_num_aic("2"); -// EXPECT_EQ(plat.GetCoreNumAic(), 2); -// EXPECT_EQ(plat.GetCoreNumAiv(), 2); -// EXPECT_EQ(bw, 0); -// EXPECT_EQ(plat.CalcTschBlockDim(1, 0, 1), 1); -// EXPECT_EQ(plat.CalcTschBlockDim(1, 1, 0), 1); -// EXPECT_EQ(plat.CalcTschBlockDim(2, 1, 1), 2); -// 
EXPECT_EQ(plat.CalcTschBlockDim(2, 2, 1), 2); -// EXPECT_EQ(plat.CalcTschBlockDim(2, 1, 2), 1); -// EXPECT_EQ(plat.CalcTschBlockDim(3, 1, 2), 2); -// EXPECT_EQ(plat.CalcTschBlockDim(6, 1, 3), 2); - -// EXPECT_EQ(plat.GetLibApiWorkSpaceSize(), 2097152); -// EXPECT_EQ(plat.GetCoreNumVector(), 0); -// } -// #endif - TEST_F(TestTiling, TestMatmulApiTilngInt8Case1) { MatmulApiTiling tiling; @@ -3230,60 +2352,6 @@ TEST_F(TestTiling, TestMatmulApiTilngInt8Case9) EXPECT_EQ(ret, 0); } -TEST_F(TestTiling, TestErfTilingFloat) -{ - std::vector shapeDims = { 128, 128 }; - auto erfShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetErfMaxMinTmpSize(erfShape, 4, false, maxValue, minValue); - EXPECT_EQ(maxValue, 128 * 128 * 3 * 4); - EXPECT_EQ(minValue, 256 * 3); -} - -TEST_F(TestTiling, TestErfTilingHalf) -{ - std::vector shapeDims = { 128, 128 }; - auto erfShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetErfMaxMinTmpSize(erfShape, 2, false, maxValue, minValue); - EXPECT_EQ(maxValue, 128 * 128 * 2 * 8); - EXPECT_EQ(minValue, 256 * 8); - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetErfTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 8); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestErfcTilingFloat) -{ - std::vector shapeDims = { 128, 128 }; - auto erfcShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetErfcMaxMinTmpSize(erfcShape, 4, false, maxValue, minValue); - EXPECT_EQ(maxValue, 128 * 128 * 7 * 4); - EXPECT_EQ(minValue, 256 * 7); -} - -TEST_F(TestTiling, TestErfcTilingHalf) -{ - std::vector shapeDims = { 128, 128 }; - auto erfcShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetErfcMaxMinTmpSize(erfcShape, 2, false, maxValue, minValue); - EXPECT_EQ(maxValue, 128 * 128 * 2 * 16); - EXPECT_EQ(minValue, 256 * 16); - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetErfcTmpBufferFactorSize(2, 
maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 16); - EXPECT_EQ(extraBuf, 0); -} - TEST_F(TestTiling, TestMatmulApiTilngInt8Case10) { MultiCoreMatmulTiling tiling; @@ -3362,95 +2430,6 @@ TEST_F(TestTiling, TestMatmulApiTilngInt8Case13) EXPECT_EQ(ret, 0); } -TEST_F(TestTiling, TestCoshTilingFloat) -{ - std::vector shapeDims = { 128, 128 }; - auto coshShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetCoshMaxMinTmpSize(coshShape, 4, false, maxValue, minValue); - EXPECT_EQ(maxValue, 128 * 128 * 2 * 4); - EXPECT_EQ(minValue, 256 * 2); -} - -TEST_F(TestTiling, TestCoshTilingFloat512) -{ - std::vector shapeDims = { 512 }; - auto coshShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetCoshMaxMinTmpSize(coshShape, 4, false, maxValue, minValue); - EXPECT_EQ(maxValue, 512 * 4 * 2); - EXPECT_EQ(minValue, 256 * 2); -} - -TEST_F(TestTiling, TestCoshTilingHalf) -{ - std::vector shapeDims = { 128, 128 }; - auto coshShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetCoshMaxMinTmpSize(coshShape, 2, false, maxValue, minValue); - EXPECT_EQ(maxValue, 128 * 128 * 2 * 6); - EXPECT_EQ(minValue, 256 * 6); - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetCoshTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 6); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestSinTilingFloat) -{ - std::vector shapeDims = { 128, 128 }; - auto sinShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetSinMaxMinTmpSize(sinShape, 4, true, maxValue, minValue); - EXPECT_EQ(minValue, 2 * 256); - EXPECT_EQ(maxValue, 128 * 128 * 2 * 4); - GetSinMaxMinTmpSize(sinShape, 4, false, maxValue, minValue); - EXPECT_EQ(minValue, 3 * 256); - EXPECT_EQ(maxValue, 128 * 128 * 3 * 4); - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetSinTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 3); - EXPECT_EQ(extraBuf, 0); -} - 
-TEST_F(TestTiling, TestSinTilingHalf) -{ - std::vector shapeDims = { 128, 128 }; - auto sinShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetSinMaxMinTmpSize(sinShape, 2, false, maxValue, minValue); - EXPECT_EQ(maxValue, 128 * 128 * 8 * 2); - EXPECT_EQ(minValue, 8 * 256); - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetSinTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 8); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestAscendSumTiling) -{ - uint32_t n = 8; - uint32_t maxValue; - uint32_t minValue; - GetSumMaxMinTmpSize(n, 2, true, maxValue, minValue); - EXPECT_EQ(minValue, 32); - EXPECT_EQ(maxValue, 32); - - maxValue = 0; - minValue = 0; - GetSumMaxMinTmpSize(n, 4, false, maxValue, minValue); - EXPECT_EQ(minValue, 32); - EXPECT_EQ(maxValue, 32); -} - TEST_F(TestTiling, TestAscendSiluTiling) { std::vector shapeDims = { 512 }; @@ -3473,54 +2452,6 @@ TEST_F(TestTiling, TestAscendSwishTiling) EXPECT_EQ(maxValue, 0); } -TEST_F(TestTiling, TestAscendXorTiling) -{ - std::vector shapeDims = { 128, 128 }; - auto xorShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetXorMaxMinTmpSize(xorShape, 2, true, maxValue, minValue); - EXPECT_EQ(maxValue, 128 * 128 * 1 * 2); - EXPECT_EQ(minValue, 1 * 256); - GetXorTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 1); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestFracTilingFloat) -{ - std::vector shapeDims = { 128, 128 }; - auto fracShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetFracMaxMinTmpSize(fracShape, 4, true, maxValue, minValue); - EXPECT_EQ(minValue, 0); - EXPECT_EQ(maxValue, 0); - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetFracTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 0); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestFracTilingHalf) -{ - 
std::vector shapeDims = { 128, 128 }; - auto fracShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetFracMaxMinTmpSize(fracShape, 2, true, maxValue, minValue); - EXPECT_EQ(minValue, 1024); - EXPECT_EQ(maxValue, 131072); - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetFracTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 4); - EXPECT_EQ(extraBuf, 0); -} - #if __CCE_AICORE__ == 220 extern void platfrom_stub_set_chip_version(const char *num); TEST_F(TestTiling, TestTopkTiling_TopKModeNomal_isInitIndexTrue_Float_Inner64) @@ -4003,15 +2934,6 @@ TEST_F(TestTiling, TestTopkTiling_TopKModeSmall310P_HALF) } #endif -TEST_F(TestTiling, TestArithProgression) -{ - uint32_t maxValue; - uint32_t minValue; - GetArithProgressionMaxMinTmpSize(maxValue, minValue); - EXPECT_EQ(maxValue, 0); - EXPECT_EQ(minValue, 0); -} - TEST_F(TestTiling, TestGeGLUTilingFloat) { std::vector shapeDims = { 128, 128 }; @@ -4046,566 +2968,6 @@ TEST_F(TestTiling, TestGeGLUTilingHalf) EXPECT_EQ(extraBuf, 0); } -TEST_F(TestTiling, TestLgammaTilingFp32) -{ - std::vector shapeDims = { 128, 128 }; - auto shape = ge::Shape(shapeDims); - uint32_t maxSize; - uint32_t minSize; - GetLgammaMaxMinTmpSize(shape, 4, true, maxSize, minSize); - EXPECT_EQ(maxSize, 458752); - EXPECT_EQ(minSize, 1792); - - GetLgammaMaxMinTmpSize(shape, 4, false, maxSize, minSize); - EXPECT_EQ(maxSize, 524288); - EXPECT_EQ(minSize, 2048); - - shapeDims = { 8 }; - shape = ge::Shape(shapeDims); - GetLgammaMaxMinTmpSize(shape, 4, false, maxSize, minSize); - EXPECT_EQ(maxSize, 2048); - EXPECT_EQ(minSize, 2048); - - GetLgammaMaxMinTmpSize(shape, 4, true,maxSize, minSize); - EXPECT_EQ(maxSize, 1792); - EXPECT_EQ(minSize, 1792); - - uint32_t maxLiveNodeCnt = 0xffff; - uint32_t extraBuf = 0xffff; - GetLgammaTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 8); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestLgammaTilingHalf) -{ - std::vector 
shapeDims = { 128, 128 }; - auto shape = ge::Shape(shapeDims); - uint32_t maxSize; - uint32_t minSize; - - GetLgammaMaxMinTmpSize(shape, 2, false, maxSize, minSize); - EXPECT_EQ(maxSize, 128 * 128 * 2 * 13 * 2); - EXPECT_EQ(minSize, 13 * 2 * 256); - - shapeDims = { 8 }; - shape = ge::Shape(shapeDims); - GetLgammaMaxMinTmpSize(shape, 2, false, maxSize, minSize); - EXPECT_EQ(maxSize, 256 * 13 * 2); - EXPECT_EQ(minSize, 256 * 13 * 2); - - uint32_t maxLiveNodeCnt = 0xffff; - uint32_t extraBuf = 0xffff; - GetLgammaTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 13); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestDigammaTilingFp32) -{ - std::vector shapeDims = { 128, 128 }; - auto shape = ge::Shape(shapeDims); - uint32_t maxSize; - uint32_t minSize; - GetDigammaMaxMinTmpSize(shape, 4, true, maxSize, minSize); - EXPECT_EQ(maxSize, 393216); - EXPECT_EQ(minSize, 1536); - - GetDigammaMaxMinTmpSize(shape, 4, false, maxSize, minSize); - EXPECT_EQ(maxSize, 458752); - EXPECT_EQ(minSize, 1792); - - shapeDims = { 8 }; - shape = ge::Shape(shapeDims); - GetDigammaMaxMinTmpSize(shape, 4, false, maxSize, minSize); - EXPECT_EQ(maxSize, 1792); - EXPECT_EQ(minSize, 1792); - - GetDigammaMaxMinTmpSize(shape, 4, true,maxSize, minSize); - EXPECT_EQ(maxSize, 1536); - EXPECT_EQ(minSize, 1536); - - uint32_t maxLiveNodeCnt = 0xffff; - uint32_t extraBuf = 0xffff; - GetDigammaTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 7); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestDigammaTilingHalf) -{ - std::vector shapeDims = { 128, 128 }; - auto shape = ge::Shape(shapeDims); - uint32_t maxSize; - uint32_t minSize; - - GetDigammaMaxMinTmpSize(shape, 2, false, maxSize, minSize); - EXPECT_EQ(maxSize, 128 * 128 * 2 * 8 * 2); - EXPECT_EQ(minSize, 8 * 2 * 256); - - shapeDims = { 8 }; - shape = ge::Shape(shapeDims); - GetDigammaMaxMinTmpSize(shape, 2, false, maxSize, minSize); - EXPECT_EQ(maxSize, 256 * 8 * 2); - EXPECT_EQ(minSize, 
256 * 8 * 2); - - uint32_t maxLiveNodeCnt = 0xffff; - uint32_t extraBuf = 0xffff; - GetDigammaTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 16); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestAtanhTilingFloat) -{ - std::vector shapeDims = { 128, 128 }; - auto aTanhShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - AscendC::GetAtanhMaxMinTmpSize(aTanhShape, 4, true, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 1); - EXPECT_EQ(maxValue, 128 * 128 * 4 * 1); - - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - AscendC::GetAtanhTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 1); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestAtanhTilingHalf) -{ - std::vector shapeDims = { 128, 128 }; - auto aTanhShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - AscendC::GetAtanhMaxMinTmpSize(aTanhShape, 2, true, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 4); - EXPECT_EQ(maxValue, 128 * 128 * 2 * 4); - - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - AscendC::GetAtanhTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 4); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestSignTiling) -{ - std::vector shapeDims = { 128, 128 }; - auto signShape = ge::Shape(shapeDims); - uint32_t signNeedMaxSize; - uint32_t signNeedMinSize; - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetSignMaxMinTmpSize(signShape, 2, false, signNeedMaxSize, signNeedMinSize); - EXPECT_EQ(signNeedMaxSize, 3 * 128 * 128 * 2); - EXPECT_EQ(signNeedMinSize, 3 * 256); - - GetSignMaxMinTmpSize(signShape, 4, false, signNeedMaxSize, signNeedMinSize); - EXPECT_EQ(signNeedMaxSize, 3 * 128 * 128 * 4); - EXPECT_EQ(signNeedMinSize, 3 * 256); - - GetSignTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 3); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestAscendMeanTiling) -{ - uint32_t n = 8; - uint32_t maxValue; - 
uint32_t minValue; - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - - GetMeanMaxMinTmpSize(n, 2, 2, true, maxValue, minValue); - EXPECT_EQ(minValue, 32); - EXPECT_EQ(maxValue, 32); - - maxValue = 0; - minValue = 0; - GetMeanMaxMinTmpSize(n, 4, 4, true, maxValue, minValue); - EXPECT_EQ(minValue, 32); - EXPECT_EQ(maxValue, 32); - - GetMeanMaxMinTmpSize(n, 2, 4, true, maxValue, minValue); - EXPECT_EQ(minValue, 96); - EXPECT_EQ(maxValue, 96); - - GetMeanTmpBufferFactorSize(4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 1); - EXPECT_EQ(extraBuf, 0); -} - -// TEST_F(TestTiling, TestKernelContextBuildBase) -// { -// auto builder = context_ascendc::BuildKernelRunContext(2, 2); -// EXPECT_EQ(builder.kernelInputNum, 2); -// } - - -// TEST_F(TestTiling, TestKernelContextBuild) -// { -// gert::Shape input1_shape = {2, 1, 1, 1, 1, 1, 1, 2, 2}; -// int32_t input1_tensor_buffer[] = {0, 2, 3, 3, 1, 0, 0, 1}; -// gert::TensorData input1_tensor_data{(void*)input1_tensor_buffer, nullptr}; -// gert::Shape output_shape = {5, 3}; -// int64_t output_tensor_buffer[15]; -// gert::TensorData output_tensor_data{(void*)output_tensor_buffer, nullptr}; -// auto kernelHolder = -// context_ascendc::KernelRunContextBuilder() -// .KernelIONum(2, 2) -// .Inputs({reinterpret_cast(&input1_shape), -// reinterpret_cast(&input1_tensor_data)}) -// .Outputs({reinterpret_cast(&output_shape), reinterpret_cast(&output_tensor_data)}) -// .NodeIoNum(1, 1) -// .IrInputNum(1) -// .NodeInputTd(0, ge::DT_INT32, ge::FORMAT_ND, ge::FORMAT_ND) -// .NodeOutputTd(0, ge::DT_INT64, ge::FORMAT_ND, ge::FORMAT_ND) -// .Build(); -// auto context = kernelHolder.GetContext(); -// EXPECT_NE(context, nullptr); -// } - -// TEST_F(TestTiling, TestTilingContextBuildWithConstValue) -// { -// string active_type = "gelu"; -// gert::StorageShape x_shape = {{1024, 5120}, {1024, 5120}}; -// gert::StorageShape expert_tokens_shape = {{16}, {16}}; -// gert::StorageShape weight1_shape = {{16, 5120, 0}, {16, 5120, 0}}; -// 
gert::StorageShape bias1_shape = {{16, 0}, {16, 0}}; -// gert::StorageShape weight2_shape = {{16, 0, 5120}, {16, 0, 5120}}; -// gert::StorageShape bias2_shape = {{16, 5120}, {16, 5120}}; - -// gert::StorageShape output_shape = {{1024, 5210}, {1024, 5210}}; - -// std::vector expert_tokens_const_value (16, 1); -// std::vector x_const_value (1024 * 5120, 2.f); -// std::vector bias2_value (16 * 5120, 3.f); -// auto param = gert::TilingData::CreateCap(4096); -// auto workspace_size_holer = gert::ContinuousVector::Create(4096); -// auto ws_size = reinterpret_cast(workspace_size_holer.get()); -// auto holder = context_ascendc::TilingContextBuilder() -// .SetOpNameType("name", "tpye") -// .NodeIoNum(6, 1) -// .IrInstanceNum({1, 1, 1, 1, 1, 1}) -// .AddInputTd(0, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &x_shape, reinterpret_cast(x_const_value.data())) -// .AddInputTd(1, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &weight1_shape) -// .AddInputTd(2, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &weight2_shape) -// .AddInputTd(3, ge::DT_INT64, ge::FORMAT_ND, ge::FORMAT_ND, &expert_tokens_shape, reinterpret_cast(expert_tokens_const_value.data())) -// .AddInputTd(4, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &bias1_shape) -// .AddInputTd(5, ge::DT_BF16, ge::FORMAT_ND, ge::FORMAT_ND, &bias2_shape, reinterpret_cast(bias2_value.data())) -// .AddOutputTd(0, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &output_shape) -// .AddAttrs({ -// {"activation", ge::AnyValue::CreateFrom(active_type)}, -// {"inner_precise", ge::AnyValue::CreateFrom(1)} -// }) -// .TilingData(param.get()) -// .Workspace(ws_size) -// .Build(); - -// gert::TilingContext* tiling_context = holder.GetContext(); -// EXPECT_NE(tiling_context, nullptr); - -// } - -// TEST_F(TestTiling, TestTilingContextBuildAddInputs) -// { -// string active_type = "gelu"; -// gert::StorageShape x_shape = {{1024, 5120}, {1024, 5120}}; -// std::vector inputs; -// std::vector outputs; -// context_ascendc::TensorInfo input; -// 
input.shape = x_shape; -// input.dType = ge::DT_FLOAT16; -// input.oriFormat = ge::FORMAT_ND; -// input.format = ge::FORMAT_ND; -// input.dataPath = "1111"; -// inputs.push_back(input); -// context_ascendc::TensorInfo output; -// output.shape = x_shape; -// output.dType = ge::DT_FLOAT16; -// output.oriFormat = ge::FORMAT_ND; -// output.format = ge::FORMAT_ND; -// output.dataPath = "222"; -// outputs.push_back(output); - -// auto param = gert::TilingData::CreateCap(4096); -// auto workspace_size_holer = gert::ContinuousVector::Create(4096); -// auto ws_size = reinterpret_cast(workspace_size_holer.get()); -// auto holder = context_ascendc::TilingContextBuilder() -// .SetOpNameType("name", "tpye") -// .NodeIoNum(1, 1) -// .IrInstanceNum({1}) -// .AddInputs(inputs) -// .AddOutputs(outputs) -// .AddAttrs({ -// {"activation", ge::AnyValue::CreateFrom(active_type)}, -// {"inner_precise", ge::AnyValue::CreateFrom(1)} -// }) -// .TilingData(param.get()) -// .Workspace(ws_size) -// .Build(); - -// gert::TilingContext* tiling_context = holder.GetContext(); -// EXPECT_NE(tiling_context, nullptr); -// } - -// TEST_F(TestTiling, TestTilingContextBuildFailed) -// { -// string active_type = "gelu"; -// gert::StorageShape x_shape = {{-1, 5120}, {-1, 5120}}; -// std::vector x_const_value (1024 * 5120, 2.f); -// auto param = gert::TilingData::CreateCap(4096); -// auto workspace_size_holer = gert::ContinuousVector::Create(4096); -// auto ws_size = reinterpret_cast(workspace_size_holer.get()); -// auto holder = context_ascendc::TilingContextBuilder() -// .NodeIoNum(1, 1) -// .IrInstanceNum({1, 1}) -// .CompileInfo(nullptr) -// .PlatformInfo(nullptr) -// .AddInputTd(0, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &x_shape, reinterpret_cast(x_const_value.data())) -// .Workspace(ws_size) -// .Build(); - -// gert::TilingContext* tiling_context = holder.GetContext(); -// EXPECT_EQ(tiling_context, nullptr); -// } - -// TEST_F(TestTiling, TestTilingContextBuildWithBinFile) -// { -// string 
active_type = "gelu"; -// gert::StorageShape x_shape = {{1024, 5120}, {1024, 5120}}; -// gert::StorageShape expert_tokens_shape = {{16}, {16}}; -// gert::StorageShape weight1_shape = {{16, 5120, 0}, {16, 5120, 0}}; -// gert::StorageShape bias1_shape = {{16, 0}, {16, 0}}; -// gert::StorageShape weight2_shape = {{16, 0, 5120}, {16, 0, 5120}}; -// gert::StorageShape bias2_shape = {{16, 5120}, {16, 5120}}; -// gert::StorageShape output_shape = {{1024, 5210}, {1024, 5210}}; - -// std::vector expert_tokens_const_value (16, 1); - -// std::vector x_const_value (1024 * 5120, 2.f); -// std::vector bias2_value (16 * 5120, 3.f); -// auto param = gert::TilingData::CreateCap(4096); -// auto workspace_size_holer = gert::ContinuousVector::Create(4096); -// auto ws_size = reinterpret_cast(workspace_size_holer.get()); -// auto holder = context_ascendc::TilingContextBuilder() -// .SetOpNameType("name", "tpye") -// .NodeIoNum(6, 1) -// .IrInstanceNum({1, 1, 1, 1, 1, 1}) -// .AddInputTd(0, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &x_shape, reinterpret_cast(x_const_value.data())) -// .AddInputTd(1, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &weight1_shape) -// .AddInputTd(2, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &weight2_shape) -// .AddInputTd(3, ge::DT_INT64, ge::FORMAT_ND, ge::FORMAT_ND, &expert_tokens_shape, "./expert_tokens_data.bin") -// .AddInputTd(4, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &bias1_shape) -// .AddInputTd(5, ge::DT_BF16, ge::FORMAT_ND, ge::FORMAT_ND, &bias2_shape, reinterpret_cast(bias2_value.data())) -// .AddOutputTd(0, ge::DT_FLOAT16, ge::FORMAT_ND, ge::FORMAT_ND, &output_shape) -// .AddAttrs({ -// {"activation", ge::AnyValue::CreateFrom(active_type)}, -// {"inner_precise", ge::AnyValue::CreateFrom(1)} -// }) -// .TilingData(param.get()) -// .Workspace(ws_size) -// .Build(); -// gert::TilingContext* tiling_context = holder.GetContext(); -// EXPECT_EQ(tiling_context, nullptr); -// } - -TEST_F(TestTiling, TestAxpyTiling) -{ - uint32_t maxVal = 
0; - uint32_t minVal = 0; - GetAxpyMaxMinTmpSize(ge::Shape({128}), 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 0); - EXPECT_EQ(minVal, 0); - GetAxpyMaxMinTmpSize(ge::Shape({256}), 2, false, maxVal, minVal); - EXPECT_EQ(maxVal, 256 * 4 * 2); - EXPECT_EQ(minVal, 256 * 4); - GetAxpyMaxMinTmpSize(ge::Shape({32}), 2, false, maxVal, minVal); - EXPECT_EQ(maxVal, 256 * 4); - EXPECT_EQ(minVal, 256 * 4); - uint32_t extraBuf = 123; - uint32_t maxLivedNodesCnt = 123; - GetAxpyTmpBufferFactorSize(4, maxLivedNodesCnt, extraBuf); - EXPECT_EQ(extraBuf, 0); - EXPECT_EQ(maxLivedNodesCnt, 1); - GetAxpyTmpBufferFactorSize(2, maxLivedNodesCnt, extraBuf); - EXPECT_EQ(extraBuf, 0); - EXPECT_EQ(maxLivedNodesCnt, 4); -} - -TEST_F(TestTiling, TestCeilTilingFloat) -{ - std::vector shapeDims = { 128, 128 }; - auto ceilShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetCeilMaxMinTmpSize(ceilShape, sizeof(float), false, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 1); - EXPECT_EQ(maxValue, 128 * 128 * 1 * 4); - - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetCeilTmpBufferFactorSize(sizeof(float), maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 1); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestCeilTilingHalf) -{ - std::vector shapeDims = { 128, 128 }; - auto ceilShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetCeilMaxMinTmpSize(ceilShape, 2, false, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 2); - EXPECT_EQ(maxValue, 128 * 128 * 2 * 2); - - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetCeilTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 2); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestCeilTilingHalf512) -{ - std::vector shapeDims = { 512 }; - auto ceilShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetCeilMaxMinTmpSize(ceilShape, 2, false, maxValue, minValue); - EXPECT_EQ(maxValue, 512 * 2 * 2); - EXPECT_EQ(minValue, 
256 * 2); -} - -TEST_F(TestTiling, TestFloorTilingFloat) -{ - std::vector shapeDims = { 128, 128 }; - auto floorShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetFloorMaxMinTmpSize(floorShape, sizeof(float), false, maxValue, minValue); - EXPECT_EQ(minValue, 0); - EXPECT_EQ(maxValue, 0); - - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetFloorTmpBufferFactorSize(sizeof(float), maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 0); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestFloorTilingHalf) -{ - std::vector shapeDims = { 128, 128 }; - auto floorShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetFloorMaxMinTmpSize(floorShape, 2, false, maxValue, minValue); - EXPECT_EQ(minValue, 256 * 2); - EXPECT_EQ(maxValue, 128 * 128 * 2 * 2); - - uint32_t maxLiveNodeCnt; - uint32_t extraBuf; - GetFloorTmpBufferFactorSize(2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 2); - EXPECT_EQ(extraBuf, 0); -} - -TEST_F(TestTiling, TestFloorTilingHalf512) -{ - std::vector shapeDims = { 512 }; - auto floorShape = ge::Shape(shapeDims); - uint32_t maxValue = 0; - uint32_t minValue = 0; - GetFloorMaxMinTmpSize(floorShape, 2, false, maxValue, minValue); - EXPECT_EQ(maxValue, 512 * 2 * 2); - EXPECT_EQ(minValue, 256 * 2); -} - -// TEST_F(TestTiling, TestGetSocVersion) -// { -// fe::PlatFormInfos platform_info; -// auto plat = platform_ascendc::PlatformAscendC(&platform_info); - -// MOCKER_CPP(&fe::PlatFormInfos::GetPlatformResWithLock, -// bool(fe::PlatFormInfos::*)(const std::string &, const std::string &, std::string &)) -// .stubs() -// .will(returnValue(false)); - -// platform_ascendc::SocVersion ret = plat.GetSocVersion(); -// EXPECT_EQ(ret, platform_ascendc::SocVersion::RESERVED_VERSION); -// } - -// TEST_F(TestTiling, TestCoreNum) -// { -// fe::PlatFormInfos platform_info; -// auto plat = platform_ascendc::PlatformAscendC(&platform_info); - -// 
MOCKER_CPP(&fe::PlatFormInfos::GetPlatformResWithLock, -// bool(fe::PlatFormInfos::*)(const std::string &, const std::string &, std::string &)) -// .stubs() -// .will(returnValue(false)); - -// uint32_t ret1 = plat.GetCoreNumAic(); -// uint32_t ret2 = plat.GetCoreNumAiv(); -// EXPECT_EQ(ret1, 0); -// EXPECT_EQ(ret2, 0); -// } - -// TEST_F(TestTiling, TestGetLibApiWorkSpaceSize) -// { -// fe::PlatFormInfos platform_info; -// auto plat = platform_ascendc::PlatformAscendC(&platform_info); - -// MOCKER_CPP(&fe::PlatFormInfos::GetPlatformResWithLock, -// bool(fe::PlatFormInfos::*)(const std::string &, const std::string &, std::string &)) -// .stubs() -// .will(returnValue(false)); - -// uint32_t ret1 = plat.GetLibApiWorkSpaceSize(); -// EXPECT_EQ(ret1, static_cast(-1)); -// } -// TEST_F(TestTiling, TestPlatformAscendCManager) -// { -// void *handle; -// int a = 7; -// handle = &a; - -// MOCKER_CPP(&fe::PlatFormInfos::GetPlatformResWithLock, -// bool(fe::PlatFormInfos::*)(const std::string &, const std::string &, std::string &)) -// .stubs() -// .will(returnValue(false)); - -// auto ret2 = platform_ascendc::PlatformAscendCManager::GetInstance(); -// } - -// TEST_F(TestTiling, TestGetVectorCoreNum) -// { -// fe::PlatFormInfos platform_info; -// auto plat = platform_ascendc::PlatformAscendC(&platform_info); - -// MOCKER_CPP(&fe::PlatFormInfos::GetPlatformResWithLock, -// bool(fe::PlatFormInfos::*)(const std::string &, const std::string &, std::string &)) -// .stubs() -// .will(returnValue(false)); -// MOCKER_CPP(&platform_ascendc::PlatformAscendC::GetSocVersion, -// platform_ascendc::SocVersion(platform_ascendc::PlatformAscendC::*)(void) const) -// .stubs() -// .will(returnValue(platform_ascendc::SocVersion::ASCEND310P)); - -// uint32_t ret1 = plat.GetCoreNumVector(); -// EXPECT_EQ(ret1, static_cast(0)); -// MOCKER_CPP(&platform_ascendc::PlatformAscendCManager::PlatformAscendCInit) -// .stubs() -// .will(returnValue(platform_info)); -// auto ret2 = 
platform_ascendc::PlatformAscendCManager::GetInstance(); - -// } - TEST_F(TestTiling, TestReGluFloat16OrBf16) { const std::vector srcShapeDims = { 8, 128 }; @@ -4628,390 +2990,6 @@ TEST_F(TestTiling, TestReGluFloat32) EXPECT_EQ(maxValue, 256); } -#if __CCE_AICORE__ == 220 -extern void platfrom_stub_set_chip_version(const char *num); -TEST_F(TestTiling, TestBroadCast220) -{ - fe::PlatFormInfos platform_info; - auto plat = platform_ascendc::PlatformAscendC(&platform_info); - platfrom_stub_set_chip_version("Ascend910B"); - uint32_t firstDim = 32; - uint32_t lastDim = 32; - std::vector srcShapeDims = {firstDim, 1}; - auto srcShape = ge::Shape(srcShapeDims); - std::vector dstShapeDims = {firstDim, lastDim}; - auto dstShape = ge::Shape(dstShapeDims); - uint32_t maxValue{0}; - uint32_t minValue{0}; - constexpr uint32_t halfSize = 2; - constexpr uint32_t halfOneBlockElementNum = 16; - constexpr uint32_t minHalfAlignSize = halfOneBlockElementNum * halfOneBlockElementNum * halfSize; - constexpr uint32_t BRCB_ONE_SIZE = 8; - uint32_t firstDimAlignNum = (firstDim + BRCB_ONE_SIZE - 1) / BRCB_ONE_SIZE * BRCB_ONE_SIZE; - uint32_t maxHalfAlignSize = firstDimAlignNum * halfOneBlockElementNum * halfSize; - GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, halfSize, false, maxValue, minValue); - EXPECT_EQ(minValue, minHalfAlignSize); - EXPECT_EQ(maxValue, maxHalfAlignSize); - - srcShapeDims = {firstDim, 1}; - srcShape = ge::Shape(srcShapeDims); - uint32_t lastDimNotAlign = 31; - dstShapeDims = {firstDim, lastDimNotAlign}; - dstShape = ge::Shape(dstShapeDims); - - uint32_t blockDimAlignBlockNum = (lastDimNotAlign + halfOneBlockElementNum - 1) / halfOneBlockElementNum; - uint32_t blockDimAlign = blockDimAlignBlockNum * halfOneBlockElementNum; - uint32_t minCopyTempBufferSize = halfOneBlockElementNum * blockDimAlign * halfSize; - auto minHalfNotAlignSize = minHalfAlignSize + minCopyTempBufferSize; - - uint32_t maxCopyTempBufferSize = firstDim * blockDimAlign * halfSize; - uint32_t 
maxHalfNotAlignValue = maxHalfAlignSize + maxCopyTempBufferSize; - - GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, halfSize, false, maxValue, minValue); - EXPECT_EQ(minValue, minHalfNotAlignSize); - EXPECT_EQ(maxValue, maxHalfNotAlignValue); - - constexpr uint32_t int8Size = 1; - srcShapeDims = {firstDim, 1}; - srcShape = ge::Shape(srcShapeDims); - dstShapeDims = {firstDim, lastDim}; - dstShape = ge::Shape(dstShapeDims); - const uint32_t alignSrcSize = - ((firstDim + halfOneBlockElementNum - 1) / halfOneBlockElementNum) * halfOneBlockElementNum; - uint32_t alignDstSize = - ((firstDim * lastDim + halfOneBlockElementNum - 1) / halfOneBlockElementNum) * halfOneBlockElementNum; - uint32_t castTempBufferSize = (alignSrcSize + alignDstSize) * halfSize; - GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, int8Size, false, maxValue, minValue); - EXPECT_EQ(minValue, minHalfAlignSize + castTempBufferSize); - EXPECT_EQ(maxValue, maxHalfAlignSize + castTempBufferSize); - - srcShapeDims = {firstDim, 1}; - srcShape = ge::Shape(srcShapeDims); - dstShapeDims = {firstDim, lastDimNotAlign}; - dstShape = ge::Shape(dstShapeDims); - alignDstSize = - ((firstDim * lastDimNotAlign + halfOneBlockElementNum - 1) / halfOneBlockElementNum) * halfOneBlockElementNum; - castTempBufferSize = (alignSrcSize + alignDstSize) * halfSize; - GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, int8Size, false, maxValue, minValue); - EXPECT_EQ(minValue, minHalfNotAlignSize + castTempBufferSize); - EXPECT_EQ(maxValue, maxHalfNotAlignValue + castTempBufferSize); -} - -TEST_F(TestTiling, TestPowerTiling) -{ - platfrom_stub_set_chip_version("Ascend910B"); - auto platformPtr = platform_ascendc::PlatformAscendCManager::GetInstance("Ascend910B"); - std::vector shapeDims = { 512 }; - auto powerShape = ge::Shape(shapeDims); - uint32_t maxVal; - uint32_t minVal; - GetPowerMaxMinTmpSize(powerShape, powerShape, false, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 512 * 4 * 4 + 256); - EXPECT_EQ(minVal, 256 * 
4 + 256); - GetPowerMaxMinTmpSize(powerShape, powerShape, true, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 512 * 4 * 6); - EXPECT_EQ(minVal, 256 * 6); - GetPowerMaxMinTmpSize(powerShape, powerShape, false, 2, false, maxVal, minVal); - EXPECT_EQ(maxVal, 512 * 2 * 14 + 256); - EXPECT_EQ(minVal, 256 * 7 + 256); - std::vector scalar_shape = { 1 }; - auto scalarShape = ge::Shape(scalar_shape); - GetPowerMaxMinTmpSize(powerShape, scalarShape, false, 2, false, maxVal, minVal); - EXPECT_EQ(maxVal, 512 * 2 * 14 + 256); - EXPECT_EQ(minVal, 256 * 7 + 256); - GetPowerMaxMinTmpSize(powerShape, scalarShape, true, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 512 * 4 * 7); - EXPECT_EQ(minVal, 256 * 7); - GetPowerMaxMinTmpSize(powerShape, scalarShape, false, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 512 * 4 * 5 + 256); - EXPECT_EQ(minVal, 256 * 5 + 256); - - std::vector shape1 = { 16 }; - auto powerShape1 = ge::Shape( shape1 ); - GetPowerMaxMinTmpSize(powerShape1, scalarShape, false, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 256 * 5 + 256); - EXPECT_EQ(minVal, 256 * 5 + 256); - GetPowerMaxMinTmpSize(powerShape1, scalarShape, false, 2, false, maxVal, minVal); - EXPECT_EQ(maxVal, 256 * 7 + 256); - EXPECT_EQ(minVal, 256 * 7 + 256); - GetPowerMaxMinTmpSize(powerShape1, scalarShape, true, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 256 * 7); - EXPECT_EQ(minVal, 256 * 7); - GetPowerMaxMinTmpSize(powerShape1, powerShape1, false, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 256 * 4 + 256); - EXPECT_EQ(minVal, 256 * 4 + 256); - GetPowerMaxMinTmpSize(powerShape1, powerShape1, false, 2, false, maxVal, minVal); - EXPECT_EQ(maxVal, 256 * 7 + 256); - EXPECT_EQ(minVal, 256 * 7 + 256); - GetPowerMaxMinTmpSize(powerShape1, powerShape1, true, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 256 * 6); - EXPECT_EQ(minVal, 256 * 6); -} - -TEST_F(TestTiling, TestPowerTilingFactorSize) -{ - platfrom_stub_set_chip_version("Ascend910B"); - auto platformPtr = 
platform_ascendc::PlatformAscendCManager::GetInstance("Ascend910B"); - uint32_t maxLiveNodeCnt = 0xffff; - uint32_t extraBuf = 0xffff; - GetPowerTmpBufferFactorSize(false, true, false, 4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 5); - EXPECT_EQ(extraBuf, 256); - GetPowerTmpBufferFactorSize(false, true, true, 4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 7); - EXPECT_EQ(extraBuf, 0); - GetPowerTmpBufferFactorSize(false, true, false, 2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 14); - EXPECT_EQ(extraBuf, 256); - GetPowerTmpBufferFactorSize(true, true, false, 4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 4); - EXPECT_EQ(extraBuf, 256); - GetPowerTmpBufferFactorSize(true, true, true, 4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 6); - EXPECT_EQ(extraBuf, 0); - GetPowerTmpBufferFactorSize(true, true, false, 2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 14); - EXPECT_EQ(extraBuf, 256); -} - -#endif - -#if __CCE_AICORE__ == 200 -extern void platfrom_stub_set_chip_version(const char *num); -TEST_F(TestTiling, TestPowerTilingV200) -{ - platfrom_stub_set_chip_version("Ascend310P"); - auto platformPtr = platform_ascendc::PlatformAscendCManager::GetInstance("Ascend310P"); - std::vector shapeDims = { 512 }; - auto powerShape = ge::Shape(shapeDims); - uint32_t maxVal; - uint32_t minVal; - GetPowerMaxMinTmpSize(powerShape, powerShape, false, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 512 * 4 * 5 + 256); - EXPECT_EQ(minVal, 256 * 5 + 256); - GetPowerMaxMinTmpSize(powerShape, powerShape, true, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 512 * 4 * 7); - EXPECT_EQ(minVal, 256 * 7); - GetPowerMaxMinTmpSize(powerShape, powerShape, false, 2, false, maxVal, minVal); - EXPECT_EQ(maxVal, 512 * 2 * 16 + 256); - EXPECT_EQ(minVal, 256 * 8 + 256); - std::vector scalar_shape = { 1 }; - auto scalarShape = ge::Shape(scalar_shape); - GetPowerMaxMinTmpSize(powerShape, scalarShape, false, 2, false, maxVal, minVal); - 
EXPECT_EQ(maxVal, 512 * 2 * 16 + 256); - EXPECT_EQ(minVal, 256 * 8 + 256); - GetPowerMaxMinTmpSize(powerShape, scalarShape, true, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 512 * 4 * 8); - EXPECT_EQ(minVal, 256 * 8); - GetPowerMaxMinTmpSize(powerShape, scalarShape, false, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 512 * 4 * 6 + 256); - EXPECT_EQ(minVal, 256 * 6 + 256); - - std::vector shape1 = { 16 }; - auto powerShape1 = ge::Shape( shape1 ); - GetPowerMaxMinTmpSize(powerShape1, scalarShape, false, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 256 * 6 + 256); - EXPECT_EQ(minVal, 256 * 6 + 256); - GetPowerMaxMinTmpSize(powerShape1, scalarShape, false, 2, false, maxVal, minVal); - EXPECT_EQ(maxVal, 256 * 8 + 256); - EXPECT_EQ(minVal, 256 * 8 + 256); - GetPowerMaxMinTmpSize(powerShape1, scalarShape, true, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 256 * 8); - EXPECT_EQ(minVal, 256 * 8); - GetPowerMaxMinTmpSize(powerShape1, powerShape1, false, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 256 * 5 + 256); - EXPECT_EQ(minVal, 256 * 5 + 256); - GetPowerMaxMinTmpSize(powerShape1, powerShape1, false, 2, false, maxVal, minVal); - EXPECT_EQ(maxVal, 256 * 8 + 256); - EXPECT_EQ(minVal, 256 * 8 + 256); - GetPowerMaxMinTmpSize(powerShape1, powerShape1, true, 4, false, maxVal, minVal); - EXPECT_EQ(maxVal, 256 * 7); - EXPECT_EQ(minVal, 256 * 7); -} - -TEST_F(TestTiling, TestPowerTilingFactorSizeV200) -{ - platfrom_stub_set_chip_version("Ascend310P"); - auto platformPtr = platform_ascendc::PlatformAscendCManager::GetInstance("Ascend310P"); - uint32_t maxLiveNodeCnt = 0xffff; - uint32_t extraBuf = 0xffff; - GetPowerTmpBufferFactorSize(false, true, false, 4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 6); - EXPECT_EQ(extraBuf, 256); - GetPowerTmpBufferFactorSize(false, true, true, 4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 8); - EXPECT_EQ(extraBuf, 0); - GetPowerTmpBufferFactorSize(false, true, false, 2, maxLiveNodeCnt, extraBuf); - 
EXPECT_EQ(maxLiveNodeCnt, 16); - EXPECT_EQ(extraBuf, 256); - GetPowerTmpBufferFactorSize(true, true, false, 4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 5); - EXPECT_EQ(extraBuf, 256); - GetPowerTmpBufferFactorSize(true, true, true, 4, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 7); - EXPECT_EQ(extraBuf, 0); - GetPowerTmpBufferFactorSize(true, true, false, 2, maxLiveNodeCnt, extraBuf); - EXPECT_EQ(maxLiveNodeCnt, 16); - EXPECT_EQ(extraBuf, 256); -} - -TEST_F(TestTiling, TestLastBroadCast200) -{ - fe::PlatFormInfos platform_info; - auto plat = platform_ascendc::PlatformAscendC(&platform_info); - platfrom_stub_set_chip_version("Ascend310P"); - uint32_t firstDim = 32; - uint32_t lastDim = 32; - std::vector srcShapeDims = {firstDim, 1}; - auto srcShape = ge::Shape(srcShapeDims); - std::vector dstShapeDims = {firstDim, lastDim}; - auto dstShape = ge::Shape(dstShapeDims); - uint32_t maxValue{0}; - uint32_t minValue{0}; - constexpr uint32_t halfSize = 2; - constexpr uint32_t halfOneBlockElementNum = 16; - constexpr uint32_t MAX_BLOCK_NUM = 8; - constexpr uint32_t ONE_BLOCK_SIZE = 32; - uint32_t minTmpBufferSize = - halfOneBlockElementNum * ((lastDim + MAX_BLOCK_NUM - 1) / MAX_BLOCK_NUM) * halfSize; - uint32_t minHalfAlignSize = ONE_BLOCK_SIZE + + minTmpBufferSize; - uint32_t maxHalfAlignSize = ONE_BLOCK_SIZE + firstDim * lastDim * halfSize; - GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, halfSize, false, maxValue, minValue); - EXPECT_EQ(minValue, minHalfAlignSize); - EXPECT_EQ(maxValue, maxHalfAlignSize); - - constexpr uint32_t int8Size = 1; - const uint32_t alignSrcSize = - ((firstDim + halfOneBlockElementNum - 1) / halfOneBlockElementNum) * halfOneBlockElementNum; - const uint32_t alignDstSize = - ((firstDim * lastDim + halfOneBlockElementNum - 1) / halfOneBlockElementNum) * halfOneBlockElementNum; - const uint32_t castTempBufferSize = (alignSrcSize + alignDstSize) * halfSize; - GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, int8Size, 
false, maxValue, minValue); - EXPECT_EQ(minValue, minHalfAlignSize + castTempBufferSize); - EXPECT_EQ(maxValue, maxHalfAlignSize + castTempBufferSize); -} - -TEST_F(TestTiling, TestFirstBroadCast200) -{ - fe::PlatFormInfos platform_info; - auto plat = platform_ascendc::PlatformAscendC(&platform_info); - platfrom_stub_set_chip_version("Ascend310P"); - uint32_t firstDim = 32; - uint32_t lastDim = 32; - std::vector srcShapeDims = {1, lastDim}; - auto srcShape = ge::Shape(srcShapeDims); - std::vector dstShapeDims = {firstDim, lastDim}; - auto dstShape = ge::Shape(dstShapeDims); - uint32_t maxValue{0}; - uint32_t minValue{0}; - constexpr uint32_t halfSize = 2; - constexpr uint32_t ONE_BLOCK_SIZE = 32; - GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, halfSize, false, maxValue, minValue); - EXPECT_EQ(minValue, ONE_BLOCK_SIZE); - EXPECT_EQ(maxValue, ONE_BLOCK_SIZE); - - constexpr uint32_t int8Size = 1; - constexpr uint32_t HALF_ONE_BLK_SIZE = 16; - const uint32_t alignSrcSize = ((lastDim + HALF_ONE_BLK_SIZE - 1) / HALF_ONE_BLK_SIZE) * HALF_ONE_BLK_SIZE; - const uint32_t alignDstSize = - ((firstDim * lastDim + HALF_ONE_BLK_SIZE - 1) / HALF_ONE_BLK_SIZE) * HALF_ONE_BLK_SIZE; - const uint32_t castTempBufferSize = (alignSrcSize + alignDstSize) * halfSize; - GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, int8Size, false, maxValue, minValue); - EXPECT_EQ(minValue, ONE_BLOCK_SIZE + castTempBufferSize); - EXPECT_EQ(maxValue, ONE_BLOCK_SIZE + castTempBufferSize); -} - -TEST_F(TestTiling, TestOneElementBroadCast200) -{ - fe::PlatFormInfos platform_info; - auto plat = platform_ascendc::PlatformAscendC(&platform_info); - platfrom_stub_set_chip_version("Ascend310P"); - uint32_t srcDim = 1; - uint32_t dstDim = 32; - std::vector srcShapeDims = {srcDim}; - auto srcShape = ge::Shape(srcShapeDims); - std::vector dstShapeDims = {dstDim}; - auto dstShape = ge::Shape(dstShapeDims); - uint32_t maxValue{0}; - uint32_t minValue{0}; - constexpr uint32_t halfSize = 2; - 
GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, halfSize, false, maxValue, minValue); - EXPECT_EQ(minValue, 0); - EXPECT_EQ(maxValue, 0); - - constexpr uint32_t int8Size = 1; - constexpr uint32_t HALF_ONE_BLK_SIZE = 16; - constexpr uint32_t ONE_BLOCK_SIZE = 32; - const uint32_t alignSrcSize = ((srcDim + HALF_ONE_BLK_SIZE - 1) / HALF_ONE_BLK_SIZE) * HALF_ONE_BLK_SIZE; - const uint32_t alignDstSize = ((dstDim + HALF_ONE_BLK_SIZE - 1) / HALF_ONE_BLK_SIZE) * HALF_ONE_BLK_SIZE; - const uint32_t castTempBufferSize = (alignSrcSize + alignDstSize) * halfSize; - GetBroadCastMaxMinTmpSize(plat, srcShape, dstShape, int8Size, false, maxValue, minValue); - EXPECT_EQ(minValue, castTempBufferSize + ONE_BLOCK_SIZE); - EXPECT_EQ(maxValue, castTempBufferSize + ONE_BLOCK_SIZE); -} -#endif - -TEST_F(TestTiling, TestReduceXorSumTilingInt16) -{ - std::vector shapeDims = { 128, 128 }; - auto shape = ge::Shape(shapeDims); - uint32_t maxSize; - uint32_t minSize; - GetReduceXorSumMaxMinTmpSize(shape, 2, true, maxSize, minSize); - EXPECT_EQ(maxSize, 65536); - EXPECT_EQ(minSize, 65536); - - GetReduceXorSumMaxMinTmpSize(shape, 2, false, maxSize, minSize); - EXPECT_EQ(maxSize, 98304); - EXPECT_EQ(minSize, 98304); - - shapeDims = { 8 }; - shape = ge::Shape(shapeDims); - GetReduceXorSumMaxMinTmpSize(shape, 2, false, maxSize, minSize); - EXPECT_EQ(maxSize, 768); - EXPECT_EQ(minSize, 768); - - GetReduceXorSumMaxMinTmpSize(shape, 2, true,maxSize, minSize); - EXPECT_EQ(maxSize, 512); - EXPECT_EQ(minSize, 512); -} - -TEST_F(TestTiling, TestCumSum) -{ - uint32_t firstDim = 32; - uint32_t lastDim = 16; - std::vector srcShapeDims = {firstDim, lastDim}; - auto srcShape = ge::Shape(srcShapeDims); - uint32_t maxValue{0}; - uint32_t minValue{0}; - constexpr uint32_t halfSize = 2; - constexpr uint32_t transDataTo5HDAddrListSize = 16; - uint32_t minHalfSize = transDataTo5HDAddrListSize * lastDim * 3 * sizeof(uint16_t); - uint32_t alignOutter = (firstDim + transDataTo5HDAddrListSize - 1) / 
transDataTo5HDAddrListSize * transDataTo5HDAddrListSize; - uint32_t maxHalfSize = alignOutter * lastDim * 3 * sizeof(uint16_t); - - GetCumSumMaxMinTmpSize(srcShape, halfSize, true, false, maxValue, minValue); - EXPECT_EQ(minValue, minHalfSize); - EXPECT_EQ(maxValue, maxHalfSize); - - constexpr uint32_t floatSize = 4; - uint32_t minFloatSize = transDataTo5HDAddrListSize * lastDim * 2 * sizeof(float); - uint32_t maxFloatSize = alignOutter * lastDim * 2 * sizeof(float); - - GetCumSumMaxMinTmpSize(srcShape, floatSize, true, false, maxValue, minValue); - EXPECT_EQ(minValue, minFloatSize); - EXPECT_EQ(maxValue, maxFloatSize); - - maxHalfSize = minHalfSize = firstDim * lastDim * sizeof(float); - GetCumSumMaxMinTmpSize(srcShape, halfSize, false, false, maxValue, minValue); - EXPECT_EQ(minValue, minHalfSize); - EXPECT_EQ(maxValue, maxHalfSize); - - - GetCumSumMaxMinTmpSize(srcShape, floatSize, false, false, maxValue, minValue); - EXPECT_EQ(minValue, 0); - EXPECT_EQ(maxValue, 0); -} TEST_F(TestTiling, tiling_compute_error) { MultiCoreMatmulTiling tiling; -- Gitee From 7eb61a596b053c299d79539038aee72c32e27db2 Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Thu, 19 Sep 2024 12:14:07 +0000 Subject: [PATCH 08/17] fix error Signed-off-by: jiangchengcheng-on --- impl/matmul/matmul_impl.h | 2 +- impl/matmul/matmul_tiling_algorithm.cpp | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/impl/matmul/matmul_impl.h b/impl/matmul/matmul_impl.h index fdfc46df..7423ff6d 100644 --- a/impl/matmul/matmul_impl.h +++ b/impl/matmul/matmul_impl.h @@ -10124,7 +10124,7 @@ template ::CopyCo22GMNZ2NDOnTheFly( const GlobalTensor& gmC, const LocalTensor& src, bool enSequentialWrite) { - uint32_t dimN = (Kc != 0) ? Kc_ : N_; + uint32_t dimN = (Kc_ != 0) ? Kc_ : N_; const int blockCount = sizeof(DstT) == B32_BYTE_SIZE ? 
BLOCK_CUBE : ONE_BLK_SIZE / sizeof(DstT); const int oneBlockCount = ONE_BLK_SIZE / sizeof(DstT); int calcWidth = var.baseUseN_ / blockCount; diff --git a/impl/matmul/matmul_tiling_algorithm.cpp b/impl/matmul/matmul_tiling_algorithm.cpp index 68adb2bd..3af7d9f7 100644 --- a/impl/matmul/matmul_tiling_algorithm.cpp +++ b/impl/matmul/matmul_tiling_algorithm.cpp @@ -2275,6 +2275,27 @@ int64_t MatmulTilingAlgorithm::Process() tilingIns_->tiling_.set_baseN(singleCoreStatus.l0Status.nL0 * C0_SIZE); const int32_t reduceSize = C0_BYTE_SIZE / DTYPE_BIT_TAB.at(tilingIns_->aType_.dataType) * BITS_PER_BYTE; tilingIns_->tiling_.set_baseK(singleCoreStatus.l0Status.kL0 * reduceSize); + // check whether OUTER_PRODUCT is supported + if ((tilingIns_->scheduleType == scheduleType::OUTER_PRODUCT) && + (tilingIns_->tiling_.get_baseK() < tilingIns_->tiling_.get_singleCoreK())) { + TILING_LOG_WARNING("Unsupported scheduleType is OUTER_PRODUCT"); + return -1; + } + tilingIns_->tiling_.set_iterateOrder(GetIteratorOrder(singleCoreStatus, singleCoreM, singleCoreN, singleCoreK)); + int32_t newBaseM = singleCoreStatus.l0Status.mL0 * C0_SIZE; + int32_t newBaseN = singleCoreStatus.l0Status.nL0 * C0_SIZE; + if (tilingIns_->scheduleType == scheduleType::OUTER_PRODUCT) { + // when scheduleType is OUT_PRODUCT, each iteration computes 2 * basicBlock size of data + bool isL0CFullUsed = (newBaseM * newBaseN * NUM_TWO) * static_cast(DTYPE_BIT_TAB.at(tilingIns_->cType_.dataType) > + tilingIns_->bufferPool_.l0CSize) ? 
1 : 0; + if (isL0CFullUsed && (tilingIns_->tiling_.get_iterateOrder() == 0)) { + newBaseN = MathUtil::CeilDivision(newBaseN / NUM_TWO, C0_SIZE); + } else if (isL0CFullUsed && (tilingIns_->tiling_.get_iterateOrder() == 1)) { + newBaseM = MathUtil::CeilDivision(newBaseM / NUM_TWO, C0_SIZE); + } + tilingIns_->tiling_.set_baseM(newBaseM); + tilingIns_->tiling_.set_baseN(newBaseN); + } tilingIns_->baseM = tilingIns_->tiling_.get_baseM(); tilingIns_->baseN = tilingIns_->tiling_.get_baseN(); tilingIns_->baseK = tilingIns_->tiling_.get_baseK(); -- Gitee From 5098ce3769408f02a805288a42f9a90e16e6aea5 Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Thu, 19 Sep 2024 12:31:11 +0000 Subject: [PATCH 09/17] fix error Signed-off-by: jiangchengcheng-on --- impl/matmul/matmul_tiling_algorithm.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/impl/matmul/matmul_tiling_algorithm.cpp b/impl/matmul/matmul_tiling_algorithm.cpp index 3af7d9f7..67ac1e11 100644 --- a/impl/matmul/matmul_tiling_algorithm.cpp +++ b/impl/matmul/matmul_tiling_algorithm.cpp @@ -2276,7 +2276,7 @@ int64_t MatmulTilingAlgorithm::Process() const int32_t reduceSize = C0_BYTE_SIZE / DTYPE_BIT_TAB.at(tilingIns_->aType_.dataType) * BITS_PER_BYTE; tilingIns_->tiling_.set_baseK(singleCoreStatus.l0Status.kL0 * reduceSize); // check whether OUTER_PRODUCT is supported - if ((tilingIns_->scheduleType == scheduleType::OUTER_PRODUCT) && + if ((tilingIns_->scheduleType == ScheduleType::OUTER_PRODUCT) && (tilingIns_->tiling_.get_baseK() < tilingIns_->tiling_.get_singleCoreK())) { TILING_LOG_WARNING("Unsupported scheduleType is OUTER_PRODUCT"); return -1; @@ -2284,7 +2284,7 @@ int64_t MatmulTilingAlgorithm::Process() tilingIns_->tiling_.set_iterateOrder(GetIteratorOrder(singleCoreStatus, singleCoreM, singleCoreN, singleCoreK)); int32_t newBaseM = singleCoreStatus.l0Status.mL0 * C0_SIZE; int32_t newBaseN = singleCoreStatus.l0Status.nL0 * C0_SIZE; - if (tilingIns_->scheduleType == 
scheduleType::OUTER_PRODUCT) { + if (tilingIns_->scheduleType == ScheduleType::OUTER_PRODUCT) { // when scheduleType is OUT_PRODUCT, each iteration computes 2 * basicBlock size of data bool isL0CFullUsed = (newBaseM * newBaseN * NUM_TWO) * static_cast(DTYPE_BIT_TAB.at(tilingIns_->cType_.dataType) > tilingIns_->bufferPool_.l0CSize) ? 1 : 0; -- Gitee From 548661ff1be8d45ec5a594035d90831ef6f8932b Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Thu, 19 Sep 2024 12:40:04 +0000 Subject: [PATCH 10/17] Update llt Signed-off-by: jiangchengcheng-on --- tests/tiling/test_tiling.cpp | 55 ++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/tests/tiling/test_tiling.cpp b/tests/tiling/test_tiling.cpp index 61144c1b..bdb0129f 100644 --- a/tests/tiling/test_tiling.cpp +++ b/tests/tiling/test_tiling.cpp @@ -38,11 +38,12 @@ TEST_F(TestTiling, MultiCoreSmallMN) rnnMatmul3.SetBType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, matmul_tiling::DataType ::DT_FLOAT); rnnMatmul3.SetCType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::NZ, matmul_tiling::DataType ::DT_FLOAT); rnnMatmul3.SetBiasType(matmul_tiling::TPosition::VECCALC, matmul_tiling::CubeFormat::ND, matmul_tiling::DataType ::DT_FLOAT); + rnnMatmul3.SetSingleRange(-1,-1,-1,-1,-1,-1); auto ret = rnnMatmul3.SetBias(true); ret = rnnMatmul3.SetDim(24); ret = rnnMatmul3.SetOrgShape(5, 40, 986); ret = rnnMatmul3.SetShape(5, 10, 986); - ret = rnnMatmul3.SetBufferSpace(); // will use all buffer space if not explicitly specified + ret = rnnMatmul3.SetBufferSpace(-1, -1, -1); // will use all buffer space if not explicitly specified optiling::TCubeTiling tilingData; ret = rnnMatmul3.GetTiling(tilingData); rnnMatmul3.PrintTilingData(); @@ -74,9 +75,51 @@ TEST_F(TestTiling, PlatformConstructor) optiling::TCubeTiling tilingData; int ret = tiling.GetTiling(tilingData); tiling.PrintTilingData(); + tiling.PrintTilingDataInfo(tilingData); EXPECT_EQ(ret, 0); } 
+TEST_F(TestTiling, TestMatmulApiTilingL0DB) +{ + matmul_tiling::PlatformInfo plat {.socVersion = platform_ascendc::SocVersion::ASCEND910B, .l1Size = 524288, + .l0CSize = 131072, .ubSize = 196608, .l0ASize = 65536, .l0BSize = 65536}; + MatmulApiTiling tiling(plat); + tiling.SetAType(TPosition::TSCM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); + tiling.SetBType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); + tiling.SetCType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); + tiling.SetBiasType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); + tiling.SetShape(2048, 20480, 16); + tiling.SetOrgShape(2048, 20480, 16); + tiling.SetBias(false); + tiling.SetBufferSpace(-1, -1, -1, -1); + tiling.SetMatmulConfigParams({1, false, ScheduleType::OUTER_PRODUCT, MatrixTraverse::FIRSTM}); + optiling::TCubeTiling tilingData; + int ret = tiling.GetTiling(tilingData); + tiling.PrintTilingData(); + EXPECT_EQ(ret, 0); +} + +TEST_F(TestTiling, TestMatmulApiTilingL0DBError) +{ + matmul_tiling::PlatformInfo plat {.socVersion = platform_ascendc::SocVersion::ASCEND910B, .l1Size = 524288, + .l0CSize = 131072, .ubSize = 196608, .l0ASize = 65536, .l0BSize = 65536}; + MatmulApiTiling tiling(plat); + tiling.SetAType(TPosition::TSCM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); + tiling.SetBType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); + tiling.SetCType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); + tiling.SetBiasType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); + tiling.SetShape(16, 16, 2048); + tiling.SetOrgShape(16, 16, 2048); + tiling.SetFixSplit(16, 16, -1); + tiling.SetBias(false); + tiling.SetBufferSpace(-1, -1, -1, -1); + tiling.SetMatmulConfigParams({1, false, ScheduleType::OUTER_PRODUCT, MatrixTraverse::FIRSTN}); + optiling::TCubeTiling tilingData; + int ret = tiling.GetTiling(tilingData); + tiling.PrintTilingData(); + EXPECT_EQ(ret, -1); 
+} + TEST_F(TestTiling, TestInt4BaseK) { matmul_tiling::PlatformInfo plat {.socVersion = platform_ascendc::SocVersion::ASCEND910B, .l1Size = 524288, @@ -1885,7 +1928,7 @@ TEST_P(RnnTilingbTestSuite, TestMatmulApiTilngRnnRealCase) rnnParams.baseK = rnnMatmul.GetBaseK(); // get output info after cut ret = rnnMatmul.GetSingleShape(rnnParams.singleM, rnnParams.singleN, rnnParams.singleK); // get single process info ret = rnnMatmul.GetCoreNum(dim, mDim, - nDim); // get used blockdim after multi-cores cut, carried by user to kernel, contrl Kernel + nDim); // get used blockdim after multi-cores cut, carried by user to kernel, contrl Kernel // input mm int32_t l1_left = 512 * 1024 - 64 - rnnParams.singleN * (input_align + hidden_align) * sizeof(float) * 2; ret = rnnMatmul1.SetBufferSpace(l1_left, rnnParams.maxUbSize, rnnParams.maxUbSize); @@ -1922,7 +1965,7 @@ TEST_P(RnnTilingbTestSuite, TestMatmulApiTilngRnnRealCase) sizeof(float); EXPECT_LT(l1UsedSize, 512 * 1024 - 64); rnnParams.usedCoreNum = dim * 4; - } else { // part of full load + } else { // part of full load // two matmul time sharing auto ret = rnnMatmul.SetBias(true); ret = rnnMatmul.SetDim(rnnParams.sysAivCoreNum / 4); @@ -1943,7 +1986,7 @@ TEST_P(RnnTilingbTestSuite, TestMatmulApiTilngRnnRealCase) ret = rnnMatmul.GetSingleShape(rnnParams.singleM, rnnParams.singleN, rnnParams.singleK); // get single process info ret = rnnMatmul.GetCoreNum(dim, mDim, - nDim); // get used blockdim after multi-cores cut, carried by user to kernel, contrl Kernel business + nDim); // get used blockdim after multi-cores cut, carried by user to kernel, contrl Kernel business // input mm ret = rnnMatmul1.SetBufferSpace(-1, rnnParams.maxUbSize, rnnParams.maxUbSize); ret = rnnMatmul1.SetOrgShape(rnnParams.batch, rnnParams.hiddenSize * 4, rnnParams.inputSize); @@ -1977,7 +2020,7 @@ TEST_P(RnnTilingbTestSuite, TestMatmulApiTilngRnnRealCase) sizeof(float); EXPECT_LT(l1UsedSize, 512 * 1024 - 64); rnnParams.usedCoreNum = dim * 4; - } else { 
// no cache, reset AB,mm cache mechanism lose efficacy + } else { // no cache, reset AB, m cache mechanism lose efficacy std::cout << "can not load any weight" << std::endl; rnnMatmul.SetAType(matmul_tiling::TPosition::GM, matmul_tiling::CubeFormat::ND, (matmul_tiling::DataType)dataType); @@ -2020,7 +2063,7 @@ TEST_P(RnnTilingbTestSuite, TestMatmulApiTilngRnnRealCase) ret = rnnMatmul.GetSingleShape(rnnParams.singleM, rnnParams.singleN, rnnParams.singleK); // get single core data ret = rnnMatmul.GetCoreNum(dim, mDim, - nDim); // get used blockdim after multi-cores cut, carried by user to kernel, contrl Kernel business + nDim); // get used blockdim after multi-cores cut, carried by user to kernel, contrl Kernel business // input mm ret = rnnMatmul1.SetBufferSpace(-1, rnnParams.maxUbSize, rnnParams.maxUbSize); ret = rnnMatmul1.SetOrgShape(rnnParams.batch, rnnParams.hiddenSize * 4, rnnParams.inputSize); -- Gitee From a1d4b1686fe15fab1453ecec07ddcd098858ac82 Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Thu, 19 Sep 2024 12:49:05 +0000 Subject: [PATCH 11/17] fix error Signed-off-by: jiangchengcheng-on --- impl/matmul/matmul_tiling_algorithm.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/impl/matmul/matmul_tiling_algorithm.cpp b/impl/matmul/matmul_tiling_algorithm.cpp index 67ac1e11..d780c593 100644 --- a/impl/matmul/matmul_tiling_algorithm.cpp +++ b/impl/matmul/matmul_tiling_algorithm.cpp @@ -1859,7 +1859,7 @@ void MatmulTilingAlgorithm::AddOptimalFactors(const std::string& opType, const M // A/B fullload or A fullload + B Kdim fullload or B fullload + A Kdim fullload(1/2/4) const int32_t mnCore = MathUtil::CeilDivision(coreNum, params.batch32); if (mnCore > 1) { - const float optPoint = sqrt((params.m32 + 0.0) / params.n32 * mnCore); + const float optPoint = static_cast(sqrt((params.m32 + 0.0) / params.n32 * mnCore)); const int32_t mdim = static_cast(ceil(optPoint)); const int32_t ndim = static_cast(ceil(mnCore / optPoint)); 
MathUtil::AddFactor(blockDimRes.mDimFactors, mdim); @@ -2320,7 +2320,6 @@ int64_t MatmulTilingAlgorithm::Process() int32_t b1LengthCache = 0; SetDepthL1CacheUBParams(a1LengthCache, b1LengthCache); tilingIns_->tiling_.set_transLength(transLength); // a1 b1 c1 reuse on ub - tilingIns_->tiling_.set_iterateOrder(GetIteratorOrder(singleCoreStatus, singleCoreM, singleCoreN, singleCoreK)); tilingIns_->tiling_.set_shareMode(0); tilingIns_->tiling_.set_dbL0A(singleCoreStatus.l0Status.dbL0A); tilingIns_->tiling_.set_dbL0B(singleCoreStatus.l0Status.dbL0B); -- Gitee From 9f857dcff98b73c2c9171b3a33c4f43459921e65 Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Thu, 19 Sep 2024 12:53:54 +0000 Subject: [PATCH 12/17] fix Signed-off-by: jiangchengcheng-on --- tests/tiling/test_tiling.cpp | 41 ------------------------------------ 1 file changed, 41 deletions(-) diff --git a/tests/tiling/test_tiling.cpp b/tests/tiling/test_tiling.cpp index bdb0129f..fc267ee0 100644 --- a/tests/tiling/test_tiling.cpp +++ b/tests/tiling/test_tiling.cpp @@ -79,47 +79,6 @@ TEST_F(TestTiling, PlatformConstructor) EXPECT_EQ(ret, 0); } -TEST_F(TestTiling, TestMatmulApiTilingL0DB) -{ - matmul_tiling::PlatformInfo plat {.socVersion = platform_ascendc::SocVersion::ASCEND910B, .l1Size = 524288, - .l0CSize = 131072, .ubSize = 196608, .l0ASize = 65536, .l0BSize = 65536}; - MatmulApiTiling tiling(plat); - tiling.SetAType(TPosition::TSCM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); - tiling.SetBType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); - tiling.SetCType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); - tiling.SetBiasType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); - tiling.SetShape(2048, 20480, 16); - tiling.SetOrgShape(2048, 20480, 16); - tiling.SetBias(false); - tiling.SetBufferSpace(-1, -1, -1, -1); - tiling.SetMatmulConfigParams({1, false, ScheduleType::OUTER_PRODUCT, MatrixTraverse::FIRSTM}); - optiling::TCubeTiling 
tilingData; - int ret = tiling.GetTiling(tilingData); - tiling.PrintTilingData(); - EXPECT_EQ(ret, 0); -} - -TEST_F(TestTiling, TestMatmulApiTilingL0DBError) -{ - matmul_tiling::PlatformInfo plat {.socVersion = platform_ascendc::SocVersion::ASCEND910B, .l1Size = 524288, - .l0CSize = 131072, .ubSize = 196608, .l0ASize = 65536, .l0BSize = 65536}; - MatmulApiTiling tiling(plat); - tiling.SetAType(TPosition::TSCM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); - tiling.SetBType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT16); - tiling.SetCType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); - tiling.SetBiasType(TPosition::GM, CubeFormat::ND, matmul_tiling::DataType::DT_FLOAT); - tiling.SetShape(16, 16, 2048); - tiling.SetOrgShape(16, 16, 2048); - tiling.SetFixSplit(16, 16, -1); - tiling.SetBias(false); - tiling.SetBufferSpace(-1, -1, -1, -1); - tiling.SetMatmulConfigParams({1, false, ScheduleType::OUTER_PRODUCT, MatrixTraverse::FIRSTN}); - optiling::TCubeTiling tilingData; - int ret = tiling.GetTiling(tilingData); - tiling.PrintTilingData(); - EXPECT_EQ(ret, -1); -} - TEST_F(TestTiling, TestInt4BaseK) { matmul_tiling::PlatformInfo plat {.socVersion = platform_ascendc::SocVersion::ASCEND910B, .l1Size = 524288, -- Gitee From db9d99d33c93eb84834b8a2247aa665e6db5c324 Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Thu, 19 Sep 2024 13:14:23 +0000 Subject: [PATCH 13/17] fix Signed-off-by: jiangchengcheng-on --- impl/matmul/matmul_tiling_algorithm.cpp | 6 +++--- lib/matmul/matmul_client.h | 25 ++++++++++++------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/impl/matmul/matmul_tiling_algorithm.cpp b/impl/matmul/matmul_tiling_algorithm.cpp index d780c593..17a36ecc 100644 --- a/impl/matmul/matmul_tiling_algorithm.cpp +++ b/impl/matmul/matmul_tiling_algorithm.cpp @@ -1628,7 +1628,7 @@ int32_t MatmulTilingAlgorithm::LoopNumFromSingleCoreToL0(const CoreStatusPack& c constexpr int32_t minSize = 64; 
constexpr int32_t minN0Size = 16; int32_t n0 = min(min(minN0Size, coreStatus.n), minSize); - int32_t m0 = min(min(coreStatus.m, minTotalSize / n0), minSize); + int32_t m0 = (n0 == 0) ? 0 : min(min(coreStatus.m, minTotalSize / n0), minSize); n0 = (m0 == 0) ? 0 : min(min(coreStatus.n, minTotalSize / m0), minSize); m0 = (n0 == 0) ? 0 : min(min(coreStatus.m, minTotalSize / n0), minSize); const int32_t k0 = (m0 != 0 && n0 != 0) ? min(min(minSize / m0, minSize / n0), coreStatus.k) : coreStatus.k; @@ -1805,7 +1805,7 @@ bool MatmulTilingAlgorithm::PreProcessMiniShape(const std::string& opType, CoreS coreStatus.n = coreStatus.nDim == 1 ? params.n32 : MathUtil::CeilDivision(params.nMapped, coreStatus.nDim); coreStatus.m = coreStatus.mDim == 1 ? params.m32 : MathUtil::CeilDivision(params.mMapped, coreStatus.mDim); coreStatus.k = coreStatus.kDim == 1 ? params.k32 : MathUtil::CeilDivision(params.kMapped, coreStatus.kDim); - params.nonFactorK = params.k32 % coreStatus.kDim == 0 ? false : true; + params.nonFactorK = (coreStatus.kDim == 0) ? false : (params.k32 % coreStatus.kDim == 0 ? false : true); return true; } return false; @@ -2286,7 +2286,7 @@ int64_t MatmulTilingAlgorithm::Process() int32_t newBaseN = singleCoreStatus.l0Status.nL0 * C0_SIZE; if (tilingIns_->scheduleType == ScheduleType::OUTER_PRODUCT) { // when scheduleType is OUT_PRODUCT, each iteration computes 2 * basicBlock size of data - bool isL0CFullUsed = (newBaseM * newBaseN * NUM_TWO) * static_cast(DTYPE_BIT_TAB.at(tilingIns_->cType_.dataType) > + bool isL0CFullUsed = (newBaseM * newBaseN * NUM_TWO) * static_cast(DTYPE_BYTE_TAB.at(tilingIns_->cType_.dataType) > tilingIns_->bufferPool_.l0CSize) ? 
1 : 0; if (isL0CFullUsed && (tilingIns_->tiling_.get_iterateOrder() == 0)) { newBaseN = MathUtil::CeilDivision(newBaseN / NUM_TWO, C0_SIZE); diff --git a/lib/matmul/matmul_client.h b/lib/matmul/matmul_client.h index 94f310fe..d0766cfd 100644 --- a/lib/matmul/matmul_client.h +++ b/lib/matmul/matmul_client.h @@ -383,14 +383,6 @@ public: // Only support the mode that the IterateAll is asynchronous and GM output is continuous. // In discontinuous scenarios, the system stops responding. __aicore__ inline void WaitIterateAll() - { - ASSERT(!isSyncGetC); // Must be asynchronous mode - WaitEvent(this->devEvtID); - } - - // Only support the mode that the IterateAll is asynchronous and GM output is continuous. - // In discontinuous scenarios, the system stops responding. - __aicore__ inline void WaitIterateBatch() { ASSERT(!isSyncGetC); // Must be asynchronous mode auto intraId = this->devEvtID; @@ -402,13 +394,20 @@ public: WaitEvent(intraId); } + // Only support the mode that the IterateAll is asynchronous and GM output is continuous. + // In discontinuous scenarios, the system stops responding. 
+ __aicore__ inline void WaitIterateBatch() + { + ASSERT(!isSyncGetC); // Must be asynchronous mode + WaitEvent(this->devEvtID); + } + template __aicore__ inline void IterateAll(const GlobalTensor& gm, uint8_t enAtomic = 0, bool enSequentialWrite = false, bool waitIterateAll = false, bool fakeMsg = false) { TRACE_START(TraceId::KFC_CLIENT_POST_MSG); ASSERT(kfcMsg_.body.isFirstIter == 1); - ASSERT(!(A_TYPE::ibShare && B_TYPE::ibShare) && "Iterate not support when samebab is enabled"); kfcMsg_.body.iterateFakeMsg = fakeMsg; kfcMsg_.body.cAddr = reinterpret_cast(gm.GetPhyAddr()); kfcMsg_.body.enAtomic = (uint8_t)(enAtomic); @@ -437,7 +436,6 @@ public: ASSERT(sync == true); ASSERT(enAtomic == 0); ASSERT(kfcMsg_.body.isFirstIter == 1); - ASSERT(!(A_TYPE::ibShare && B_TYPE::ibShare) && "Iterate not support when samebab is enabled"); ASSERT((PhyPosIsL1(C_TYPE::pos)) && "IterateAll LocalTensor only support QuePosition A1 or B1"); ASSERT(!(A_TYPE::ibShare && B_TYPE::ibShare) && "IterateAll LocalTensor not support when sameab" " is enabled"); @@ -472,7 +470,7 @@ public: { TRACE_START(TraceId::KFC_CLIENT_POST_MSG); ASSERT(kfcMsg_.body.isFirstIter == 1); - ASSERT(!(A_TYPE::ibShare && B_TYPE::ibShare) && "Iterate not support when samebab is enabled"); + ASSERT(!(A_TYPE::ibShare && B_TYPE::ibShare) && "IterateBatch not support when sameab is enabled"); kfcMsg_.body.cAddr = reinterpret_cast(gm.GetPhyAddr()); kfcMsg_.body.enSequentialWrite = enSequentialWrite; kfcMsg_.body.sync = sync; @@ -507,6 +505,7 @@ public: TRACE_START(TraceId::KFC_CLIENT_POST_MSG); ASSERT(sync == true); ASSERT(kfcMsg_.body.isFirstIter == 1); + ASSERT(!(A_TYPE::ibShare && B_TYPE::ibShare) && "IterateBatch not support when sameab is enabled"); if (ubCmatrix.GetPosition() == static_cast(TPosition::TSCM)) { kfcMsg_.body.cAddr = GetTscmAddr(ubCmatrix); kfcMsg_.body.cIsTscm = 1; @@ -552,7 +551,7 @@ public: curProcess = 0; ASSERT(kfcMsg_.body.isFirstIter == 1); ASSERT(cacheWorkspaceAddr); - + 
ASSERT(!(A_TYPE::ibShare && B_TYPE::ibShare) && "IterateBatch not support when sameab is enabled"); kfcMsg_.body.cAddr = reinterpret_cast(cacheWorkspaceAddr); kfcMsg_.body.enSequentialWrite = enSequentialWrite; kfcMsg_.body.sync = sync; @@ -888,7 +887,7 @@ private: template __aicore__ inline void PostMessage() { if constexpr (A_TYPE::ibShare && B_TYPE::ibShare) { - ASSERT(DoMatmulNorm(MM_CFG) && "MM_CFG should use norm config when sameabb is enabled"); + ASSERT(DoMatmulNorm(MM_CFG) && "MM_CFG should use norm config when sameab is enabled"); if (GetSubBlockIdxImpl() == 1) { *((uint32_t *)&kfcMsg_.body) = 0; kfcMsg_.ubAddr = -1; -- Gitee From 7046e0a8db5e39590fb5885a31e7fe2775be7479 Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Thu, 19 Sep 2024 13:27:02 +0000 Subject: [PATCH 14/17] FIX Signed-off-by: jiangchengcheng-on --- tests/tiling/test_tiling.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/tiling/test_tiling.cpp b/tests/tiling/test_tiling.cpp index fc267ee0..2a66ea90 100644 --- a/tests/tiling/test_tiling.cpp +++ b/tests/tiling/test_tiling.cpp @@ -321,7 +321,7 @@ TEST_F(TestTiling, L1CacheUBCase02NeiABFullLoad) tiling.PrintTilingData(); EXPECT_EQ(ret, 0); EXPECT_EQ(tilingData.get_depthAL1CacheUB(), 0); - EXPECT_EQ(tilingData.get_depthBL1CacheUB(), 3); + EXPECT_EQ(tilingData.get_depthBL1CacheUB(), 0); } TEST_F(TestTiling, L1CacheUBCase03BothABFullLoad) @@ -341,7 +341,7 @@ TEST_F(TestTiling, L1CacheUBCase03BothABFullLoad) tiling.PrintTilingData(); EXPECT_EQ(ret, 0); EXPECT_EQ(tilingData.get_depthAL1CacheUB(), 1); - EXPECT_EQ(tilingData.get_depthBL1CacheUB(), 1); + EXPECT_EQ(tilingData.get_depthBL1CacheUB(), 0); } TEST_F(TestTiling, L1CacheUBCase04OnlyAFullLoad) @@ -360,7 +360,7 @@ TEST_F(TestTiling, L1CacheUBCase04OnlyAFullLoad) int ret = tiling.GetTiling(tilingData); tiling.PrintTilingData(); EXPECT_EQ(ret, 0); - EXPECT_EQ(tilingData.get_depthAL1CacheUB(), 1); + EXPECT_EQ(tilingData.get_depthAL1CacheUB(), 0); } 
TEST_F(TestTiling, L1CacheUBCase04OnlyBFullLoad) @@ -379,7 +379,7 @@ TEST_F(TestTiling, L1CacheUBCase04OnlyBFullLoad) int ret = tiling.GetTiling(tilingData); tiling.PrintTilingData(); EXPECT_EQ(ret, 0); - EXPECT_EQ(tilingData.get_depthBL1CacheUB(), 1); + EXPECT_EQ(tilingData.get_depthBL1CacheUB(), 0); } TEST_F(TestTiling, L1CacheUBCase05BothCache) @@ -400,7 +400,7 @@ TEST_F(TestTiling, L1CacheUBCase05BothCache) tiling.PrintTilingData(); EXPECT_EQ(ret, 0); EXPECT_EQ(tilingData.get_depthAL1CacheUB(), 1); - EXPECT_EQ(tilingData.get_depthBL1CacheUB(), 2); + EXPECT_EQ(tilingData.get_depthBL1CacheUB(), 20); } TEST_F(TestTiling, L1CacheUBCase06AMatIsTSCM) @@ -420,7 +420,7 @@ TEST_F(TestTiling, L1CacheUBCase06AMatIsTSCM) tiling.PrintTilingData(); EXPECT_EQ(ret, 0); EXPECT_EQ(tilingData.get_depthAL1CacheUB(), 0); - EXPECT_EQ(tilingData.get_depthBL1CacheUB(), 1); + EXPECT_EQ(tilingData.get_depthBL1CacheUB(), 0); } TEST_F(TestTiling, L1CacheUBCase07BMatIsTSCM) -- Gitee From e296745dbab51c68f5a8223ae7e5ed9417232a9b Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Thu, 19 Sep 2024 13:32:08 +0000 Subject: [PATCH 15/17] fix Signed-off-by: jiangchengcheng-on --- impl/matmul/matmul_tiling_algorithm.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/impl/matmul/matmul_tiling_algorithm.cpp b/impl/matmul/matmul_tiling_algorithm.cpp index 17a36ecc..00ec6a12 100644 --- a/impl/matmul/matmul_tiling_algorithm.cpp +++ b/impl/matmul/matmul_tiling_algorithm.cpp @@ -1529,8 +1529,9 @@ void MatmulTilingAlgorithm::CalcLoadSize(const DimFactor& blockDims, const CoreS constexpr int32_t minKSize = 64; constexpr int32_t minTotalSize = 128; const int32_t n0 = min(minMNSize, coreStatus.n); // need check m,n > 16 or m,n<16 - const int32_t m0 = (n0 == 0) ? 0 : min(minMNSize, min(coreStatus.m, minTotalSize / n0)); - const int32_t k0 = (m0 != 0 && n0 != 0) ? 
min(min(minKSize / m0, minKSize / n0), coreStatus.k) : coreStatus.k; + const int32_t m0 = min(minMNSize, ((n0 == 0) ? 0 : min(coreStatus.m, minTotalSize / n0))); + const int32_t k0 = (m0 != 0 && n0 != 0) ? + min(min(minKSize / m0, minKSize / n0), coreStatus.k) : coreStatus.k; const int32_t dbBuffer = 2; // A/B fullload or A fullload + B Kdim fullload or B fullload + A Kdim fullload(1/2/4) @@ -1631,7 +1632,8 @@ int32_t MatmulTilingAlgorithm::LoopNumFromSingleCoreToL0(const CoreStatusPack& c int32_t m0 = (n0 == 0) ? 0 : min(min(coreStatus.m, minTotalSize / n0), minSize); n0 = (m0 == 0) ? 0 : min(min(coreStatus.n, minTotalSize / m0), minSize); m0 = (n0 == 0) ? 0 : min(min(coreStatus.m, minTotalSize / n0), minSize); - const int32_t k0 = (m0 != 0 && n0 != 0) ? min(min(minSize / m0, minSize / n0), coreStatus.k) : coreStatus.k; + const int32_t k0 = (m0 != 0 && n0 != 0) ? + min(min(minSize / m0, minSize / n0), coreStatus.k) : coreStatus.k; const int32_t loopNum = MathUtil::CeilDivision(coreStatus.m, m0) * MathUtil::CeilDivision(coreStatus.n, n0) * MathUtil::CeilDivision(coreStatus.k, k0); return loopNum; @@ -1670,12 +1672,12 @@ int MatmulTilingAlgorithm::GetBigPackageCondition(CoreStatusPack &coreStatus, void MatmulTilingAlgorithm::GetBlockDimHelper(const DimFactor& blockDim, CoreStatusPack& coreStatus, BlockDimCalculator& blockDimRes, const MatmulRunParas& params) { - blockDimRes.kNum = params.k32 / blockDim.k * C0_SIZE * REDUCE_BLOCK_SIZE; // contain k * 16 + blockDimRes.kNum = (blockDim.k == 0) ? 0 : params.k32 / blockDim.k * C0_SIZE * REDUCE_BLOCK_SIZE; // contain k * 16 blockDimRes.kBytes = blockDimRes.kNum * INPUTDTYPE_BYTES; // contain k * 16 * 2 coreStatus.batch = MathUtil::CeilDivision(params.batch32, blockDim.batch); coreStatus.m = MathUtil::CeilDivision(params.m32, blockDim.m); coreStatus.n = MathUtil::CeilDivision(params.n32, blockDim.n); - coreStatus.k = params.k32 / blockDim.k; + coreStatus.k = (blockDim.k == 0) ? 
0 : params.k32 / blockDim.k; if (tilingIns_->enableSplitK_) { if (params.kMapped != params.k32) { // need check--splitK blockDimRes.kNum = params.kMapped / blockDim.k * NUM_TWO * C0_SIZE * REDUCE_BLOCK_SIZE; -- Gitee From 3a36af0a3f28f8c81e54554aaaf934e89c56bccb Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Thu, 19 Sep 2024 13:50:09 +0000 Subject: [PATCH 16/17] fix Signed-off-by: jiangchengcheng-on --- lib/matmul/matmul_intf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/matmul/matmul_intf.h b/lib/matmul/matmul_intf.h index 6431b73c..6b62bbf9 100644 --- a/lib/matmul/matmul_intf.h +++ b/lib/matmul/matmul_intf.h @@ -62,7 +62,7 @@ __aicore__ inline void InitCurObj(AscendC::TPipe* tpipe, T& a, Args&&... b) #else namespace matmul { -template , MATMUL_POLICY_DEFAULT_OF(MatmulPolicy)> using Matmul = matmul::MatmulClient; } @@ -176,7 +176,7 @@ using Matmul = matmul::MatmulImpl, MATMUL_POLICY_DEFAULT_OF(MatmulPolicy)> -using Matmul = MatmulServiceAux; +using Matmul = matmul::MatmulServiceAux; } #endif -- Gitee From b26c2d2054c9a35e76569c632bc41080ae328def Mon Sep 17 00:00:00 2001 From: jiangchengcheng-on Date: Thu, 19 Sep 2024 14:04:28 +0000 Subject: [PATCH 17/17] fix error Signed-off-by: jiangchengcheng-on --- lib/matmul/matmul_intf.h | 61 ++++++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/lib/matmul/matmul_intf.h b/lib/matmul/matmul_intf.h index 6b62bbf9..a5f2c127 100644 --- a/lib/matmul/matmul_intf.h +++ b/lib/matmul/matmul_intf.h @@ -25,7 +25,7 @@ #if __CCE_AICORE__ == 220 #ifdef ASCENDC_CUBE_ONLY namespace matmul { -template , MATMUL_POLICY_DEFAULT_OF(MatmulPolicy)> using Matmul = matmul::MatmulImpl; } @@ -62,7 +62,7 @@ __aicore__ inline void InitCurObj(AscendC::TPipe* tpipe, T& a, Args&&... 
b) #else namespace matmul { -template , MATMUL_POLICY_DEFAULT_OF(MatmulPolicy)> using Matmul = matmul::MatmulClient; } @@ -114,10 +114,11 @@ __aicore__ inline void InitCurObj(AscendC::TPipe* tpipe, T& a, Args&&... b) #define REGIST_MATMUL_OBJ(tpipe, workspace, ...) \ InitCurObj(tpipe, __VA_ARGS__) namespace matmul { -template , MATMUL_POLICY_DEFAULT_OF(MatmulPolicy)> using Matmul = matmul::MatmulImpl; } + #endif #else @@ -152,29 +153,52 @@ __aicore__ inline void InitCurObj(AscendC::TPipe* tpipe, T& a, Args&&... b) } } +#ifdef ASCENDC_TIME_STAMP_ON +#define REGIST_MATMUL_OBJ(tpipe, workspace, ...) \ + InitCurObj(tpipe, __VA_ARGS__); \ + AscendC::AscendCTimeStamp(static_cast(AscendC::TimeStampId::TIME_STAMP_MATMUL_SERVER_OBJ)) +#else #define REGIST_MATMUL_OBJ(tpipe, workspace, ...) \ InitCurObj(tpipe, __VA_ARGS__) +#endif #define REGIST_MATMUL_OBJ_REMOTE(tpipe, workspace, ...) namespace matmul { -template , MATMUL_POLICY_DEFAULT_OF(MatmulPolicy)> using Matmul = matmul::MatmulImpl; } #else +#ifdef ASCENDC_TIME_STAMP_ON #define REGIST_MATMUL_OBJ(tpipe, workspace, ...) \ if ASCEND_IS_AIC { \ AscendC::KfcServer server; \ + AscendC::AscendCTimeStamp(static_cast(AscendC::TimeStampId::TIME_STAMP_MATMUL_SERVER)); \ server.Init(workspace); \ + AscendC::AscendCTimeStamp(static_cast(AscendC::TimeStampId::TIME_STAMP_MATMUL_SERVER_INIT)); \ server.InitObj(tpipe, __VA_ARGS__); \ + AscendC::AscendCTimeStamp(static_cast(AscendC::TimeStampId::TIME_STAMP_MATMUL_SERVER_OBJ)); \ while (server.isRun()) { \ server.Run(__VA_ARGS__); \ }; \ server.Quit(); \ return; \ } +#else +#define REGIST_MATMUL_OBJ(tpipe, workspace, ...) 
\ + if ASCEND_IS_AIC { \ + AscendC::KfcServer server; \ + server.Init(workspace); \ + server.InitObj(tpipe, __VA_ARGS__); \ + while (server.isRun()) { \ + server.Run(__VA_ARGS__); \ + }; \ + server.Quit(); \ + return; \ + } +#endif namespace matmul { -template , MATMUL_POLICY_DEFAULT_OF(MatmulPolicy)> using Matmul = matmul::MatmulServiceAux; } @@ -185,14 +209,25 @@ using Matmul = matmul::MatmulServiceAux(AscendC::TimeStampId::TIME_STAMP_MATMUL_CLIENT_KFC)); \ + AscendC::SetMatrixKfc(tpipe, &__kfcClient__, 0, workspace, __VA_ARGS__); \ + AscendC::AscendCTimeStamp(static_cast(AscendC::TimeStampId::TIME_STAMP_MATMUL_MATRIX_KFC)); \ + AscendC::WaitEvent(matmul::WORKSPACE_SYNC_ID); \ + AscendC::AscendCTimeStamp(static_cast(AscendC::TimeStampId::TIME_STAMP_MATMUL_WAIT_EVE)) +#else #define REGIST_MATMUL_OBJ(tpipe, workspace, ...) \ AscendC::KfcCommClient __kfcClient__(workspace, AscendC::GetSubBlockIdx()); \ AscendC::g_kfcClient = &__kfcClient__; \ AscendC::SetMatrixKfc(tpipe, &__kfcClient__, 0, workspace, __VA_ARGS__); \ AscendC::WaitEvent(matmul::WORKSPACE_SYNC_ID) #endif +#endif namespace matmul { -template , MATMUL_POLICY_DEFAULT_OF(MatmulPolicy)> using Matmul = matmul::MatmulClient; } @@ -225,16 +260,20 @@ __aicore__ inline void InitCurObj(AscendC::TPipe* tpipe, T& a, Args&&... b) } } +#ifdef ASCENDC_TIME_STAMP_ON +#define REGIST_MATMUL_OBJ(tpipe, workspace, ...) \ + InitCurObj(tpipe, __VA_ARGS__); \ + AscendC::AscendCTimeStamp(static_cast(AscendC::TimeStampId::TIME_STAMP_MATMUL_OBJ)) +#else #define REGIST_MATMUL_OBJ(tpipe, workspace, ...) \ InitCurObj(tpipe, __VA_ARGS__) - +#endif namespace matmul { -template , MATMUL_POLICY_DEFAULT_OF(MatmulPolicy)> using Matmul = matmul::MatmulImpl; -} +} //namespace matmul #endif #endif - -#endif \ No newline at end of file +#endif \ No newline at end of file -- Gitee