diff --git a/README.md b/README.md index c7b7dc03f72707f94d8cb3ae86a3859b0ca11d37..3c1d2412d5241326984fd04ae7c2688b914a39f7 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ SHMEM - Atlas 800I A2/A3 系列产品 - Atlas 800T A2/A3 系列产品 - 平台:aarch64/x86 -- 配套软件:驱动固件 Ascend HDK 25.0.RC1.1、 CANN 8.2.RC1及之后版本。Ascend HDK版本为商发版本,CANN版本为社区版本,暂无支持商用版本。(参考《[CANN软件安装指南](https://www.hiascend.com/document/detail/zh/canncommercial/81RC1/softwareinst/instg/instg_0000.html?Mode=PmIns&InstallType=local&OS=Ubuntu&Software=cannToolKit)》安装CANN开发套件包以及配套固件和驱动) +- 配套软件:驱动固件 Ascend HDK 25.0.RC1.1、 CANN 8.2.RC1及之后版本。Ascend HDK版本为商发版本,CANN版本为社区版本,暂无支持商用版本。(参考《[CANN软件安装指南](https://www.hiascend.com/document/detail/zh/canncommercial/82RC1/softwareinst/instg/instg_0000.html?Mode=PmIns&InstallType=local&OS=Ubuntu&Software=cannToolKit)》安装CANN开发套件包以及配套固件和驱动) cmake >= 3.19 GLIBC >= 2.28 diff --git a/docs/quickstart.md b/docs/quickstart.md index ca7a1964204b04f29c15ff70dd56de7e0ca9ca04..b076c0d40c542744aa08a9a8e9d29a47273cdf41 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -25,7 +25,7 @@ - Atlas 800I A2/A3 系列产品 - Atlas 800T A2/A3 系列产品 - 平台:aarch64/x86 -- 配套软件:驱动固件 Ascend HDK 25.0.RC1.1、 CANN 8.2.RC1及之后版本。Ascend HDK版本为商发版本,CANN版本为社区版本,暂无支持商用版本。(参考《[CANN软件安装指南](https://www.hiascend.com/document/detail/zh/canncommercial/81RC1/softwareinst/instg/instg_0000.html?Mode=PmIns&InstallType=local&OS=Ubuntu&Software=cannToolKit)》安装CANN开发套件包以及配套固件和驱动) +- 配套软件:驱动固件 Ascend HDK 25.0.RC1.1、 CANN 8.2.RC1及之后版本。Ascend HDK版本为商发版本,CANN版本为社区版本,暂无支持商用版本。(参考《[CANN软件安装指南](https://www.hiascend.com/document/detail/zh/canncommercial/82RC1/softwareinst/instg/instg_0000.html?Mode=PmIns&InstallType=local&OS=Ubuntu&Software=cannToolKit)》安装CANN开发套件包以及配套固件和驱动) cmake >= 3.19 GLIBC >= 2.28 diff --git a/examples/allgather/main.cpp b/examples/allgather/main.cpp index c26782d4eeb8ea972258a7377bf3ec45a8079beb..af8d6d520e858d2640adf7c7232932f644cb87f7 100644 --- a/examples/allgather/main.cpp +++ b/examples/allgather/main.cpp @@ -19,9 +19,9 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include "fp16_t.h" #include "bfloat16.h" diff --git a/examples/matmul_allreduce/main.cpp b/examples/matmul_allreduce/main.cpp index 7fbdb92d063e09b6a8884878710553ee4dac2a1c..a9356b70a3eaedfcc94e19b2d2c863527b4f03c5 100644 --- a/examples/matmul_allreduce/main.cpp +++ b/examples/matmul_allreduce/main.cpp @@ -26,9 +26,9 @@ #include #include #include -#include -#include -#include +#include +#include +#include // from catlass @@ -286,27 +286,27 @@ int main(int argc, char **argv) size_t workspaceSize = static_cast(m) * n * sizeof(__fp16); uint8_t *aDevice; - ACL_CHECK(aclrtMalloc((void **)(&aDevice), aSize, ACL_MEM_MALLOC_HUGE_FIRST)); + ACL_CHECK(aclrtMalloc(reinterpret_cast(&aDevice), aSize, ACL_MEM_MALLOC_HUGE_FIRST)); uint8_t *aHost; - ACL_CHECK(aclrtMallocHost((void **)(&aHost), aSize)); + ACL_CHECK(aclrtMallocHost(reinterpret_cast(&aHost), aSize)); std::string dataPath = argv[8]; std::string aPath = dataPath + "/rank_" + std::to_string(rankId) + "_a.bin"; ReadFile(aPath.c_str(), aHost, aSize); ACL_CHECK(aclrtMemcpy(aDevice, aSize, aHost, aSize, ACL_MEMCPY_HOST_TO_DEVICE)); uint8_t *bDevice; - ACL_CHECK(aclrtMalloc((void **)(&bDevice), bSize, ACL_MEM_MALLOC_HUGE_FIRST)); + ACL_CHECK(aclrtMalloc(reinterpret_cast(&bDevice), bSize, ACL_MEM_MALLOC_HUGE_FIRST)); uint8_t *bHost; - ACL_CHECK(aclrtMallocHost((void **)(&bHost), bSize)); + ACL_CHECK(aclrtMallocHost(reinterpret_cast(&bHost), bSize)); std::string bPath = dataPath + "/rank_" + std::to_string(rankId) + "_b.bin"; ReadFile(bPath.c_str(), bHost, bSize); ACL_CHECK(aclrtMemcpy(bDevice, bSize, bHost, bSize, ACL_MEMCPY_HOST_TO_DEVICE)); uint8_t *cDevice; - ACL_CHECK(aclrtMalloc((void **)(&cDevice), cSize, ACL_MEM_MALLOC_HUGE_FIRST)); + ACL_CHECK(aclrtMalloc(reinterpret_cast(&cDevice), cSize, ACL_MEM_MALLOC_HUGE_FIRST)); uint8_t *cHost; - ACL_CHECK(aclrtMallocHost((void **)(&cHost), cSize)); - memset(cHost, 0, cSize); // 零初始化 C 矩阵 + ACL_CHECK(aclrtMallocHost(reinterpret_cast(&cHost), cSize)); + explicit_bzero(cHost, cSize); // 零初始化 C 矩阵 ACL_CHECK(aclrtMemcpy(cDevice, cSize, cHost, cSize, ACL_MEMCPY_HOST_TO_DEVICE)); void *symmPtr = shmem_malloc((204 * 1024 * 1024) * sizeof(__fp16)); diff --git a/src/host/init/shmem_init.cpp b/src/host/init/shmem_init.cpp index fccc2880dfee50138c02542696c631c1f98e64de..57baef6d98e52fb44307bbc5d31fc69afba6f816 100644 --- a/src/host/init/shmem_init.cpp +++ b/src/host/init/shmem_init.cpp @@ -7,8 +7,8 @@ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. * See LICENSE in the root of the software repository for the full text of the License. */ -#include -#include +#include +#include #include #include #include "acl/acl.h" diff --git a/src/host/mem/shmem_rma.cpp b/src/host/mem/shmem_rma.cpp index a0c86f574bd953d14cd230cf6ea2a3a5ef7c0a05..7f3ffd0722328946b1f23584f240ced636e7ee19 100644 --- a/src/host/mem/shmem_rma.cpp +++ b/src/host/mem/shmem_rma.cpp @@ -27,7 +27,7 @@ void* shmem_ptr(void *ptr, int32_t pe) uint64_t offset = (uint64_t)ptr - (uint64_t)shm::g_state.heap_base; void *symm_ptr = shm::g_state.p2p_heap_base[pe]; if (symm_ptr != nullptr) { - symm_ptr = (void *)((uint64_t)symm_ptr + offset); + symm_ptr = reinterpret_cast(reinterpret_cast(symm_ptr) + offset); return symm_ptr; } SHM_LOG_ERROR("shmem_ptr Failed. PE: " << shmem_my_pe() << " g_state.p2p_heap_base contains nullptr, Please Check Init Status!!"); diff --git a/src/host/sync/shmemi_sync.cpp b/src/host/sync/shmemi_sync.cpp index 1f30aa7b3f88bbf81c082a5d841155834f707e48..5b232e60c7e9c4b0b4ce39fa41bd66d02e529806 100644 --- a/src/host/sync/shmemi_sync.cpp +++ b/src/host/sync/shmemi_sync.cpp @@ -7,10 +7,10 @@ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. * See LICENSE in the root of the software repository for the full text of the License. */ -#include -#include -#include -#include +#include +#include +#include +#include #include #include diff --git a/src/host/team/shmem_team.cpp b/src/host/team/shmem_team.cpp index 75093f62a334dac61cf66f87c28215b9b2ed2f3c..ad7e82ff5e63ef6c47c7f2e4cb6bdb6290afd84a 100644 --- a/src/host/team/shmem_team.cpp +++ b/src/host/team/shmem_team.cpp @@ -7,10 +7,10 @@ * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. * See LICENSE in the root of the software repository for the full text of the License. */ -#include -#include -#include -#include +#include +#include +#include +#include #include #include diff --git a/tests/unittest/mem/gm_mem/gm_mem_host_test.cpp b/tests/unittest/mem/gm_mem/gm_mem_host_test.cpp index 6129407f8574555d7784ccec866a05fba6e03dff..cf5271d365feca3a2eb1c81b11d620c8b2878aca 100644 --- a/tests/unittest/mem/gm_mem/gm_mem_host_test.cpp +++ b/tests/unittest/mem/gm_mem/gm_mem_host_test.cpp @@ -31,7 +31,7 @@ SHMEM_FUNC_TYPE_HOST(TEST_FUNC); #define TEST_PUT_GET(NAME, TYPE) \ static void test_##NAME##_put_get(aclrtStream stream, uint8_t *gva, uint32_t rank_id, uint32_t rank_size) \ { \ - int total_size = input_length * (int)rank_size; \ + int total_size = input_length * static_cast(rank_size); \ size_t input_size = total_size * sizeof(TYPE); \ \ std::vector input(total_size, 0); \ diff --git a/tests/unittest/mem/scalar/scalar_host_test.cpp b/tests/unittest/mem/scalar/scalar_host_test.cpp index f8984425e92023f9f14e3333f89db4ee82145f30..5311ec325bd592747fa9dc64660258816f999089 100644 --- a/tests/unittest/mem/scalar/scalar_host_test.cpp +++ b/tests/unittest/mem/scalar/scalar_host_test.cpp @@ -32,14 +32,14 @@ SHMEM_FUNC_TYPE_HOST(PUT_ONE_NUM_DO); { \ TYPE *y_host; \ size_t input_size = total_size * sizeof(TYPE); \ - EXPECT_EQ(aclrtMallocHost((void **)(&y_host), input_size), 0); /* size = 1024 */ \ + EXPECT_EQ(aclrtMallocHost(reinterpret_cast(&y_host), input_size), 0); /* size = 1024 */ \ \ void *dev_ptr; \ EXPECT_EQ(aclrtMalloc(&dev_ptr, input_size, ACL_MEM_MALLOC_NORMAL_ONLY), 0); \ \ uint32_t block_dim = 1; \ \ - TYPE value = static_cast(test_offset) + (TYPE)rank_id; \ + TYPE value = static_cast(test_offset) + static_cast(rank_id); \ EXPECT_EQ(aclrtMemcpy(dev_ptr, 1 * sizeof(TYPE), &value, 1 * sizeof(TYPE), ACL_MEMCPY_DEVICE_TO_HOST), 0); \ void *ptr = shmem_malloc(total_size); \ put_##NAME##_one_num_do(block_dim, stream, shmemx_get_ffts_config(), (uint8_t *)ptr, (uint8_t *)dev_ptr); \ diff --git a/tests/unittest/mem/shmem_host_heap_test.cpp b/tests/unittest/mem/shmem_host_heap_test.cpp index 4dea69eb3e04d52b8266bbe4549c6730a7bf0e9f..e326c1c95a815fb4ff3f9c1580ee2b7bb1e93fd0 100644 --- a/tests/unittest/mem/shmem_host_heap_test.cpp +++ b/tests/unittest/mem/shmem_host_heap_test.cpp @@ -15,7 +15,7 @@ #include "shmemi_host_common.h" #include "unittest_main_test.h" -static uint8_t *const heap_memory_start = (uint8_t *)(ptrdiff_t)0x100000000UL; +static uint8_t *const heap_memory_start = reinterpret_cast(static_cast(0x100000000UL)); static uint64_t heap_memory_size = 4UL * 1024UL * 1024UL; static aclrtStream heap_memory_stream; @@ -141,7 +141,7 @@ TEST_F(ShareMemoryManagerTest, calloc_one_piece_success) auto ptr = static_cast(shmem_calloc(nmemb, elemSize)); EXPECT_NE(nullptr, ptr); uint32_t *ptr_host; - ASSERT_EQ(aclrtMallocHost((void**)&ptr_host, sizeof(uint32_t) * nmemb), 0); + ASSERT_EQ(aclrtMallocHost(reinterpret_cast(&ptr_host), sizeof(uint32_t) * nmemb), 0); ASSERT_EQ(aclrtMemcpy(ptr_host, sizeof(uint32_t) * nmemb, ptr, sizeof(uint32_t) * nmemb, ACL_MEMCPY_DEVICE_TO_HOST), 0); for (size_t i = 0; i < nmemb; ++i) { EXPECT_EQ(ptr_host[i], 0u); @@ -164,7 +164,7 @@ TEST_F(ShareMemoryManagerTest, calloc_full_space_success) auto ptr = shmem_calloc(nmemb, heap_memory_size / nmemb); EXPECT_NE(nullptr, ptr); uint32_t *ptr_host; - ASSERT_EQ(aclrtMallocHost((void**)&ptr_host, sizeof(uint32_t) * nmemb), 0); + ASSERT_EQ(aclrtMallocHost(reinterpret_cast(&ptr_host), sizeof(uint32_t) * nmemb), 0); ASSERT_EQ(aclrtMemcpy(ptr_host, heap_memory_size, ptr, heap_memory_size, ACL_MEMCPY_DEVICE_TO_HOST), 0); for (size_t i = 0; i < nmemb; ++i) { EXPECT_EQ(ptr_host[i], 0u); diff --git a/tests/unittest/mem/shmem_ptr_host_test.cpp b/tests/unittest/mem/shmem_ptr_host_test.cpp index 45b9e6916ddc16219f97a08067d70706dd6fa127..7955805f4ce83df352ccacdc59b3683ee4c2fdf2 100644 --- a/tests/unittest/mem/shmem_ptr_host_test.cpp +++ b/tests/unittest/mem/shmem_ptr_host_test.cpp @@ -20,7 +20,7 @@ static int32_t test_get_device_ptr(aclrtStream stream, uint8_t *ptr, int rank_id { int *y_host; size_t input_size = 2 * sizeof(int); - EXPECT_EQ(aclrtMallocHost((void **)(&y_host), input_size), 0); + EXPECT_EQ(aclrtMallocHost(reinterpret_cast(&y_host), input_size), 0); uint32_t block_dim = 1; int32_t device_id; diff --git a/tests/unittest/sync/barrier/barrier_host_test.cpp b/tests/unittest/sync/barrier/barrier_host_test.cpp index 30a3d3dd944c8611ce0c384a6cc14971d44a9f90..9dbe7895db67318619248e02d64b26377a80e6be 100644 --- a/tests/unittest/sync/barrier/barrier_host_test.cpp +++ b/tests/unittest/sync/barrier/barrier_host_test.cpp @@ -25,10 +25,10 @@ static void test_barrier_black_box(int32_t rank_id, int32_t n_ranks, uint64_t lo test_init(rank_id, n_ranks, local_mem_size, &stream); ASSERT_NE(stream, nullptr); - uint64_t *addr_dev = (uint64_t *)shmem_malloc(sizeof(uint64_t)); + uint64_t *addr_dev = reinterpret_cast(shmem_malloc(sizeof(uint64_t))); ASSERT_EQ(aclrtMemset(addr_dev, sizeof(uint64_t), 0, sizeof(uint64_t)), 0); uint64_t *addr_host; - ASSERT_EQ(aclrtMallocHost((void **)&addr_host, sizeof(uint64_t)), 0); + ASSERT_EQ(aclrtMallocHost(reinterpret_cast(&addr_host), sizeof(uint64_t)), 0); for (int32_t i = 1; i <= SHMEM_BARRIER_TEST_NUM; i++) { std::cout << "[TEST] barriers test blackbox rank_id: " << rank_id << " time: " << i << std::endl; @@ -39,10 +39,10 @@ static void test_barrier_black_box(int32_t rank_id, int32_t n_ranks, uint64_t lo shm::shmemi_control_barrier_all(); } - uint64_t *addr_dev_vec = (uint64_t *)shmem_malloc(sizeof(uint64_t)); + uint64_t *addr_dev_vec = reinterpret_cast(shmem_malloc(sizeof(uint64_t))); ASSERT_EQ(aclrtMemset(addr_dev_vec, sizeof(uint64_t), 0, sizeof(uint64_t)), 0); uint64_t *addr_host_vec; - ASSERT_EQ(aclrtMallocHost((void **)&addr_host_vec, sizeof(uint64_t)), 0); + ASSERT_EQ(aclrtMallocHost(reinterpret_cast(&addr_host_vec), sizeof(uint64_t)), 0); for (int32_t i = 1; i <= SHMEM_BARRIER_TEST_NUM; i++) { std::cout << "[TEST] vec barriers test blackbox rank_id: " << rank_id << " time: " << i << std::endl; @@ -76,15 +76,15 @@ static void test_barrier_black_box_odd_team(int32_t rank_id, int32_t n_ranks, ui int team_size = n_ranks / 2; shmem_team_split_strided(SHMEM_TEAM_WORLD, start, stride, team_size, &team_odd); - uint64_t *addr_dev = (uint64_t *)shmem_malloc(sizeof(uint64_t)); + uint64_t *addr_dev = reinterpret_cast(shmem_malloc(sizeof(uint64_t))); ASSERT_EQ(aclrtMemset(addr_dev, sizeof(uint64_t), 0, sizeof(uint64_t)), 0); uint64_t *addr_host; - ASSERT_EQ(aclrtMallocHost((void **)&addr_host, sizeof(uint64_t)), 0); + ASSERT_EQ(aclrtMallocHost(reinterpret_cast(&addr_host), sizeof(uint64_t)), 0); - uint64_t *addr_dev_vec = (uint64_t *)shmem_malloc(sizeof(uint64_t)); + uint64_t *addr_dev_vec = reinterpret_cast(shmem_malloc(sizeof(uint64_t))); ASSERT_EQ(aclrtMemset(addr_dev_vec, sizeof(uint64_t), 0, sizeof(uint64_t)), 0); uint64_t *addr_host_vec; - ASSERT_EQ(aclrtMallocHost((void **)&addr_host_vec, sizeof(uint64_t)), 0); + ASSERT_EQ(aclrtMallocHost(reinterpret_cast(&addr_host_vec), sizeof(uint64_t)), 0); if (rank_id & 1) { for (int32_t i = 1; i <= SHMEM_BARRIER_TEST_NUM; i++) { diff --git a/tests/unittest/sync/order/order_host_test.cpp b/tests/unittest/sync/order/order_host_test.cpp index 409c876d5327459ad8603cc1ab233dfa93e4c886..368f6dc0bc867ab0426d389a7adcdbd7279fdffb 100644 --- a/tests/unittest/sync/order/order_host_test.cpp +++ b/tests/unittest/sync/order/order_host_test.cpp @@ -25,7 +25,7 @@ static void test_quiet_order(int32_t rank_id, int32_t n_ranks, uint64_t local_me ASSERT_NE(stream, nullptr); int total_size = 64; - uint64_t *dev_ptr = (uint64_t*)shmem_malloc(total_size * sizeof(uint64_t)); + uint64_t *dev_ptr = reinterpret_cast(shmem_malloc(total_size * sizeof(uint64_t))); ASSERT_EQ(aclrtMemset(dev_ptr, 64 * sizeof(uint64_t), 0, total_size * sizeof(uint64_t)), 0); std::vector host_buf(total_size, 0); @@ -61,7 +61,7 @@ static void test_fence_order(int32_t rank_id, int32_t n_ranks, uint64_t local_me ASSERT_NE(stream, nullptr); int total_size = 64; - uint64_t *addr_dev = (uint64_t *)shmem_malloc(total_size * sizeof(uint64_t)); + uint64_t *addr_dev = reinterpret_cast(shmem_malloc(total_size * sizeof(uint64_t))); ASSERT_EQ(aclrtMemset(addr_dev, total_size * sizeof(uint64_t), 0, total_size * sizeof(uint64_t)), 0); std::vector addr_host(total_size, 0); diff --git a/tests/unittest/sync/p2p/p2p_host_test.cpp b/tests/unittest/sync/p2p/p2p_host_test.cpp index b6d0e1c18fd1940da98bab421951d794e118663f..bf8bda79169f1b37311b35808c7e24918038a525 100644 --- a/tests/unittest/sync/p2p/p2p_host_test.cpp +++ b/tests/unittest/sync/p2p/p2p_host_test.cpp @@ -21,7 +21,7 @@ static void test_p2p(int rank_id, int rank_size, uint64_t local_mem_size) { aclrtStream stream; test_init(rank_id, rank_size, local_mem_size, &stream); - int32_t *addr_dev = (int32_t *)shmem_malloc(sizeof(int32_t)); + int32_t *addr_dev = reinterpret_cast(shmem_malloc(sizeof(int32_t))); ASSERT_EQ(aclrtMemset(addr_dev, sizeof(int32_t), 0, sizeof(int32_t)), 0); p2p_chain_do(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev, rank_id, rank_size); ASSERT_EQ(aclrtSynchronizeStream(stream), 0); diff --git a/tests/unittest/team/team/team_host_test.cpp b/tests/unittest/team/team/team_host_test.cpp index aea742d74ca768bbba40ad1582c6bbcb2464dc2a..1e7466a490b1cf37d006904da47fe0feacbe9c77 100644 --- a/tests/unittest/team/team/team_host_test.cpp +++ b/tests/unittest/team/team/team_host_test.cpp @@ -24,7 +24,7 @@ static int32_t test_get_device_state(aclrtStream stream, uint8_t *gva, uint32_t { int *y_host; size_t input_size = 1024 * sizeof(int); - EXPECT_EQ(aclrtMallocHost((void **) (&y_host), input_size), 0); // size = 1024 + EXPECT_EQ(aclrtMallocHost(reinterpret_cast(&y_host), input_size), 0); // size = 1024 uint32_t block_dim = 1; void *ptr = shmem_malloc(1024); diff --git a/tests/unittest/team/team_allgather/team_allgather_test.cpp b/tests/unittest/team/team_allgather/team_allgather_test.cpp index 67016c5a852ed32eb659fd67adad7d5fbad03a0a..185ccf025876c99982a19eda85d39c323709cf06 100644 --- a/tests/unittest/team/team_allgather/team_allgather_test.cpp +++ b/tests/unittest/team/team_allgather/team_allgather_test.cpp @@ -49,7 +49,7 @@ void test_shmem_team_all_gather(int rank_id, int n_ranks, uint64_t local_mem_siz // Check results int32_t *y_host; size_t input_size = team_size * trans_size * sizeof(int32_t); - EXPECT_EQ(aclrtMallocHost((void **) (&y_host), input_size), 0); + EXPECT_EQ(aclrtMallocHost(reinterpret_cast(&y_host), input_size), 0); EXPECT_EQ(aclrtMemcpy(y_host, input_size, ptr, input_size, ACL_MEMCPY_DEVICE_TO_HOST), 0); for (int i = 0; i < team_size; i++) {