From 176cd8f71009dfad861aef3e06a3227908ea894d Mon Sep 17 00:00:00 2001
From: ddd <10165743+oniond@user.noreply.gitee.com>
Date: Tue, 16 Sep 2025 09:31:52 +0000
Subject: [PATCH] add comm test

Fix the inverted include guard in src/comm/kupl.h (#ifdef -> #ifndef) and
add MPI tests for all_gather and transpose, together with a Makefile and a
launcher script.

Signed-off-by: ddd <10165743+oniond@user.noreply.gitee.com>
---
Review notes (this section is ignored by `git am`):
- The tests now report failure through their exit status, and comm.sh
  propagates it.
- transpose_test.cpp computes block_n only after MPI_Comm_size() and checks
  every rank, not just ranks 0 and 1.
- Expected data is derived per rank instead of being hard-coded for two ranks.
- Makefile: `clean` no longer references the undefined $(OBJECTS); .PHONY
  added; $(CURDIR) replaces $(PWD).
- comm.sh: the helper is no longer called `test` (shadowed the shell builtin).
- NOTE(review): the <...> include targets and the std::vector element types
  were not legible in the submitted patch. The values used below are
  assumptions and must be confirmed against kutacc.h.
- NOTE(review): the trailing context of the kupl.h hunk was shortened for the
  same reason, and the index lines of the rewritten new files were dropped.

 src/comm/kupl.h          |  2 +-
 test/Makefile            | 21 +++++++++
 test/all_gather_test.cpp | 76 ++++++++++++++++++++++++++++++++
 test/comm.sh             | 32 ++++++++++++++
 test/transpose_test.cpp  | 91 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 221 insertions(+), 1 deletion(-)
 create mode 100644 test/Makefile
 create mode 100644 test/all_gather_test.cpp
 create mode 100644 test/comm.sh
 create mode 100644 test/transpose_test.cpp

diff --git a/src/comm/kupl.h b/src/comm/kupl.h
index 0570e28..313677e 100644
--- a/src/comm/kupl.h
+++ b/src/comm/kupl.h
@@ -12,5 +12,5 @@
  * SOFTWARE.
  */
 
-#ifdef KUTACC_KUPL_H
+#ifndef KUTACC_KUPL_H
 #define KUTACC_KUPL_H
diff --git a/test/Makefile b/test/Makefile
new file mode 100644
--- /dev/null
+++ b/test/Makefile
@@ -0,0 +1,21 @@
+# Builds the kutacc communication tests (run them with comm.sh).
+# CURDIR is set by make itself, so the default also works with `make -C test`.
+KUTACC_ROOT ?= $(CURDIR)/..
+
+CXX = mpic++
+CXXFLAGS = -I$(KUTACC_ROOT)/include/ -L$(KUTACC_ROOT)/install/lib/ -lkutacc -stdlib=libc++
+
+TARGETS = transpose all_gather
+
+.PHONY: all clean
+
+all: $(TARGETS)
+
+transpose: transpose_test.cpp
+	$(CXX) -o $@ $^ $(CXXFLAGS)
+
+all_gather: all_gather_test.cpp
+	$(CXX) -o $@ $^ $(CXXFLAGS)
+
+clean:
+	rm -f $(TARGETS)
diff --git a/test/all_gather_test.cpp b/test/all_gather_test.cpp
new file mode 100644
--- /dev/null
+++ b/test/all_gather_test.cpp
@@ -0,0 +1,76 @@
+// All-gather test: every rank contributes an m x n x len block and checks
+// that the gathered tensor holds the blocks of all ranks in rank order.
+#include <mpi.h>
+
+#include <cstdint>
+#include <iostream>
+#include <vector>
+
+#include "kutacc.h"
+
+namespace {
+
+// NOTE(review): the element type is an assumption. A 16-bit integer is used
+// on the premise that kBF16 elements are 2 bytes wide and that the
+// collective only moves bytes -- confirm against the kutacc API.
+using elem_t = std::uint16_t;
+
+// Rank r fills its block counting up from r * kRankValueStride (0 for rank 0,
+// 32 for rank 1), so blocks of different ranks never share a value.
+constexpr int64_t kRankValueStride = 32;
+
+}  // namespace
+
+int main(int argc, char *argv[])
+{
+    int world_size = 2, rank = 0;
+    const int m = 4, n = 5, len = 1;
+    int64_t dim = 3, buffer_size = m * n * len;
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    kutacc_initialize(world_size, rank, buffer_size);
+
+    // Expected output: the block of rank r occupies rows [r * m, (r + 1) * m).
+    std::vector<elem_t> expect_data(world_size * m * n * len);
+    for (int r = 0; r < world_size; ++r) {
+        int64_t val = r * kRankValueStride;
+        for (int i = r * m; i < (r + 1) * m; ++i) {
+            for (int j = 0; j < n; ++j) {
+                expect_data[i * n * len + j * len] = static_cast<elem_t>(val++);
+            }
+        }
+    }
+
+    // NOTE(review): int64_t shape vectors are assumed to match `dim`;
+    // confirm against the kutacc::TensorWrapper constructor.
+    std::vector<int64_t> sizes = {m, n, len};
+    std::vector<int64_t> strides = {n * len, len, 1};
+    std::vector<elem_t> data(m * n * len);
+
+    int64_t val = rank * kRankValueStride;
+    for (int i = 0; i < m; ++i) {
+        for (int j = 0; j < n; ++j) {
+            data[i * n * len + j * len] = static_cast<elem_t>(val++);
+        }
+    }
+
+    std::vector<int64_t> out_sizes = {world_size * m, n, len};
+    std::vector<int64_t> out_strides = {n * len, len, 1};
+    std::vector<elem_t> out_data(world_size * m * n * len);
+
+    kutacc::TensorWrapper in(data.data(), sizes, strides, dim, kutacc::kBF16);
+    kutacc::TensorWrapper out(out_data.data(), out_sizes, out_strides, dim, kutacc::kBF16);
+
+    kutacc_af2_all_gather(in.get_tensor(), out.get_tensor());
+
+    const bool passed = (out_data == expect_data);
+    std::cout << (passed ? "allgather pass" : "allgather fail") << std::endl;
+
+    kutacc_finalize();
+    MPI_Finalize();
+
+    // A non-zero exit status lets mpirun and comm.sh detect the failure.
+    return passed ? 0 : 1;
+}
diff --git a/test/comm.sh b/test/comm.sh
new file mode 100644
--- /dev/null
+++ b/test/comm.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Runs the kutacc communication tests built by the Makefile in this directory.
+
+set -u
+
+world_size=2
+num_threads=64
+script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+
+# Launches one test binary under mpirun and returns mpirun's exit status.
+# Deliberately not named `test`, which would shadow the shell builtin.
+function run_test()
+{
+    local method=$1
+    SHMID=kutacc_comm_test mpirun \
+        --allow-run-as-root \
+        -n "${world_size}" \
+        --map-by numa \
+        -x OMP_NUM_THREADS="${num_threads}" \
+        "${script_dir}/${method}"
+}
+
+# Runs every test; fails if at least one of them failed.
+function main()
+{
+    local status=0
+    run_test all_gather || status=1
+    run_test transpose || status=1
+    return "${status}"
+}
+
+main
diff --git a/test/transpose_test.cpp b/test/transpose_test.cpp
new file mode 100644
--- /dev/null
+++ b/test/transpose_test.cpp
@@ -0,0 +1,91 @@
+// Transpose test: every rank owns an m x n x len block; afterwards rank r is
+// expected to hold columns [r * block_n, (r + 1) * block_n) of the blocks of
+// all ranks, stacked in rank order.
+#include <mpi.h>
+
+#include <cstdint>
+#include <iostream>
+#include <vector>
+
+#include "kutacc.h"
+
+namespace {
+
+// NOTE(review): the element type is an assumption. A 16-bit integer is used
+// on the premise that kBF16 elements are 2 bytes wide and that the
+// collective only moves bytes -- confirm against the kutacc API.
+using elem_t = std::uint16_t;
+
+// Rank r fills its block counting up from r * kRankValueStride (0 for rank 0,
+// 32 for rank 1), so blocks of different ranks never share a value.
+constexpr int64_t kRankValueStride = 32;
+
+}  // namespace
+
+int main(int argc, char *argv[])
+{
+    int world_size = 2, rank = 0;
+    const int m = 5, n = 6, len = 1;
+    int64_t dim = 3, buffer_size = m * n * len;
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+    // The expected values below assume an even column split.
+    // NOTE(review): how kutacc pads an uneven split is not known here.
+    if (n % world_size != 0) {
+        std::cout << "transpose fail: n must be divisible by the world size" << std::endl;
+        MPI_Finalize();
+        return 1;
+    }
+    // Must follow MPI_Comm_size: world_size only holds a default before it.
+    const int64_t block_n = n / world_size;
+
+    kutacc_initialize(world_size, rank, buffer_size);
+
+    // Expected output on this rank: rows [s * m, (s + 1) * m) come from rank s
+    // and hold its columns [rank * block_n, (rank + 1) * block_n).
+    std::vector<elem_t> expect_data(world_size * m * block_n * len);
+    for (int s = 0; s < world_size; ++s) {
+        for (int i = 0; i < m; ++i) {
+            for (int j = 0; j < block_n; ++j) {
+                const int64_t row = s * m + i;
+                const int64_t col = rank * block_n + j;
+                expect_data[row * block_n * len + j * len] =
+                    static_cast<elem_t>(s * kRankValueStride + i * n + col);
+            }
+        }
+    }
+
+    // NOTE(review): int64_t shape vectors are assumed to match `dim`;
+    // confirm against the kutacc::TensorWrapper constructor.
+    std::vector<int64_t> sizes = {m, n, len};
+    std::vector<int64_t> strides = {n * len, len, 1};
+    std::vector<elem_t> data(m * n * len);
+
+    int64_t val = rank * kRankValueStride;
+    for (int i = 0; i < m; ++i) {
+        for (int j = 0; j < n; ++j) {
+            data[i * n * len + j * len] = static_cast<elem_t>(val++);
+        }
+    }
+
+    std::vector<int64_t> out_sizes = {world_size * m, block_n, len};
+    std::vector<int64_t> out_strides = {block_n * len, len, 1};
+    std::vector<elem_t> out_data(world_size * m * block_n * len);
+
+    kutacc::TensorWrapper in(data.data(), sizes, strides, dim, kutacc::kBF16);
+    kutacc::TensorWrapper out(out_data.data(), out_sizes, out_strides, dim, kutacc::kBF16);
+
+    kutacc_af2_transpose(in.get_tensor(), out.get_tensor());
+
+    const bool passed = (out_data == expect_data);
+    std::cout << (passed ? "transpose pass" : "transpose fail") << std::endl;
+
+    kutacc_finalize();
+    MPI_Finalize();
+
+    // A non-zero exit status lets mpirun and comm.sh detect the failure.
+    return passed ? 0 : 1;
+}
-- 
Gitee