diff --git a/.gitignore b/.gitignore index c2e309d16223c13413003dc624922047b7722e86..bbdfed364428fc19a52f497fc226b771e01d11a0 100644 --- a/.gitignore +++ b/.gitignore @@ -31,7 +31,11 @@ Makefile # IDE .vscode .idea +.clangd # Python files *__pycache__* .pytest_cache + +# Third party libs +.mrtlibs/ diff --git a/CMakeLists.txt b/CMakeLists.txt index aa6bc81e1180a918c0e10f57e29752671016ee8b..b49125ff2f51c06aba0901e7f46b22c9cd4b353b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,5 +38,9 @@ if(ENABLE_TORCH_FRONT) endif() endif() +set(TOP_DIR ${PROJECT_SOURCE_DIR}) +include(${PROJECT_SOURCE_DIR}/cmake/utils.cmake) +include(${PROJECT_SOURCE_DIR}/cmake/securec.cmake) + add_subdirectory(${PROJECT_SOURCE_DIR}/inferrt/src) add_subdirectory(${PROJECT_SOURCE_DIR}/tests) \ No newline at end of file diff --git a/cmake/securec.cmake b/cmake/securec.cmake new file mode 100644 index 0000000000000000000000000000000000000000..2b7ae7bf74ce367080c6541abeaa2f2c911f3452 --- /dev/null +++ b/cmake/securec.cmake @@ -0,0 +1,25 @@ +set(securec_USE_STATIC_LIBS ON) + +if(MSVC) + # add "/EHsc", for vs2019 warning C4530 about securec + set(securec_CXXFLAGS "${CMAKE_CXX_FLAGS} /EHsc") +else() + set(securec_CXXFLAGS "${CMAKE_CXX_FLAGS}") +endif() + +# libboundscheck-v1.1.16 +set(REQ_URL "https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.16.zip") +set(SHA256 "5119bda1ee96440c1a45e23f0cb8b079cc6697e052c4a78f27d0869f84ba312b") + +mrt_add_pkg(securec + VER 1.1.16 + LIBS securec + URL ${REQ_URL} + SHA256 ${SHA256} + CMAKE_OPTION ${CMAKE_OPTION} -DTARGET_OHOS_LITE=OFF + PATCHES ${CMAKE_SOURCE_DIR}/third_party/patch/securec/securec.patch001 + ) + +include_directories(${securec_INC}) +include_directories(${securec_INC}/../) +add_library(mrt::securec ALIAS securec::securec) diff --git a/cmake/utils.cmake b/cmake/utils.cmake new file mode 100644 index 0000000000000000000000000000000000000000..3458ecc1cce6d33f12cbcf472411141d19377f1a --- /dev/null +++ b/cmake/utils.cmake @@ -0,0 
+1,453 @@
+include(FetchContent)
+set(FETCHCONTENT_QUIET OFF)
+
+if(DEFINED ENV{MRTLIBS_CACHE_PATH})
+    set(_MRT_LIB_CACHE $ENV{MRTLIBS_CACHE_PATH})
+else()
+    set(_MRT_LIB_CACHE ${CMAKE_BINARY_DIR}/.mrtlib)
+endif()
+message("MRT LIBS CACHE PATH: ${_MRT_LIB_CACHE}")
+
+if(NOT EXISTS ${_MRT_LIB_CACHE})
+    file(MAKE_DIRECTORY ${_MRT_LIB_CACHE})
+endif()
+
+if(DEFINED ENV{MRTLIBS_SERVER} AND NOT ENABLE_GITEE)
+    set(LOCAL_LIBS_SERVER $ENV{MRTLIBS_SERVER})
+    message("LOCAL_LIBS_SERVER: ${LOCAL_LIBS_SERVER}")
+endif()
+
+if(LOCAL_LIBS_SERVER)
+    if(NOT DEFINED ENV{no_proxy})
+        set(ENV{no_proxy} "${LOCAL_LIBS_SERVER}")
+    else()
+        string(FIND $ENV{no_proxy} ${LOCAL_LIBS_SERVER} IP_POS)
+        if(${IP_POS} EQUAL -1)
+            set(ENV{no_proxy} "$ENV{no_proxy},${LOCAL_LIBS_SERVER}")
+        endif()
+    endif()
+endif()
+
+find_package(Patch)
+message(PATCH_EXECUTABLE = ${Patch_EXECUTABLE})
+
+function(__exec_cmd)
+    set(options)
+    set(oneValueArgs WORKING_DIRECTORY)
+    set(multiValueArgs COMMAND)
+
+    cmake_parse_arguments(EXEC "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+    execute_process(COMMAND ${EXEC_COMMAND}
+            WORKING_DIRECTORY ${EXEC_WORKING_DIRECTORY}
+            RESULT_VARIABLE RESULT)
+    if(NOT RESULT EQUAL "0")
+        message(FATAL_ERROR "error!
when ${EXEC_COMMAND} in ${EXEC_WORKING_DIRECTORY}") + endif() +endfunction() + +function(__download_pkg_with_git pkg_name pkg_url pkg_git_commit pkg_sha256) + + if(LOCAL_LIBS_SERVER) + set(pkg_url "http://${LOCAL_LIBS_SERVER}:8081/libs/${pkg_name}/${pkg_git_commit}") + FetchContent_Declare( + ${pkg_name} + URL ${pkg_url} + URL_HASH SHA256=${pkg_sha256} + ) + else() + FetchContent_Declare( + ${pkg_name} + GIT_REPOSITORY ${pkg_url} + GIT_TAG ${pkg_git_commit}) + endif() + FetchContent_GetProperties(${pkg_name}) + message("download: ${${pkg_name}_SOURCE_DIR} , ${pkg_name} , ${pkg_url}") + if(NOT ${pkg_name}_POPULATED) + FetchContent_Populate(${pkg_name}) + set(${pkg_name}_SOURCE_DIR ${${pkg_name}_SOURCE_DIR} PARENT_SCOPE) + endif() + +endfunction() + +function(__download_pkg pkg_name pkg_url pkg_sha256) + set(custom_func "") + if(ARGN) + list(GET ARGN 0 custom_func) + endif() + + if(LOCAL_LIBS_SERVER) + set(REGEX_IP_ADDRESS "^([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)$") + get_filename_component(_URL_FILE_NAME ${pkg_url} NAME) + if(${LOCAL_LIBS_SERVER} MATCHES ${REGEX_IP_ADDRESS}) + set(pkg_url "http://${LOCAL_LIBS_SERVER}:8081/libs/${pkg_name}/${_URL_FILE_NAME}" ${pkg_url}) + else() + set(pkg_url "https://${LOCAL_LIBS_SERVER}/libs/${pkg_name}/${_URL_FILE_NAME}" ${pkg_url}) + endif() + endif() + + FetchContent_Declare( + ${pkg_name} + URL ${pkg_url} + URL_HASH SHA256=${pkg_sha256} + ) + FetchContent_GetProperties(${pkg_name}) + message("download: ${${pkg_name}_SOURCE_DIR} , ${pkg_name} , ${pkg_url}") + if(NOT ${pkg_name}_POPULATED) + FetchContent_Populate(${pkg_name}) + set(${pkg_name}_SOURCE_DIR ${${pkg_name}_SOURCE_DIR} PARENT_SCOPE) + + if(custom_func) + cmake_language(CALL ${custom_func}) + endif() + endif() + +endfunction() + +function(__find_pkg_then_add_target pkg_name pkg_exe lib_path) + set(options) + set(oneValueArgs PATH) + set(multiValueArgs SUFFIXES_PATH NAMES) + cmake_parse_arguments(LIB "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + 
unset(${pkg_name}_LIBS) + + message("_FIND:${${pkg_name}_BASE_DIR}") + + if(pkg_exe) + unset(${pkg_exe}_EXE CACHE) + find_program(${pkg_exe}_EXE ${pkg_exe} PATHS ${${pkg_name}_BASE_DIR}/bin NO_DEFAULT_PATH) + if(NOT ${pkg_exe}_EXE) + return() + endif() + add_executable(${pkg_name}::${pkg_exe} IMPORTED GLOBAL) + set_target_properties(${pkg_name}::${pkg_exe} PROPERTIES + IMPORTED_LOCATION ${${pkg_exe}_EXE} + ) + message("found ${${pkg_exe}_EXE}") + endif() + + foreach(_LIB_NAME ${LIB_NAMES}) + set(_LIB_SEARCH_NAME ${_LIB_NAME}) + if(MSVC AND ${pkg_name}_Debug) + set(_LIB_SEARCH_NAME ${_LIB_SEARCH_NAME}d) + endif() + set(_LIB_TYPE SHARED) + if(${pkg_name}_USE_STATIC_LIBS) + set(_LIB_SEARCH_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${_LIB_SEARCH_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(_LIB_TYPE STATIC) + endif() + set(${_LIB_NAME}_LIB ${_LIB_NAME}_LIB-NOTFOUND) + if(APPLE) + find_library(${_LIB_NAME}_LIB ${_LIB_SEARCH_NAME} PATHS ${${pkg_name}_BASE_DIR}/${lib_path} + PATH_SUFFIXES ${LIB_SUFFIXES_PATH} NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH) + else() + find_library(${_LIB_NAME}_LIB ${_LIB_SEARCH_NAME} PATHS ${${pkg_name}_BASE_DIR}/${lib_path} + PATH_SUFFIXES ${LIB_SUFFIXES_PATH} NO_DEFAULT_PATH) + endif() + if(NOT ${_LIB_NAME}_LIB) + message("not find ${_LIB_SEARCH_NAME} in path: ${${pkg_name}_BASE_DIR}/${lib_path}") + return() + endif() + + add_library(${pkg_name}::${_LIB_NAME} ${_LIB_TYPE} IMPORTED GLOBAL) + if(WIN32 AND ${_LIB_TYPE} STREQUAL "SHARED") + if(DEBUG_MODE) + set_target_properties(${pkg_name}::${_LIB_NAME} PROPERTIES IMPORTED_IMPLIB_DEBUG ${${_LIB_NAME}_LIB}) + else() + set_target_properties(${pkg_name}::${_LIB_NAME} PROPERTIES IMPORTED_IMPLIB_RELEASE ${${_LIB_NAME}_LIB}) + endif() + else() + set_target_properties(${pkg_name}::${_LIB_NAME} PROPERTIES IMPORTED_LOCATION ${${_LIB_NAME}_LIB}) + endif() + + if(EXISTS ${${pkg_name}_BASE_DIR}/include) + set_target_properties(${pkg_name}::${_LIB_NAME} PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES 
"${${pkg_name}_BASE_DIR}/include") + endif() + + list(APPEND ${pkg_name}_LIBS ${pkg_name}::${_LIB_NAME}) + message("found ${${_LIB_NAME}_LIB}") + STRING(REGEX REPLACE "(.+)/(.+)" "\\1" LIBPATH ${${_LIB_NAME}_LIB}) + unset(${pkg_name}_LIBPATH CACHE) + set(${pkg_name}_LIBPATH ${LIBPATH} CACHE STRING INTERNAL) + endforeach() + + set(${pkg_name}_LIBS ${${pkg_name}_LIBS} PARENT_SCOPE) +endfunction() + +function(mrt_add_pkg pkg_name) + + set(options) + set(oneValueArgs URL SHA256 GIT_REPOSITORY GIT_TAG VER EXE DIR HEAD_ONLY CMAKE_PATH RELEASE + LIB_PATH CUSTOM_CMAKE CUSTOM_SUBMODULE_DOWNLOAD CUSTOM_SUBMODULE_INFO) + set(multiValueArgs + CMAKE_OPTION LIBS PRE_CONFIGURE_COMMAND CONFIGURE_COMMAND BUILD_OPTION INSTALL_INCS + INSTALL_LIBS PATCHES SUBMODULES SOURCEMODULES ONLY_MAKE ONLY_MAKE_INCS ONLY_MAKE_LIBS + LIB_SUFFIXES_PATH) + cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT PKG_LIB_PATH) + set(PKG_LIB_PATH lib) + endif() + + if(NOT PKG_EXE) + set(PKG_EXE 0) + endif() + + set(__FIND_PKG_NAME ${pkg_name}) + string(TOLOWER ${pkg_name} pkg_name) + message("pkg name:${__FIND_PKG_NAME},${pkg_name}") + + set(${pkg_name}_PATCHES_HASH) + foreach(_PATCH ${PKG_PATCHES}) + file(SHA256 ${_PATCH} _PF_SHA256) + set(${pkg_name}_PATCHES_HASH "${${pkg_name}_PATCHES_HASH},${_PF_SHA256}") + endforeach() + + # strip directory variables to ensure third party packages are installed in consistent locations + string(REPLACE ${TOP_DIR} "" ARGN_STRIPPED ${ARGN}) + string(REPLACE ${_MRT_LIB_CACHE} "" ARGN_STRIPPED ${ARGN_STRIPPED}) + # check options + set(${pkg_name}_CONFIG_TXT + "${CMAKE_CXX_COMPILER_VERSION}-${CMAKE_C_COMPILER_VERSION} + ${ARGN_STRIPPED}-${${pkg_name}_USE_STATIC_LIBS}-${${pkg_name}_PATCHES_HASH} + ${${pkg_name}_CXXFLAGS}-${${pkg_name}_CFLAGS}-${${pkg_name}_LDFLAGS}") + if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + set(${pkg_name}_CONFIG_TXT "${${pkg_name}_CONFIG_TXT}--${CMAKE_OSX_DEPLOYMENT_TARGET}") + endif() + 
if(PKG_CUSTOM_SUBMODULE_INFO) + set(${pkg_name}_CONFIG_TXT "${${pkg_name}_CONFIG_TXT}-${PKG_CUSTOM_SUBMODULE_INFO}") + endif() + string(REPLACE ";" "-" ${pkg_name}_CONFIG_TXT ${${pkg_name}_CONFIG_TXT}) + string(SHA256 ${pkg_name}_CONFIG_HASH ${${pkg_name}_CONFIG_TXT}) + + message("${pkg_name} config hash: ${${pkg_name}_CONFIG_HASH}") + + set(${pkg_name}_BASE_DIR ${_MRT_LIB_CACHE}/${pkg_name}_${PKG_VER}_${${pkg_name}_CONFIG_HASH}) + set(${pkg_name}_DIRPATH ${${pkg_name}_BASE_DIR} CACHE STRING INTERNAL) + + if(EXISTS ${${pkg_name}_BASE_DIR}/options.txt AND PKG_HEAD_ONLY) + set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/${PKG_HEAD_ONLY} PARENT_SCOPE) + add_library(${pkg_name} INTERFACE) + target_include_directories(${pkg_name} INTERFACE ${${pkg_name}_INC}) + if(${PKG_RELEASE}) + __find_pkg_then_add_target(${pkg_name} ${PKG_EXE} ${PKG_LIB_PATH} + SUFFIXES_PATH ${PKG_LIB_SUFFIXES_PATH} + NAMES ${PKG_LIBS}) + endif() + return() + endif() + + set(${__FIND_PKG_NAME}_ROOT ${${pkg_name}_BASE_DIR}) + set(${__FIND_PKG_NAME}_ROOT ${${pkg_name}_BASE_DIR} PARENT_SCOPE) + + if(PKG_LIBS) + __find_pkg_then_add_target(${pkg_name} ${PKG_EXE} ${PKG_LIB_PATH} + SUFFIXES_PATH ${PKG_LIB_SUFFIXES_PATH} + NAMES ${PKG_LIBS}) + if(${pkg_name}_LIBS) + set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE) + message("Found libs: ${${pkg_name}_LIBS}") + return() + endif() + elseif(NOT PKG_HEAD_ONLY) + find_package(${__FIND_PKG_NAME} ${PKG_VER} PATHS ${${pkg_name}_BASE_DIR} ${MS_FIND_NO_DEFAULT_PATH}) + if(${__FIND_PKG_NAME}_FOUND) + set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE) + message("Found pkg: ${__FIND_PKG_NAME}") + return() + endif() + endif() + + if(NOT PKG_DIR) + if(PKG_GIT_REPOSITORY) + __download_pkg_with_git(${pkg_name} ${PKG_GIT_REPOSITORY} ${PKG_GIT_TAG} ${PKG_SHA256}) + else() + if(PKG_CUSTOM_SUBMODULE_DOWNLOAD) + __download_pkg(${pkg_name} ${PKG_URL} ${PKG_SHA256} ${PKG_CUSTOM_SUBMODULE_DOWNLOAD}) + else() + __download_pkg(${pkg_name} ${PKG_URL} 
${PKG_SHA256}) + endif() + endif() + foreach(_SUBMODULE_FILE ${PKG_SUBMODULES}) + STRING(REGEX REPLACE "(.+)_(.+)" "\\1" _SUBMODEPATH ${_SUBMODULE_FILE}) + STRING(REGEX REPLACE "(.+)/(.+)" "\\2" _SUBMODENAME ${_SUBMODEPATH}) + file(GLOB ${pkg_name}_INSTALL_SUBMODULE ${_SUBMODULE_FILE}/*) + file(COPY ${${pkg_name}_INSTALL_SUBMODULE} DESTINATION ${${pkg_name}_SOURCE_DIR}/3rdparty/${_SUBMODENAME}) + endforeach() + else() + set(${pkg_name}_SOURCE_DIR ${PKG_DIR}) + endif() + file(WRITE ${${pkg_name}_BASE_DIR}/options.txt ${${pkg_name}_CONFIG_TXT}) + message("${pkg_name}_SOURCE_DIR : ${${pkg_name}_SOURCE_DIR}") + + foreach(_PATCH_FILE ${PKG_PATCHES}) + get_filename_component(_PATCH_FILE_NAME ${_PATCH_FILE} NAME) + + # convert line-endings of patch file to UNIX LF + set(_LF_PATCH_FILE ${CMAKE_BINARY_DIR}/_mrt_patch/${_PATCH_FILE_NAME}) + configure_file(${_PATCH_FILE} ${_LF_PATCH_FILE} NEWLINE_STYLE LF @ONLY) + + # convert line-endings of source file to be patched to UNIX LF + file(READ ${_LF_PATCH_FILE} _LF_PATCH_CONTENT) + string(REGEX MATCHALL "diff --git a/[/A-Za-z0-9\.\-_]*" _PATCH_SOURCE_LIST "${_LF_PATCH_CONTENT}") + list(TRANSFORM _PATCH_SOURCE_LIST REPLACE "diff --git a/" "") # strip prefix of file path + + foreach(_PATCH_SOURCE ${_PATCH_SOURCE_LIST}) + if(EXISTS ${${pkg_name}_SOURCE_DIR}/${_PATCH_SOURCE}) + execute_process(COMMAND bash -c "sed -i \'s@\\r@@g\' ${${pkg_name}_SOURCE_DIR}/${_PATCH_SOURCE}" + COMMAND_ECHO STDOUT) + endif() + endforeach() + + # apply patch + message("patching ${${pkg_name}_SOURCE_DIR} -p1 < ${_LF_PATCH_FILE}") + execute_process(COMMAND ${Patch_EXECUTABLE} -p1 INPUT_FILE ${_LF_PATCH_FILE} + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR} + RESULT_VARIABLE Result) + if(NOT Result EQUAL "0") + message(FATAL_ERROR "Failed patch: ${_LF_PATCH_FILE}") + endif() + endforeach() + foreach(_SOURCE_DIR ${PKG_SOURCEMODULES}) + file(GLOB ${pkg_name}_INSTALL_SOURCE ${${pkg_name}_SOURCE_DIR}/${_SOURCE_DIR}/*) + file(COPY ${${pkg_name}_INSTALL_SOURCE} 
DESTINATION ${${pkg_name}_BASE_DIR}/${_SOURCE_DIR}/) + endforeach() + file(LOCK ${${pkg_name}_BASE_DIR} DIRECTORY GUARD FUNCTION RESULT_VARIABLE ${pkg_name}_LOCK_RET TIMEOUT 600) + if(NOT ${pkg_name}_LOCK_RET EQUAL "0") + message(FATAL_ERROR "error! when try lock ${${pkg_name}_BASE_DIR} : ${${pkg_name}_LOCK_RET}") + endif() + + if(PKG_CUSTOM_CMAKE) + file(GLOB ${pkg_name}_cmake ${PKG_CUSTOM_CMAKE}/CMakeLists.txt) + file(COPY ${${pkg_name}_cmake} DESTINATION ${${pkg_name}_SOURCE_DIR}) + endif() + + if(${pkg_name}_SOURCE_DIR) + if(PKG_HEAD_ONLY) + file(GLOB ${pkg_name}_SOURCE_SUBDIRS ${${pkg_name}_SOURCE_DIR}/*) + file(COPY ${${pkg_name}_SOURCE_SUBDIRS} DESTINATION ${${pkg_name}_BASE_DIR}) + set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/${PKG_HEAD_ONLY} PARENT_SCOPE) + if(NOT PKG_RELEASE) + add_library(${pkg_name} INTERFACE) + target_include_directories(${pkg_name} INTERFACE ${${pkg_name}_INC}) + endif() + + elseif(PKG_ONLY_MAKE) + __exec_cmd(COMMAND ${CMAKE_MAKE_PROGRAM} ${${pkg_name}_CXXFLAGS} -j${THNUM} + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}) + set(PKG_INSTALL_INCS ${PKG_ONLY_MAKE_INCS}) + set(PKG_INSTALL_LIBS ${PKG_ONLY_MAKE_LIBS}) + file(GLOB ${pkg_name}_INSTALL_INCS ${${pkg_name}_SOURCE_DIR}/${PKG_INSTALL_INCS}) + file(GLOB ${pkg_name}_INSTALL_LIBS ${${pkg_name}_SOURCE_DIR}/${PKG_INSTALL_LIBS}) + file(COPY ${${pkg_name}_INSTALL_INCS} DESTINATION ${${pkg_name}_BASE_DIR}/include) + file(COPY ${${pkg_name}_INSTALL_LIBS} DESTINATION ${${pkg_name}_BASE_DIR}/lib) + + elseif(PKG_CMAKE_OPTION) + # in cmake + file(MAKE_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build) + if(${pkg_name}_CFLAGS) + set(${pkg_name}_CMAKE_CFLAGS "-DCMAKE_C_FLAGS=${${pkg_name}_CFLAGS}") + endif() + if(${pkg_name}_CXXFLAGS) + set(${pkg_name}_CMAKE_CXXFLAGS "-DCMAKE_CXX_FLAGS=${${pkg_name}_CXXFLAGS}") + endif() + + if(${pkg_name}_LDFLAGS) + if(${pkg_name}_USE_STATIC_LIBS) + #set(${pkg_name}_CMAKE_LDFLAGS "-DCMAKE_STATIC_LINKER_FLAGS=${${pkg_name}_LDFLAGS}") + else() + 
set(${pkg_name}_CMAKE_LDFLAGS "-DCMAKE_SHARED_LINKER_FLAGS=${${pkg_name}_LDFLAGS}") + endif() + endif() + if(APPLE) + __exec_cmd(COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_COMPILER_ARG1=${CMAKE_CXX_COMPILER_ARG1} + -DCMAKE_C_COMPILER_ARG1=${CMAKE_C_COMPILER_ARG1} ${PKG_CMAKE_OPTION} + ${${pkg_name}_CMAKE_CFLAGS} ${${pkg_name}_CMAKE_CXXFLAGS} ${${pkg_name}_CMAKE_LDFLAGS} + -DCMAKE_INSTALL_PREFIX=${${pkg_name}_BASE_DIR} ${${pkg_name}_SOURCE_DIR}/${PKG_CMAKE_PATH} + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build) + __exec_cmd(COMMAND ${CMAKE_COMMAND} --build . --target install -- + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build) + else() + __exec_cmd(COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_COMPILER_ARG1=${CMAKE_CXX_COMPILER_ARG1} + -DCMAKE_C_COMPILER_ARG1=${CMAKE_C_COMPILER_ARG1} ${PKG_CMAKE_OPTION} -G ${CMAKE_GENERATOR} + ${${pkg_name}_CMAKE_CFLAGS} ${${pkg_name}_CMAKE_CXXFLAGS} ${${pkg_name}_CMAKE_LDFLAGS} + -DCMAKE_INSTALL_PREFIX=${${pkg_name}_BASE_DIR} ${${pkg_name}_SOURCE_DIR}/${PKG_CMAKE_PATH} + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build) + if(MSVC) + set(CONFIG_TYPE Release) + if(DEBUG_MODE) + set(CONFIG_TYPE Debug) + endif() + __exec_cmd(COMMAND ${CMAKE_COMMAND} --build . --config ${CONFIG_TYPE} --target install -- + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build) + else() + __exec_cmd(COMMAND ${CMAKE_COMMAND} --build . 
--target install -- -j${THNUM} + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build) + endif() + endif() + else() + if(${pkg_name}_CFLAGS) + set(${pkg_name}_MAKE_CFLAGS "CFLAGS=${${pkg_name}_CFLAGS}") + endif() + if(${pkg_name}_CXXFLAGS) + set(${pkg_name}_MAKE_CXXFLAGS "CXXFLAGS=${${pkg_name}_CXXFLAGS}") + endif() + if(${pkg_name}_LDFLAGS) + set(${pkg_name}_MAKE_LDFLAGS "LDFLAGS=${${pkg_name}_LDFLAGS}") + endif() + # in configure && make + if(PKG_PRE_CONFIGURE_COMMAND) + __exec_cmd(COMMAND ${PKG_PRE_CONFIGURE_COMMAND} + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}) + endif() + + if(PKG_CONFIGURE_COMMAND) + __exec_cmd(COMMAND ${PKG_CONFIGURE_COMMAND} + ${${pkg_name}_MAKE_CFLAGS} ${${pkg_name}_MAKE_CXXFLAGS} ${${pkg_name}_MAKE_LDFLAGS} + --prefix=${${pkg_name}_BASE_DIR} + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}) + endif() + set(${pkg_name}_BUILD_OPTION ${PKG_BUILD_OPTION}) + if(NOT PKG_CONFIGURE_COMMAND) + set(${pkg_name}_BUILD_OPTION ${${pkg_name}_BUILD_OPTION} + ${${pkg_name}_MAKE_CFLAGS} ${${pkg_name}_MAKE_CXXFLAGS} ${${pkg_name}_MAKE_LDFLAGS}) + endif() + # build + if(APPLE) + __exec_cmd(COMMAND ${CMAKE_MAKE_PROGRAM} ${${pkg_name}_BUILD_OPTION} + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}) + else() + __exec_cmd(COMMAND ${CMAKE_MAKE_PROGRAM} ${${pkg_name}_BUILD_OPTION} -j${THNUM} + WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}) + endif() + + if(PKG_INSTALL_INCS OR PKG_INSTALL_LIBS) + file(GLOB ${pkg_name}_INSTALL_INCS ${${pkg_name}_SOURCE_DIR}/${PKG_INSTALL_INCS}) + file(GLOB ${pkg_name}_INSTALL_LIBS ${${pkg_name}_SOURCE_DIR}/${PKG_INSTALL_LIBS}) + file(COPY ${${pkg_name}_INSTALL_INCS} DESTINATION ${${pkg_name}_BASE_DIR}/include) + file(COPY ${${pkg_name}_INSTALL_LIBS} DESTINATION ${${pkg_name}_BASE_DIR}/lib) + else() + __exec_cmd(COMMAND ${CMAKE_MAKE_PROGRAM} install WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}) + endif() + endif() + endif() + + if(PKG_LIBS) + __find_pkg_then_add_target(${pkg_name} ${PKG_EXE} ${PKG_LIB_PATH} + SUFFIXES_PATH 
${PKG_LIB_SUFFIXES_PATH} + NAMES ${PKG_LIBS}) + set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE) + if(NOT ${pkg_name}_LIBS) + message(FATAL_ERROR "Can not find pkg: ${pkg_name}") + endif() + else() + find_package(${__FIND_PKG_NAME} ${PKG_VER} QUIET ${MS_FIND_NO_DEFAULT_PATH}) + if(${__FIND_PKG_NAME}_FOUND) + set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE) + message("Found pkg: ${${__FIND_PKG_NAME}_LIBRARIES}") + return() + endif() + endif() +endfunction() diff --git a/inferrt/src/CMakeLists.txt b/inferrt/src/CMakeLists.txt index ddfc23311bd8b222a6299033328120dbbbe76d67..05d5d2bd4d603ccc0dd4b6813c1ef55565e7ee7a 100644 --- a/inferrt/src/CMakeLists.txt +++ b/inferrt/src/CMakeLists.txt @@ -33,6 +33,7 @@ include_directories(${PROJECT_SOURCE_DIR}/../include) include_directories(${PROJECT_SOURCE_DIR}) # Add compiler, ir, runtime and kernels directories +add_subdirectory(common) add_subdirectory(hardware) add_subdirectory(ir) add_subdirectory(lang) @@ -41,7 +42,7 @@ add_subdirectory(optimize) add_subdirectory(pybind) add_subdirectory(runtime) -set(OBJECTS hardware_abstract lexer_obj parser_obj ir_obj compiler_obj vm_obj runtime_obj ops_obj pass_obj mrt_ir_obj) +set(OBJECTS mrt_common hardware_abstract lexer_obj parser_obj ir_obj compiler_obj vm_obj runtime_obj ops_obj pass_obj mrt_ir_obj) # Create da execution file add_executable(da lang/cli/main.cc lang/cli/options.cc) diff --git a/inferrt/src/common/CMakeLists.txt b/inferrt/src/common/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..91700303ccf5bf2f63ab4bb0eecf6935936c0b9b --- /dev/null +++ b/inferrt/src/common/CMakeLists.txt @@ -0,0 +1,5 @@ +check_debug_log_out() + +file(GLOB_RECURSE COMMON_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") +add_library(mrt_common SHARED ${COMMON_SRC_FILES}) +target_link_libraries(mrt_common dl) diff --git a/inferrt/src/common/dynamic_lib_loader.cc b/inferrt/src/common/dynamic_lib_loader.cc new file mode 100644 
index 0000000000000000000000000000000000000000..c471a02921f02db405c68f8080fc261666a0a02c
--- /dev/null
+++ b/inferrt/src/common/dynamic_lib_loader.cc
@@ -0,0 +1,117 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _WIN32
+#include <dlfcn.h>
+#else
+#include <windows.h>
+#endif
+#include <climits>
+#include <cstdlib>
+#include <string>
+
+#include "common/common.h"
+#include "common/dynamic_lib_loader.h"
+
+namespace mrt {
+namespace common {
+namespace {
+std::string GetErrorMsg() {
+#ifndef _WIN32
+  const char *result = dlerror();
+  return (result == nullptr) ?
"Unknown" : result;
+#else
+  return std::to_string(GetLastError());
+#endif
+}
+}  // namespace
+
+DynamicLibLoader::~DynamicLibLoader() {
+  for (const auto &[dlName, handle] : allHandles_) {
+    if (dlclose(handle) != 0) {
+      LOG_ERROR << "Closing dynamic lib: " << dlName << " failed, error message: " << GetErrorMsg();
+    }
+    LOG_OUT << "Close dynamic library: " << dlName << " successfully.";
+  }
+}
+
+std::string DynamicLibLoader::GetFilePathFromDlInfo() {
+  Dl_info dlInfo;
+  if (dladdr(reinterpret_cast<void *>(DynamicLibLoader::GetFilePathFromDlInfo), &dlInfo) == 0) {
+    LOG_ERROR << "Get file path by dladdr failed";
+    return "";
+  }
+  std::string curSoPath = dlInfo.dli_fname;
+  LOG_OUT << "Current so path : " << curSoPath;
+
+  auto lastSlashPos = curSoPath.find_last_of("/");
+  if (curSoPath.empty() || lastSlashPos == std::string::npos) {
+    LOG_ERROR << "Current so path empty or the path [" << curSoPath << "] is invalid.";
+    return "";
+  }
+  // During project build, place the current shared library (libmrt_common.so) and various plugins in
+  // the same directory.
+ auto dynamicLibPath = curSoPath.substr(0, lastSlashPos); + LOG_OUT << "Current so dir path : " << dynamicLibPath; + if (dynamicLibPath.size() >= PATH_MAX) { + LOG_ERROR << "Current path [" << dynamicLibPath << "] is invalid."; + return ""; + } + char realPathMem[PATH_MAX] = {0}; + if (realpath(dynamicLibPath.c_str(), realPathMem) == nullptr) { + LOG_ERROR << "Dynamic library path is invalid: [" << dynamicLibPath << "], skip!"; + return ""; + } + return std::string(realPathMem); +} + +bool DynamicLibLoader::LoadDynamicLib(const std::string &dlName, std::stringstream *errMsg) { + CHECK_IF_NULL(errMsg); + if (dlName.empty()) { + LOG_ERROR << "Dynamic library name is empty"; + *errMsg << "Dynamic library name is empty" << std::endl; + return false; + } + if (allHandles_.find(dlName) != allHandles_.end()) { + LOG_OUT << "Dynamic library: " << dlName << " already loaded"; + return true; + } + void *handle = dlopen((filePath_ + "/" + dlName).c_str(), RTLD_LAZY | RTLD_LOCAL); + if (handle == nullptr) { + std::string errMsgStr = GetErrorMsg(); + LOG_ERROR << "Load dynamic library: " << dlName << " failed. " << errMsgStr; + *errMsg << "Load dynamic library: " << dlName << " failed. 
" << errMsgStr << std::endl;
+    return false;
+  }
+  allHandles_[dlName] = handle;
+  LOG_OUT << "Load dynamic library: " << dlName << " successfully.";
+  return true;
+}
+
+void DynamicLibLoader::CloseDynamicLib(const std::string &dlName) {
+  if (allHandles_.find(dlName) == allHandles_.end()) {
+    LOG_OUT << "Dynamic library: " << dlName << " not found";
+    return;
+  }
+  if (dlclose(allHandles_[dlName]) != 0) {
+    LOG_ERROR << "Closing dynamic lib: " << dlName << " failed, error message: " << GetErrorMsg();
+  }
+  allHandles_.erase(dlName);
+  LOG_OUT << "Close dynamic library: " << dlName << " successfully.";
+}
+
+}  // namespace common
+}  // namespace mrt
diff --git a/inferrt/src/common/dynamic_lib_loader.h b/inferrt/src/common/dynamic_lib_loader.h
new file mode 100644
index 0000000000000000000000000000000000000000..7d0973b7f66cca8d9b8fcd130e917ffca8e10d51
--- /dev/null
+++ b/inferrt/src/common/dynamic_lib_loader.h
@@ -0,0 +1,52 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __COMMON_DYNAMIC_LIB_LOADER_H__
+#define __COMMON_DYNAMIC_LIB_LOADER_H__
+
+#include <map>
+#include <string>
+#include "common/common.h"
+#include "common/visible.h"
+
+namespace mrt {
+namespace common {
+class DA_API DynamicLibLoader {
+ public:
+  DynamicLibLoader() {
+    filePath_ = GetFilePathFromDlInfo();
+    if (filePath_.empty()) {
+      LOG_ERROR << "Get dynamic library file path by dladdr failed";
+    }
+  }
+  DynamicLibLoader(std::string &&filePath) : filePath_(std::move(filePath)) {}
+  ~DynamicLibLoader();
+
+  bool LoadDynamicLib(const std::string &dlName, std::stringstream *errMsg);
+  void CloseDynamicLib(const std::string &dlName);
+
+  const std::string &GetDynamicLibFilePath() const { return filePath_; }
+
+ private:
+  DISABLE_COPY_AND_ASSIGN(DynamicLibLoader)
+  static std::string GetFilePathFromDlInfo();
+  std::map<std::string, void *> allHandles_;
+  std::string filePath_;
+};
+}  // namespace common
+}  // namespace mrt
+
+#endif  // __COMMON_DYNAMIC_LIB_LOADER_H__
diff --git a/inferrt/src/hardware/ascend/res_manager/symbol_interface/symbol_utils.h b/inferrt/src/hardware/ascend/res_manager/symbol_interface/symbol_utils.h
index 6674ee93f387dec4bb0d5d29ee8a20f09019cd2c..494a5bf1daa583bd56aad50b197bab328ae4c16e 100644
--- a/inferrt/src/hardware/ascend/res_manager/symbol_interface/symbol_utils.h
+++ b/inferrt/src/hardware/ascend/res_manager/symbol_interface/symbol_utils.h
@@ -74,7 +74,7 @@ namespace mrt::device::ascend {
 #define HAS_ASCEND_API(funcName) HasAscendApi(mrt::device::ascend::funcName##_)
 
-std::string GetAscendPath();
+DA_API std::string GetAscendPath();
 void *GetLibHandler(const std::string &libPath, bool ifGlobal = false);
 void LoadAscendApiSymbols();
 void LoadSimulationApiSymbols();
diff --git a/inferrt/src/hardware/hardware_abstract/CMakeLists.txt b/inferrt/src/hardware/hardware_abstract/CMakeLists.txt
index 4bb93ea35adb9e614003489de983dc42baa68982..2cd6053b6e91baaeefffd85460bfa6b3f9d05354 100644
--- a/inferrt/src/hardware/hardware_abstract/CMakeLists.txt
+++ 
b/inferrt/src/hardware/hardware_abstract/CMakeLists.txt @@ -2,4 +2,4 @@ check_debug_log_out() file(GLOB_RECURSE HARDWARE_ABSTRACT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") add_library(hardware_abstract SHARED ${HARDWARE_ABSTRACT_SRC_FILES}) -target_link_libraries(hardware_abstract dl) +target_link_libraries(hardware_abstract mrt_common dl) diff --git a/inferrt/src/hardware/hardware_abstract/device_context_manager.cc b/inferrt/src/hardware/hardware_abstract/device_context_manager.cc index 7433da671e0f501999c9fe51cc0dd88a19242455..e0fd083d8391febd38dd8efa3a423c32b523bfa6 100644 --- a/inferrt/src/hardware/hardware_abstract/device_context_manager.cc +++ b/inferrt/src/hardware/hardware_abstract/device_context_manager.cc @@ -53,67 +53,7 @@ constexpr bool kIsWindowsPlatform = false; #endif } // namespace namespace device { -bool PluginLoader::LoadDynamicLib(const std::string &pluginFile, std::map *allHandles, - std::stringstream *errMsg) { - CHECK_IF_NULL(allHandles); - CHECK_IF_NULL(errMsg); - auto soName = GetDynamicLibName(pluginFile); - void *handle = dlopen(pluginFile.c_str(), RTLD_LAZY | RTLD_LOCAL); - if (handle == nullptr) { - std::string errMsgStr = GetDlErrorMsg(); - LOG_OUT << "Load dynamic library: " << soName << " failed. " << errMsgStr; - *errMsg << "Load dynamic library: " << soName << " failed. 
" << errMsgStr << std::endl; - return false; - } - (*allHandles)[soName] = handle; - return true; -} - -void PluginLoader::CloseDynamicLib(const std::string &dlName, void *handle) { - if (dlclose(handle) != 0) { - LOG_ERROR << "Closing dynamic lib: " << dlName << "failed, error message: " << GetDlErrorMsg(); - } -} - -bool PluginLoader::GetPluginPath(std::string *filePath) { - CHECK_IF_NULL(filePath); - Dl_info dlInfo; - if (dladdr(reinterpret_cast(PluginLoader::GetPluginPath), &dlInfo) == 0) { - LOG_ERROR << "Get file path by dladdr failed"; - return false; - } - std::string curSoPath = dlInfo.dli_fname; - LOG_OUT << "Current so path : " << curSoPath; - - auto lastSlashPos = curSoPath.find_last_of("/"); - if (curSoPath.empty() || lastSlashPos == std::string::npos) { - LOG_ERROR << "Current so path empty or the path [" << curSoPath << "] is invalid."; - return false; - } - // During project build, place the current shared library (libhardware_abstract.so) and various hardware plugins in - // the same directory. 
- auto pluginSoPath = curSoPath.substr(0, lastSlashPos); - LOG_OUT << "Current plugin so dir path : " << pluginSoPath; - if (pluginSoPath.size() >= PATH_MAX) { - LOG_ERROR << "Current path [" << pluginSoPath << "] is invalid."; - return false; - } - char realPathMem[PATH_MAX] = {0}; - if (realpath(pluginSoPath.c_str(), realPathMem) == nullptr) { - LOG_ERROR << "Plugin path is invalid: [" << pluginSoPath << "], skip!"; - return false; - } - *filePath = std::string(realPathMem); - return true; -} - -std::string PluginLoader::GetDynamicLibName(const std::string &pluginFile) { - auto p1 = pluginFile.find_last_of("/") + 1; - auto targetSo = pluginFile.substr(p1); - return targetSo; -} - -DeviceContextManager::~DeviceContextManager() { UnloadPlugin(); } +DeviceContextManager::~DeviceContextManager() { Clear(); } DeviceContextManager &DeviceContextManager::GetInstance() { static DeviceContextManager instance{}; @@ -251,22 +191,17 @@ void DeviceContextManager::LoadPlugin() { return; } loadInit_ = true; - if (pluginPath_.empty() && !PluginLoader::GetPluginPath(&pluginPath_)) { - LOG_ERROR << "Plugin path is invalid, skip!"; - dlopenErrorMsg_ << "Plugin path is invalid, skip!" 
<< std::endl; - return; - } - DIR *dir = opendir(pluginPath_.c_str()); + DIR *dir = opendir(dynamicLibLoader_.GetDynamicLibFilePath().c_str()); if (dir == nullptr) { - LOG_ERROR << "Open plugin dir failed, plugin path:" << pluginPath_; - dlopenErrorMsg_ << "Open plugin dir failed, plugin path:" << pluginPath_ << std::endl; + LOG_ERROR << "Open plugin dir failed, plugin path:" << dynamicLibLoader_.GetDynamicLibFilePath(); + dlopenErrorMsg_ << "Open plugin dir failed, plugin path:" << dynamicLibLoader_.GetDynamicLibFilePath() << std::endl; return; } struct dirent *entry; - std::map > multiVersionPluginMap; // key: plugin name, value: so file name + std::set pluginFiles; while ((entry = readdir(dir)) != nullptr) { - auto pluginFile = pluginPath_ + "/" + entry->d_name; + std::string pluginFile = entry->d_name; constexpr auto pluginPrefix = "libhardware_"; if (pluginFile.find(pluginPrefix) == std::string::npos) { continue; @@ -274,43 +209,26 @@ void DeviceContextManager::LoadPlugin() { if (pluginFile.find("libhardware_abstract") != std::string::npos) { continue; } - std::string fileName = entry->d_name; - auto dot = fileName.find_first_of("."); - if (dot == std::string::npos) { + if (pluginFile.find_first_of(".") == std::string::npos) { continue; } - (void)multiVersionPluginMap[fileName.substr(0, dot)].insert(pluginFile); + pluginFiles.insert(pluginFile); } - for (const auto &[pluginName, fileNames] : multiVersionPluginMap) { - for (auto iter = fileNames.rbegin(); iter != fileNames.rend(); iter++) { - const auto &fileName = *iter; - auto ret = PluginLoader::LoadDynamicLib(fileName, &pluginMaps_, &dlopenErrorMsg_); - if (ret) { - LOG_OUT << "Load " << pluginName << " plugin file " << fileName << " successfully."; - } else { - LOG_ERROR << "Load " << pluginName << " plugin file " << fileName << " failed."; - } + for (const auto &targetPluginFile : pluginFiles) { + if (!dynamicLibLoader_.LoadDynamicLib(targetPluginFile, &dlopenErrorMsg_)) { + LOG_ERROR << "Load " << 
targetPluginFile << " plugin file failed, error message: " << dlopenErrorMsg_.str(); } } + (void)closedir(dir); } -void DeviceContextManager::UnloadPlugin() { +void DeviceContextManager::Clear() { backendToDeviceContext_.clear(); deviceContexts_.clear(); deviceContextCreators_.clear(); multiStreamControllers_.clear(); - - if (pluginMaps_.empty()) { - return; - } - auto iter = pluginMaps_.begin(); - while (iter != pluginMaps_.end()) { - PluginLoader::CloseDynamicLib(iter->first, iter->second); - (void)iter++; - } - pluginMaps_.clear(); } } // namespace device diff --git a/inferrt/src/hardware/hardware_abstract/device_context_manager.h b/inferrt/src/hardware/hardware_abstract/device_context_manager.h index 7d9904345aa320f079de98d65ea2682f62f04416..9c9a6c9f5803d31a94f2d50b0a32e3198f1dec45 100644 --- a/inferrt/src/hardware/hardware_abstract/device_context_manager.h +++ b/inferrt/src/hardware/hardware_abstract/device_context_manager.h @@ -28,6 +28,7 @@ #include #include "hardware/hardware_abstract/device_context.h" #include "common/visible.h" +#include "common/dynamic_lib_loader.h" namespace mrt { namespace device { @@ -35,17 +36,6 @@ class MultiStreamController; using DeviceContextCreator = std::function(const DeviceContextKey &)>; using MultiStreamControllerPtr = std::shared_ptr; -class PluginLoader { - public: - static bool LoadDynamicLib(const std::string &pluginFile, std::map *allHandles, - std::stringstream *errMsg); - static void CloseDynamicLib(const std::string &dlName, void *handle); - static bool GetPluginPath(std::string *filePath); - - private: - static std::string GetDynamicLibName(const std::string &pluginFile); -}; - class MRT_EXPORT DeviceContextManager { public: static DeviceContextManager &GetInstance(); @@ -67,11 +57,10 @@ class MRT_EXPORT DeviceContextManager { private: DeviceContextManager() = default; void LoadPlugin(); - void UnloadPlugin(); + void Clear(); - std::map pluginMaps_; + common::DynamicLibLoader dynamicLibLoader_; bool loadInit_; - 
std::string pluginPath_; // The string converted from DeviceContextKey -> DeviceContextPtr. std::map deviceContexts_; diff --git a/inferrt/src/ir/CMakeLists.txt b/inferrt/src/ir/CMakeLists.txt index 2266efb5ec7a55cc4e9d6dc3ad818c947cc89c1d..8dffd7140567941528c524d7d5a8978ceb84bf3c 100644 --- a/inferrt/src/ir/CMakeLists.txt +++ b/inferrt/src/ir/CMakeLists.txt @@ -2,3 +2,4 @@ check_debug_log_out() file(GLOB_RECURSE IR_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") add_library(mrt_ir_obj STATIC ${IR_SRC_FILES}) +target_link_libraries(mrt_ir_obj hardware_abstract) diff --git a/inferrt/src/ir/common/intrusive_ptr.h b/inferrt/src/ir/common/intrusive_ptr.h index 4a3d5dc1305e92ab496e6a39646c0d49c92ed98b..c7aece08e78848360045cda98b4be46244a72e23 100644 --- a/inferrt/src/ir/common/intrusive_ptr.h +++ b/inferrt/src/ir/common/intrusive_ptr.h @@ -20,6 +20,7 @@ #include #include #include +#include namespace mrt { namespace ir { @@ -102,6 +103,26 @@ class IntrusivePtr { */ IntrusivePtr(IntrusivePtr &&other) noexcept : ptr_(other.ptr_) { other.ptr_ = nullptr; } + /** + * @brief Conversion constructor from derived type. + * @tparam U The derived type that inherits from T. + * @param other The IntrusivePtr of the derived type. + */ + template >> + IntrusivePtr(const IntrusivePtr &other) : ptr_(other.get()) { + if (ptr_) { + ptr_->AddRef(); + } + } + + /** + * @brief Move conversion constructor from derived type. + * @tparam U The derived type that inherits from T. + * @param other The IntrusivePtr of the derived type to move from. + */ + template >> + IntrusivePtr(IntrusivePtr &&other) noexcept : ptr_(other.Release()) {} + /** * @brief Destructor. Decrements the reference count. */ @@ -145,6 +166,43 @@ class IntrusivePtr { return *this; } + /** + * @brief Copy assignment operator from derived type. + * @tparam U The derived type that inherits from T. + * @param other The IntrusivePtr of the derived type to copy from. 
+ * @return *this + */ + template >> + IntrusivePtr &operator=(const IntrusivePtr &other) { + if (ptr_ != other.get()) { + if (ptr_) { + ptr_->DecRef(); + } + ptr_ = other.get(); + if (ptr_) { + ptr_->AddRef(); + } + } + return *this; + } + + /** + * @brief Move assignment operator from derived type. + * @tparam U The derived type that inherits from T. + * @param other The IntrusivePtr of the derived type to move from. + * @return *this + */ + template >> + IntrusivePtr &operator=(IntrusivePtr &&other) noexcept { + if (ptr_ != other.get()) { + if (ptr_) { + ptr_->DecRef(); + } + ptr_ = other.Release(); + } + return *this; + } + /** * @brief Gets the raw pointer. * @return The managed pointer. diff --git a/inferrt/src/ir/tensor/format.h b/inferrt/src/ir/tensor/format.h new file mode 100644 index 0000000000000000000000000000000000000000..f928ffc2e2aa7534bdeb99e579ac6d12d1c9974a --- /dev/null +++ b/inferrt/src/ir/tensor/format.h @@ -0,0 +1,70 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __IR_TENSOR_FORMAT_H__ +#define __IR_TENSOR_FORMAT_H__ + +#include +#include +#include + +namespace mrt { +namespace ir { +enum MemoryFormat : int8_t { + DEFAULT_FORMAT = -1, + ND, // Nd Tensor + FRACTAL_NZ, + NC1HWC0, // NC1HWC0 + FRACTAL_Z, // FRACTAL_Z + NUM_OF_FORMAT +}; + +inline const std::vector &GetFormatNames() { + static std::vector names = { + "ND", + "FRACTAL_NZ", + "NC1HWC0", + "FRACTAL_Z", + }; + return names; +} + +inline std::string FormatEnumToString(MemoryFormat format) { + const auto &names = GetFormatNames(); + if (format == MemoryFormat::DEFAULT_FORMAT) { + return "DefaultFormat"; + } + if (format < MemoryFormat::ND || format >= MemoryFormat::NUM_OF_FORMAT) { + return ""; + } + return names[format]; +} + +inline MemoryFormat FormatFromStrToEnum(const std::string &formatStr) { + if (formatStr == "DefaultFormat") { + return MemoryFormat::DEFAULT_FORMAT; + } + const auto &names = GetFormatNames(); + for (size_t i = 0; i < names.size(); ++i) { + if (names[i] == formatStr) { + return static_cast(i); + } + } + return MemoryFormat::DEFAULT_FORMAT; +} +} // namespace ir +} // namespace mrt +#endif // __IR_TENSOR_FORMAT_H__ diff --git a/inferrt/src/ir/tensor/tensor.cc b/inferrt/src/ir/tensor/tensor.cc index 9dbae6b5fb5f8eb65e849c901ee4ff9f0b29d37c..674f61bdab2a93afb5aa9972c5d67db82d682315 100644 --- a/inferrt/src/ir/tensor/tensor.cc +++ b/inferrt/src/ir/tensor/tensor.cc @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include "common/common.h" #include "ir/tensor/tensor.h" @@ -132,20 +134,31 @@ std::ostream &operator<<(std::ostream &os, const TensorPtr &tensor) { return os; } +std::string ShapeToString(const std::vector &shape) { + std::string str = "["; + const size_t count = shape.size(); + for (size_t i = 0; i < count; ++i) { + if (i > 0) { + str.append(", "); + } + str.append(std::to_string(shape[i])); + } + return str.append("]"); +} + std::ostream &operator<<(std::ostream &os, const Tensor &tensor) { constexpr 
size_t numelLimit = 30; - os << "Tensor(shape=["; + os << "Tensor(shape="; const auto &shape = tensor.Shape(); - for (size_t i = 0; i < shape.size(); ++i) { - os << shape[i]; - if (i < shape.size() - 1) { - os << ", "; - } - } - os << "], dtype=" << tensor.Dtype().ToString(); + os << ShapeToString(shape); + os << ", dtype=" << tensor.Dtype().ToString(); + os << ", device=[type=" << hardware::GetDeviceNameByType(tensor.GetDevice().type) + << ", index:" << int(tensor.GetDevice().index) << "]"; os << ", data=["; if (tensor.DataPtr()) { - if (tensor.HasDynamicShape()) { + if (tensor.GetDevice().type != hardware::DeviceType::CPU) { + os << "..."; + } else if (tensor.HasDynamicShape()) { os << "dynamic shape, not materialized"; } else if (tensor.Numel() > 0) { switch (tensor.Dtype()) { diff --git a/inferrt/src/ir/tensor/tensor.h b/inferrt/src/ir/tensor/tensor.h index 0853bb673186d61e79999e0c26f3c773afe3aca1..18c56f2fafc93b915f55082dd6bef05f04e213b2 100644 --- a/inferrt/src/ir/tensor/tensor.h +++ b/inferrt/src/ir/tensor/tensor.h @@ -25,6 +25,7 @@ #include "ir/common/dtype.h" #include "ir/common/intrusive_ptr.h" #include "ir/tensor/storage.h" +#include "ir/tensor/format.h" namespace mrt { namespace ir { @@ -84,6 +85,11 @@ class Tensor : public RefCounted { * @return A mutable reference to the vector of dimensions. */ std::vector &Shape() { return shape_; } + /** + * @brief Gets the memory format of the tensor. + * @return The memory format enum value. + */ + MemoryFormat Format() const { return memoryFormat_; } /** * @brief Gets the strides of the tensor. * @return A const reference to the vector of strides. @@ -114,6 +120,11 @@ class Tensor : public RefCounted { * @return The storage. */ const StoragePtr &GetStorage() const { return storage_; } + /** + * @brief Gets the storage offset of the tensor. + * @return The storage offset. + */ + int64_t StorageOffset() const { return storageOffset_; } /** * @brief Resizes the storage of the tensor. 
* Note: The shape and dtype must be set before resizing the storage. @@ -158,6 +169,11 @@ class Tensor : public RefCounted { * @param storage The new storage to set. */ void SetStorage(const StoragePtr &storage) { storage_ = storage; } + /** + * @brief Sets the memory format of the tensor. + * @param memoryFromat The memory format enum value. + */ + void SetFormat(MemoryFormat memoryFormat) { memoryFormat_ = memoryFormat; } private: /** @@ -165,19 +181,20 @@ class Tensor : public RefCounted { */ void ComputeStrides(); - DataType dtype_; ///< The data type of the elements. - std::vector shape_; ///< The dimensions of the tensor. - std::vector strides_; ///< The strides of the tensor. - int64_t numel_ = 0; ///< The total number of elements. - StoragePtr storage_{nullptr}; ///< The underlying storage. - int64_t storageOffset_ = 0; ///< The offset in the storage, in number of elements. + DataType dtype_; ///< The data type of the elements. + std::vector shape_; ///< The dimensions of the tensor. + std::vector strides_; ///< The strides of the tensor. + MemoryFormat memoryFormat_{MemoryFormat::DEFAULT_FORMAT}; ///< The memory format of the tensor. + int64_t numel_ = 0; ///< The total number of elements. + StoragePtr storage_{nullptr}; ///< The underlying storage. + int64_t storageOffset_ = 0; ///< The offset in the storage, in number of elements. 
}; using TensorPtr = IntrusivePtr; std::ostream &operator<<(std::ostream &os, const Tensor &tensor); std::ostream &operator<<(std::ostream &os, const TensorPtr &tensor); - +std::string ShapeToString(const std::vector &shape); } // namespace ir } // namespace mrt diff --git a/inferrt/src/ir/value/value.cc b/inferrt/src/ir/value/value.cc index ffa46301604520ce37161d1c35ea4d9508514c8c..5c63e0eb10438b7728e4a88cd95c7706d8dd41c1 100644 --- a/inferrt/src/ir/value/value.cc +++ b/inferrt/src/ir/value/value.cc @@ -49,6 +49,9 @@ Value::Value(Value &&other) noexcept : tag_(other.tag_) { case Tag::Tensor: new (&tensor_) TensorPtr(std::move(other.tensor_)); break; + case Tag::Float: + float_ = other.float_; + break; case Tag::Double: double_ = other.double_; break; @@ -86,6 +89,14 @@ const TensorPtr &Value::ToTensor() const { CHECK_TAG(Tag::Tensor); return tensor_; } +TensorPtr &Value::ToTensor() { + CHECK_TAG(Tag::Tensor); + return tensor_; +} +float Value::ToFloat() const { + CHECK_TAG(Tag::Float); + return float_; +} double Value::ToDouble() const { CHECK_TAG(Tag::Double); return double_; @@ -154,6 +165,9 @@ std::ostream &operator<<(std::ostream &os, const Value &value) { case Value::Tag::Tensor: os << value.ToTensor(); break; + case Value::Tag::Float: + os << value.ToFloat(); + break; case Value::Tag::Double: os << value.ToDouble(); break; diff --git a/inferrt/src/ir/value/value.h b/inferrt/src/ir/value/value.h index de85fb8c88e45832c206c6007ace82b53f54fbdf..c2d96123c094ddf64b61b12c1a32d134f0e712fe 100644 --- a/inferrt/src/ir/value/value.h +++ b/inferrt/src/ir/value/value.h @@ -148,6 +148,7 @@ class Value : public RefCounted { /** @name Type checkers */ ///@{ bool IsTensor() const { return tag_ == Tag::Tensor; } + bool IsFloat() const {return tag_ == Tag::Float;} bool IsDouble() const { return tag_ == Tag::Double; } bool IsInt() const { return tag_ == Tag::Int; } bool IsBool() const { return tag_ == Tag::Bool; } @@ -162,6 +163,8 @@ class Value : public RefCounted { */ ///@{ 
const TensorPtr &ToTensor() const; + TensorPtr &ToTensor(); + float ToFloat() const; double ToDouble() const; int64_t ToInt() const; bool ToBool() const; @@ -181,11 +184,12 @@ class Value : public RefCounted { /** * @brief Enumeration of the possible types a Value can hold. */ - enum class Tag { None, Tensor, Double, Int, Bool, String, Tuple }; + enum class Tag { None, Tensor, Float, Double, Int, Bool, String, Tuple }; const Tag tag_; ///< The tag indicating the type of the value. union { TensorPtr tensor_; + float float_; double double_; int64_t int_; bool bool_; diff --git a/inferrt/src/ops/CMakeLists.txt b/inferrt/src/ops/CMakeLists.txt index 1a8e5a93654d990633a954c5bd5a28d16c285095..561cd383b8ad0365bc1216f9486328288391b820 100644 --- a/inferrt/src/ops/CMakeLists.txt +++ b/inferrt/src/ops/CMakeLists.txt @@ -3,7 +3,7 @@ check_debug_log_out() add_subdirectory(op_def) add_library(kernel SHARED kernel_lib.cc op_register.cc) -target_link_libraries(kernel PRIVATE mrt_ir_obj ${CMAKE_DL_LIBS}) +target_link_libraries(kernel PRIVATE mrt_common mrt_ir_obj ${CMAKE_DL_LIBS}) add_subdirectory(dummy) add_subdirectory(op_base) @@ -11,3 +11,7 @@ add_subdirectory(op_base) if(ENABLE_TORCH_FRONT) add_subdirectory(cpu/aten) endif() + +if(ENABLE_ASCEND) + add_subdirectory(ascend) +endif() diff --git a/inferrt/src/ops/ascend/CMakeLists.txt b/inferrt/src/ops/ascend/CMakeLists.txt index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..3d0115d363bf5b07e3418c355b15c313ca6de2a6 100644 --- a/inferrt/src/ops/ascend/CMakeLists.txt +++ b/inferrt/src/ops/ascend/CMakeLists.txt @@ -0,0 +1,11 @@ +if(DEFINED ENV{ASCEND_CUSTOM_PATH}) + set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH}) +else() + set(ASCEND_PATH /usr/local/Ascend) +endif() + +include_directories(${ASCEND_PATH}/latest/include/) +include_directories(${ASCEND_PATH}/latest/lib64/) +link_directories(${ASCEND_PATH}/latest/lib64/) + +add_subdirectory(aclnn) diff --git a/inferrt/src/ops/ascend/aclnn/CMakeLists.txt 
b/inferrt/src/ops/ascend/aclnn/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..30e50f89c0acf8b8b50a3df46270d8aa81e08547 --- /dev/null +++ b/inferrt/src/ops/ascend/aclnn/CMakeLists.txt @@ -0,0 +1,6 @@ +check_debug_log_out() + +file(GLOB_RECURSE OPS_ACLNN_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") + +add_library(ops_ascend_aclnn SHARED ${OPS_ACLNN_SRC_FILES}) +target_link_libraries(ops_ascend_aclnn PRIVATE ops_base_obj hardware_ascend kernel mrt::securec) diff --git a/inferrt/src/ops/ascend/aclnn/aclnn_mul.cc b/inferrt/src/ops/ascend/aclnn/aclnn_mul.cc new file mode 100644 index 0000000000000000000000000000000000000000..fb149ead8453e731b277de9cfcd66b25f69551c3 --- /dev/null +++ b/inferrt/src/ops/ascend/aclnn/aclnn_mul.cc @@ -0,0 +1,42 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include "ops/ascend/aclnn/aclnn_mul.h" +#include "ops/op_register.h" + +namespace mrt { +namespace ops { +OpsErrorCode AclnnMul::CalcWorkspace(const std::vector &input, const ir::Value *output, + size_t *workspaceSize) { + LOG_OUT << "Begin CalcWorkspace for op [Mul]"; + executor_->GetWorkspaceSize(static_cast(workspaceSize), this, input[kIndex0]->ToTensor(), + input[kIndex1]->ToTensor(), output->ToTensor()); + return SUCCESS; +} + +OpsErrorCode AclnnMul::Launch(const std::vector &input, void *workspace, size_t workspaceSize, + ir::Value *output, void *stream) { + LOG_OUT << "Begin Launch op [Mul]"; + executor_->Launch(workspace, workspaceSize, stream, input[kIndex0]->ToTensor(), input[kIndex1]->ToTensor(), + output->ToTensor()); + return SUCCESS; +} + +MRT_REG_OP(mul, AclnnMul, Ascend); +} // namespace ops +} // namespace mrt diff --git a/inferrt/src/ops/cpu/aten/test_aten.h b/inferrt/src/ops/ascend/aclnn/aclnn_mul.h similarity index 44% rename from inferrt/src/ops/cpu/aten/test_aten.h rename to inferrt/src/ops/ascend/aclnn/aclnn_mul.h index 85cfa5ab245fd36db75c9a119380057de80e50d1..49f64f25959e21b3eafe0fcf4b5fc46337e4c4cb 100644 --- a/inferrt/src/ops/cpu/aten/test_aten.h +++ b/inferrt/src/ops/ascend/aclnn/aclnn_mul.h @@ -14,41 +14,30 @@ * limitations under the License. */ -#ifndef __OPS_CPU_ATEN_TEST_ATEN_MATMUL_H__ -#define __OPS_CPU_ATEN_TEST_ATEN_MATMUL_H__ +#ifndef __OPS_ASCEND_ACLNN_ACLNN_MUL_H__ +#define __OPS_ASCEND_ACLNN_ACLNN_MUL_H__ -#include #include +#include + +#include "ops/op_base/op_mul.h" +#include "ops/ascend/aclnn/utils/aclnn_executor.h" -#include "ops/op_def/ops_name.h" -#include "ops/operator.h" -#include "ops/kernel_lib.h" -#include "ops/op_register.h" -// This file need to be deleted in the future. 
namespace mrt { namespace ops { -class TestAtenKernel : public DAKernel { +class AclnnMul : public OpMul { public: - explicit TestAtenKernel(ir::NodePtr node) : DAKernel(node) { - operator_ = CreateOperator(ToStr(node->op), hardware::DeviceType::CPU); - } - void Init() override; - void InferShape() override; - void Resize() override; - void Launch() override; + AclnnMul() { executor_ = std::make_unique("aclnnMul"); } + ~AclnnMul() override = default; - private: - std::unique_ptr operator_; - std::vector input_; - ir::Value *output_; -}; + OpsErrorCode CalcWorkspace(const std::vector &input, const ir::Value *output, + size_t *workspaceSize) override; + OpsErrorCode Launch(const std::vector &input, void *workspace, size_t workspaceSize, + ir::Value *output, void *stream) override; -class DA_API TestAtenKernelLib : public KernelLib { - public: - TestAtenKernelLib() : KernelLib("TestAten") {} - ~TestAtenKernelLib() = default; - DAKernel *CreateKernel(ir::NodePtr node) const override { return new TestAtenKernel(node); } + private: + std::unique_ptr executor_{nullptr}; }; } // namespace ops } // namespace mrt -#endif // __OPS_CPU_ATEN_TEST_ATEN_MATMUL_H__ +#endif // __OPS_ASCEND_ACLNN_ACLNN_MUL_H__ diff --git a/inferrt/src/ops/ascend/aclnn/utils/aclnn_cache.h b/inferrt/src/ops/ascend/aclnn/utils/aclnn_cache.h new file mode 100644 index 0000000000000000000000000000000000000000..cbb50facb31996245eae0f805b804a070b696082 --- /dev/null +++ b/inferrt/src/ops/ascend/aclnn/utils/aclnn_cache.h @@ -0,0 +1,287 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OPS_ASCEND_ACLNN_UTILS_ACLNN_CACHE_H__ +#define __OPS_ASCEND_ACLNN_UTILS_ACLNN_CACHE_H__ + +#include +#include +#include +#include +#include +#include +#include + +#include "common/common.h" +#include "ir/value/value.h" +#include "ir/common/intrusive_ptr.h" +#include "ops/ascend/aclnn/utils/aclnn_common_meta.h" +#include "ops/ascend/aclnn/utils/aclnn_deleter.h" +#include "ops/ascend/aclnn/utils/aclnn_converter.h" + +namespace mrt { +namespace ops { +// cache process type +enum class CacheReleaseType { + RELEASE_PARAMS, // release converted params + RELEASE_EXECUTOR, // release executor + RELEASE_PARAMS_AND_EXECUTOR, // release converted params and executor +}; + +// Base class for aclnn cache with reference counting +class CacheEntry : public ir::RefCounted { + public: + CacheEntry() = default; + virtual ~CacheEntry() = default; + + virtual void Release(const CacheReleaseType &type) = 0; + virtual void UpdateTensorAddr(size_t *index, size_t *relativeIndex, void *tensorAddr) = 0; + virtual aclOpExecutor *GetExecutor() = 0; + + private: + DISABLE_COPY_AND_ASSIGN(CacheEntry) +}; +using CacheEntryPtr = ir::IntrusivePtr; + +struct AddrUpdater { + template + static void UpdateAddr(const CacheEntryPtr &cacheEntry, const T &value, size_t *index) { + LOG_OUT << "UpdateAddr for non tensor type, index: " << *index; + ++(*index); + } + + static void UpdateAddr(const CacheEntryPtr &cacheEntry, const ir::TensorPtr &tensor, size_t *index) { + cacheEntry->UpdateTensorAddr(index, nullptr, tensor->DataPtr()); + ++(*index); + } + + static void 
UpdateAddr(const CacheEntryPtr &cacheEntry, const std::vector &tensorList, size_t *index) { + for (size_t i = 0; i < tensorList.size(); ++i) { + cacheEntry->UpdateTensorAddr(index, &i, tensorList[i]->DataPtr()); + } + ++(*index); + } + + static void UpdateAddr(const CacheEntryPtr &cacheEntry, const ir::TuplePtr &tuple, size_t *index) { + if (tuple == nullptr || tuple->Size() == 0) { + LOG_OUT << "tuple is empty"; + ++(*index); + return; + } + if ((*tuple)[kIndex0]->IsTensor()) { + std::vector tensorList; + TupleToTensorList(*tuple, &tensorList); + UpdateAddr(cacheEntry, tensorList, index); + return; + } + ++(*index); + } + + template + static void CallUpdateAddr(const CacheEntryPtr &cacheEntry, const Args &...args) { + size_t index = 0; + (UpdateAddr(cacheEntry, args, &index), ...); + } + + static void UpdateTensorAddr(aclTensor *tensor, size_t *index, aclOpExecutor *executor, void *tensorAddr) { + static const auto aclSetTensorAddr = GET_ACLNN_COMMON_META_FUNC(aclSetTensorAddr); + if (aclSetTensorAddr == nullptr) { + LOG_EXCEPTION << "aclSetTensorAddr is nullptr"; + return; + } + aclSetTensorAddr(executor, *index, tensor, tensorAddr); + } + + static void UpdateTensorAddr(aclTensorList *tensorList, size_t *index, size_t *relativeIndex, aclOpExecutor *executor, + void *tensorAddr) { + static const auto aclSetDynamicTensorAddr = GET_ACLNN_COMMON_META_FUNC(aclSetDynamicTensorAddr); + if (aclSetDynamicTensorAddr == nullptr) { + LOG_EXCEPTION << "aclSetDynamicTensorAddr is nullptr"; + return; + } + aclSetDynamicTensorAddr(executor, *index, *relativeIndex, tensorList, tensorAddr); + } +}; + +// Cache processor for cache operations +template +class CacheProcessor { + public: + explicit CacheProcessor(Tuple &&tuple, aclOpExecutor *executor) + : convertedParams_(std::move(tuple)), executor_(executor) { + InitTensorAddrUpdaters(); + } + + CacheProcessor(CacheProcessor &&other) noexcept + : convertedParams_(std::move(other.convertedParams_)), + executor_(other.executor_), + 
isParamsReleased_(other.isParamsReleased_), + isExecutorReleased_(other.isExecutorReleased_) { + other.executor_ = nullptr; + other.isParamsReleased_ = true; + other.isExecutorReleased_ = true; + } + + CacheProcessor &operator=(CacheProcessor &&other) noexcept { + if (this != &other) { + if (!isParamsReleased_) { + AclnnDeleter::ReleaseConvertedParams(convertedParams_); + } + + convertedParams_ = std::move(other.convertedParams_); + executor_ = other.executor_; + isParamsReleased_ = other.isParamsReleased_; + isExecutorReleased_ = other.isExecutorReleased_; + + other.executor_ = nullptr; + other.isParamsReleased_ = true; + other.isExecutorReleased_ = true; + } + return *this; + } + + template + static void BuildTensorAddrUpdater() { + using elementType = std::decay_t>; + if constexpr (std::is_same_v) { + tensorAddrUpdatersMap_[I] = [](const Tuple &convertedParams, aclOpExecutor *executor, size_t *index, + size_t *relativeIndex, void *tensorAddr) { + AddrUpdater::UpdateTensorAddr(std::get(convertedParams), index, executor, tensorAddr); + }; + } + if constexpr (std::is_same_v) { + tensorAddrUpdatersMap_[I] = [](const Tuple &convertedParams, aclOpExecutor *executor, size_t *index, + size_t *relativeIndex, void *tensorAddr) { + AddrUpdater::UpdateTensorAddr(std::get(convertedParams), index, relativeIndex, executor, tensorAddr); + }; + }; + } + + template + static void BuildTensorAddrUpdaters(std::index_sequence) { + (BuildTensorAddrUpdater(), ...); + } + + static void InitTensorAddrUpdaters() { + constexpr size_t tuple_size = std::tuple_size_v; + static_assert(tuple_size > 0, "Tuple size must be greater than 0"); + static bool isInitialized = false; + if (isInitialized) { + return; + } + isInitialized = true; + LOG_OUT << "Initializing tensor address updaters for tuple of size: " << tuple_size; + + BuildTensorAddrUpdaters(std::make_index_sequence{}); + } + + ~CacheProcessor() { + // release params and executor + if (!isParamsReleased_) { + 
AclnnDeleter::ReleaseConvertedParams(convertedParams_); + } + if (!isExecutorReleased_) { + AclnnDeleter::ReleaseExecutor(executor_); + } + } + + void Release(const CacheReleaseType &type) { + switch (type) { + case CacheReleaseType::RELEASE_PARAMS: + if (!isParamsReleased_) { + AclnnDeleter::ReleaseConvertedParams(convertedParams_); + isParamsReleased_ = true; + } + break; + case CacheReleaseType::RELEASE_EXECUTOR: + if (!isExecutorReleased_) { + AclnnDeleter::ReleaseExecutor(executor_); + isExecutorReleased_ = true; + } + break; + case CacheReleaseType::RELEASE_PARAMS_AND_EXECUTOR: + if (!isParamsReleased_) { + AclnnDeleter::ReleaseConvertedParams(convertedParams_); + isParamsReleased_ = true; + } + if (!isExecutorReleased_) { + AclnnDeleter::Release(executor_); + isExecutorReleased_ = true; + } + break; + default: + LOG_EXCEPTION << "Invalid cache release type: " << static_cast(type); + break; + } + } + + void UpdateTensorAddr(size_t *index, size_t *relativeIndex, void *tensorAddr) { + LOG_OUT << "UpdateTensorAddr called for index: " << *index << ", updaters size: " << tensorAddrUpdatersMap_.size() + << ", relativeIndex: " << (relativeIndex == nullptr ? 
0 : *relativeIndex); + + // Use the static map for efficient lookup, no need lookup in the future + auto it = tensorAddrUpdatersMap_.find(*index); + if (it != tensorAddrUpdatersMap_.end()) { + LOG_OUT << "Found updater for index " << *index; + it->second(convertedParams_, executor_, index, relativeIndex, tensorAddr); + } else { + LOG_EXCEPTION << "No updater found for index: " << *index << ", available indices: "; + for (const auto &pair : tensorAddrUpdatersMap_) { + LOG_OUT << pair.first << " "; + } + } + } + + aclOpExecutor *GetExecutor() { return executor_; } + + using TensorAddrUpdater = std::function; + + private: + DISABLE_COPY_AND_ASSIGN(CacheProcessor) + Tuple convertedParams_; + aclOpExecutor *executor_; + + // Static map for updater functions (no instance data) + inline static std::unordered_map tensorAddrUpdatersMap_; + + bool isParamsReleased_{false}; + bool isExecutorReleased_{false}; +}; + +// Wrapper class for CacheEntry +template +class CacheEntryImpl : public CacheEntry { + public: + explicit CacheEntryImpl(CacheProcessor &&cacheProcessor) : cacheProcessor_(std::move(cacheProcessor)) {} + ~CacheEntryImpl() override = default; + + void Release(const CacheReleaseType &type) override { cacheProcessor_.Release(type); } + + void UpdateTensorAddr(size_t *index, size_t *relativeIndex, void *tensorAddr) override { + cacheProcessor_.UpdateTensorAddr(index, relativeIndex, tensorAddr); + } + + aclOpExecutor *GetExecutor() override { return cacheProcessor_.GetExecutor(); } + + private: + DISABLE_COPY_AND_ASSIGN(CacheEntryImpl) + CacheProcessor cacheProcessor_; +}; + +} // namespace ops +} // namespace mrt + +#endif // __OPS_ASCEND_ACLNN_UTILS_ACLNN_CACHE_H__ \ No newline at end of file diff --git a/inferrt/src/ops/ascend/aclnn/utils/aclnn_common_meta.cc b/inferrt/src/ops/ascend/aclnn/utils/aclnn_common_meta.cc new file mode 100644 index 0000000000000000000000000000000000000000..78ae4a18914bf4dc2fa48a312d505ec9dff9b8a4 --- /dev/null +++ 
b/inferrt/src/ops/ascend/aclnn/utils/aclnn_common_meta.cc @@ -0,0 +1,44 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ops/ascend/aclnn/utils/aclnn_common_meta.h" + +namespace mrt { +namespace ops { +DECLARE_ACLNN_COMMON_META_FUNC(aclCreateTensor); +DECLARE_ACLNN_COMMON_META_FUNC(aclCreateScalar); +DECLARE_ACLNN_COMMON_META_FUNC(aclCreateIntArray); +DECLARE_ACLNN_COMMON_META_FUNC(aclCreateFloatArray); +DECLARE_ACLNN_COMMON_META_FUNC(aclCreateBoolArray); +DECLARE_ACLNN_COMMON_META_FUNC(aclCreateTensorList); + +DECLARE_ACLNN_COMMON_META_FUNC(aclDestroyTensor); +DECLARE_ACLNN_COMMON_META_FUNC(aclDestroyScalar); +DECLARE_ACLNN_COMMON_META_FUNC(aclDestroyIntArray); +DECLARE_ACLNN_COMMON_META_FUNC(aclDestroyFloatArray); +DECLARE_ACLNN_COMMON_META_FUNC(aclDestroyBoolArray); +DECLARE_ACLNN_COMMON_META_FUNC(aclDestroyTensorList); +DECLARE_ACLNN_COMMON_META_FUNC(aclDestroyAclOpExecutor); + +DECLARE_ACLNN_COMMON_META_FUNC(aclnnInit); +DECLARE_ACLNN_COMMON_META_FUNC(aclnnFinalize); + +DECLARE_ACLNN_COMMON_META_FUNC(aclSetAclOpExecutorRepeatable); + +DECLARE_ACLNN_COMMON_META_FUNC(aclSetTensorAddr); +DECLARE_ACLNN_COMMON_META_FUNC(aclSetDynamicTensorAddr); +} // namespace ops +} // namespace mrt diff --git a/inferrt/src/ops/ascend/aclnn/utils/aclnn_common_meta.h b/inferrt/src/ops/ascend/aclnn/utils/aclnn_common_meta.h new file mode 100644 index 
0000000000000000000000000000000000000000..0c3f1f6c23df3525fcbf70d848a5968d0a8b2275 --- /dev/null +++ b/inferrt/src/ops/ascend/aclnn/utils/aclnn_common_meta.h @@ -0,0 +1,107 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OPS_ASCEND_ACLNN_UTILS_ACLNN_COMMON_META_H__ +#define __OPS_ASCEND_ACLNN_UTILS_ACLNN_COMMON_META_H__ + +#include "acl/acl_base.h" + +namespace mrt { +namespace ops { + +// Base acl data structure +using aclOpExecutor = struct aclOpExecutor; +using aclTensor = struct aclTensor; +using aclTensorList = struct aclTensorList; +using aclScalar = struct aclScalar; +using aclIntArray = struct aclIntArray; +using aclFloatArray = struct aclFloatArray; +using aclBoolArray = struct aclBoolArray; + +// Base acl creators +using _aclCreateTensorFuncPtr = aclTensor *(*)(const int64_t *viewDims, uint64_t viewDimsNum, aclDataType dataType, + const int64_t *stride, int64_t offset, aclFormat format, + const int64_t *storageDims, uint64_t storageDimsNum, void *tensorData); +using _aclCreateScalarFuncPtr = aclScalar *(*)(void *value, aclDataType dataType); +using _aclCreateIntArrayFuncPtr = aclIntArray *(*)(const int64_t *value, uint64_t size); +using _aclCreateFloatArrayFuncPtr = aclFloatArray *(*)(const float *value, uint64_t size); +using _aclCreateBoolArrayFuncPtr = aclBoolArray *(*)(const bool *value, uint64_t size); +using _aclCreateTensorListFuncPtr = aclTensorList *(*)(const 
aclTensor *const *value, uint64_t size); + +// Base acl deleters +using _aclDestroyTensorFuncPtr = int (*)(const aclTensor *tensor); +using _aclDestroyScalarFuncPtr = int (*)(const aclScalar *scalar); +using _aclDestroyIntArrayFuncPtr = int (*)(const aclIntArray *array); +using _aclDestroyFloatArrayFuncPtr = int (*)(const aclFloatArray *array); +using _aclDestroyBoolArrayFuncPtr = int (*)(const aclBoolArray *array); +using _aclDestroyTensorListFuncPtr = int (*)(const aclTensorList *array); +using _aclDestroyAclOpExecutorFuncPtr = int (*)(aclOpExecutor *executor); + +// Init and finalize +using _aclnnInitFuncPtr = int (*)(const char *); +using _aclnnFinalizeFuncPtr = int (*)(); + +// For reusing aclOpExecutor +using _aclSetAclOpExecutorRepeatableFuncPtr = int (*)(aclOpExecutor *executor); + +// Set the device address ptr for aclTensor +using _aclSetTensorAddrFuncPtr = int (*)(aclOpExecutor *executor, const size_t index, aclTensor *tensor, void *addr); +using _aclSetDynamicTensorAddrFuncPtr = int (*)(aclOpExecutor *executor, const size_t index, const size_t relativeIndex, + aclTensorList *tensors, void *addr); + +#define DECLARE_ACLNN_COMMON_META_FUNC(name) _##name##FuncPtr name##_ = nullptr + +#define EXTERN_ACLNN_COMMON_META_FUNC(name) \ + extern _##name##FuncPtr name##_; \ + inline constexpr const char *kName##name##_ = #name + +EXTERN_ACLNN_COMMON_META_FUNC(aclCreateTensor); +EXTERN_ACLNN_COMMON_META_FUNC(aclCreateScalar); +EXTERN_ACLNN_COMMON_META_FUNC(aclCreateIntArray); +EXTERN_ACLNN_COMMON_META_FUNC(aclCreateFloatArray); +EXTERN_ACLNN_COMMON_META_FUNC(aclCreateBoolArray); +EXTERN_ACLNN_COMMON_META_FUNC(aclCreateTensorList); + +EXTERN_ACLNN_COMMON_META_FUNC(aclDestroyTensor); +EXTERN_ACLNN_COMMON_META_FUNC(aclDestroyScalar); +EXTERN_ACLNN_COMMON_META_FUNC(aclDestroyIntArray); +EXTERN_ACLNN_COMMON_META_FUNC(aclDestroyFloatArray); +EXTERN_ACLNN_COMMON_META_FUNC(aclDestroyBoolArray); +EXTERN_ACLNN_COMMON_META_FUNC(aclDestroyTensorList); 
+EXTERN_ACLNN_COMMON_META_FUNC(aclDestroyAclOpExecutor); + +EXTERN_ACLNN_COMMON_META_FUNC(aclnnInit); +EXTERN_ACLNN_COMMON_META_FUNC(aclnnFinalize); + +EXTERN_ACLNN_COMMON_META_FUNC(aclSetAclOpExecutorRepeatable); + +EXTERN_ACLNN_COMMON_META_FUNC(aclSetTensorAddr); +EXTERN_ACLNN_COMMON_META_FUNC(aclSetDynamicTensorAddr); + +#define GET_ACLNN_COMMON_META_FUNC(name) \ + []() -> auto { \ + if (name##_ == nullptr) { \ + LoadOpApiLib(); \ + } \ + return name##_; \ + } \ + () + +#define GET_ACLNN_OP_FUNC(name) GetAclnnOpApiFunc(name.c_str()) + +} // namespace ops +} // namespace mrt +#endif // __OPS_ASCEND_ACLNN_UTILS_ACLNN_COMMON_META_H__ diff --git a/inferrt/src/ops/ascend/aclnn/utils/aclnn_converter.cc b/inferrt/src/ops/ascend/aclnn/utils/aclnn_converter.cc new file mode 100644 index 0000000000000000000000000000000000000000..33f9239eb21c1cff1b920d3862e61bb7c2090d30 --- /dev/null +++ b/inferrt/src/ops/ascend/aclnn/utils/aclnn_converter.cc @@ -0,0 +1,48 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include "ops/ascend/aclnn/utils/aclnn_converter.h" + +namespace mrt { +namespace ops { +static const std::map kDataTypeToAclDataTypeMap = { + {ir::DataType::Type::Unknown, ACL_DT_UNDEFINED}, + {ir::DataType::Type::Float32, ACL_FLOAT}, + {ir::DataType::Type::Float64, ACL_DOUBLE}, + {ir::DataType::Type::Int8, ACL_INT8}, + {ir::DataType::Type::Int16, ACL_INT16}, + {ir::DataType::Type::Int32, ACL_INT32}, + {ir::DataType::Type::Int64, ACL_INT64}, + {ir::DataType::Type::UInt8, ACL_UINT8}, + {ir::DataType::Type::Bool, ACL_BOOL}, +}; + +aclDataType TensorConverter::ConvertDtype(ir::DataType::Type dtype) { + auto iter = kDataTypeToAclDataTypeMap.find(dtype); + if (iter == kDataTypeToAclDataTypeMap.end()) { + LOG_EXCEPTION << "Invalid dtype: " << dtype; + } + auto ret = iter->second; + if (ret == ACL_DT_UNDEFINED) { + LOG_EXCEPTION << "Invalid dtype: " << dtype; + } + return ret; +} +} // namespace ops +} // namespace mrt diff --git a/inferrt/src/ops/ascend/aclnn/utils/aclnn_converter.h b/inferrt/src/ops/ascend/aclnn/utils/aclnn_converter.h new file mode 100644 index 0000000000000000000000000000000000000000..bae0af5636fad79d6d79993aed0bee4c8564a709 --- /dev/null +++ b/inferrt/src/ops/ascend/aclnn/utils/aclnn_converter.h @@ -0,0 +1,179 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __OPS_ASCEND_ACLNN_UTILS_ACLNN_CONVERTER_H__ +#define __OPS_ASCEND_ACLNN_UTILS_ACLNN_CONVERTER_H__ + +#include +#include +#include +#include +#include + +#include "ir/value/value.h" +#include "ops/utils/op_constants.h" +#include "ops/ascend/aclnn/utils/aclnn_common_meta.h" +#include "ops/ascend/aclnn/utils/opapi_lib_loader.h" + +namespace mrt { +namespace ops { +inline void TupleToTensorList(const ir::Tuple &tuple, std::vector *tensorList) { + for (size_t i = 0; i < tuple.Size(); ++i) { + (void)tensorList->emplace_back(tuple[i]->ToTensor()); + } +} + +inline void TupleToIntList(const ir::Tuple &tuple, std::vector *intList) { + for (size_t i = 0; i < tuple.Size(); ++i) { + (void)intList->emplace_back(tuple[i]->ToInt()); + } +} + +inline void TupleToBoolList(const ir::Tuple &tuple, std::vector *boolList) { + for (size_t i = 0; i < tuple.Size(); ++i) { + (void)boolList->emplace_back(static_cast(tuple[i]->ToBool())); + } +} + +inline void TupleToFloatList(const ir::Tuple &tuple, std::vector *floatList) { + for (size_t i = 0; i < tuple.Size(); ++i) { + (void)floatList->emplace_back(tuple[i]->ToFloat()); + } +} + +inline void TupleToDoubleList(const ir::Tuple &tuple, std::vector *doubleList) { + for (size_t i = 0; i < tuple.Size(); ++i) { + (void)doubleList->emplace_back(tuple[i]->ToDouble()); + } +} + +// Convert tensor to aclTensor. 
+struct TensorConverter { + aclTensorList *ConvertTensor(const std::vector &tensorList) { + if (tensorList.empty()) { + LOG_OUT << "tensorList is empty"; + } + static const auto aclCreateTensorList = GET_ACLNN_COMMON_META_FUNC(aclCreateTensorList); + std::vector aclTensorList; + for (const auto &tensor : tensorList) { + (void)aclTensorList.emplace_back(ConvertTensor(tensor)); + } + return aclCreateTensorList(aclTensorList.data(), aclTensorList.size()); + } + + aclTensor *ConvertTensor(const ir::TensorPtr &tensor) { + static const auto aclCreateTensor = GET_ACLNN_COMMON_META_FUNC(aclCreateTensor); + CHECK_IF_NULL(aclCreateTensor); + if (tensor == nullptr || tensor->Dtype().value == ir::DataType::Type::Unknown) { + return nullptr; + } + aclFormat format = ACL_FORMAT_ND; + switch (tensor->Dim()) { + case kDim3: + format = ACL_FORMAT_NCL; + break; + case kDim4: + format = ACL_FORMAT_NCHW; + break; + case kDim5: + format = ACL_FORMAT_NCDHW; + break; + default: + format = ACL_FORMAT_ND; + } + if (tensor->Format() == ir::MemoryFormat::FRACTAL_NZ) { + format = ACL_FORMAT_FRACTAL_NZ; + } + return aclCreateTensor(tensor->Shape().data(), tensor->Dim(), ConvertDtype(tensor->Dtype().value), + tensor->Strides().data(), tensor->StorageOffset(), format, tensor->Shape().data(), + tensor->Dim(), tensor->DataPtr()); + } + + aclDataType ConvertDtype(ir::DataType::Type dtype); +}; + +struct AttrConverter { + aclIntArray *ConvertAttr(const std::vector &intList) { + static const auto aclCreateIntArray = GET_ACLNN_COMMON_META_FUNC(aclCreateIntArray); + CHECK_IF_NULL(aclCreateIntArray); + return aclCreateIntArray(intList.data(), intList.size()); + } + + aclBoolArray *ConvertAttr(const std::vector &boolList) { + static const auto aclCreateBoolArray = GET_ACLNN_COMMON_META_FUNC(aclCreateBoolArray); + CHECK_IF_NULL(aclCreateBoolArray); + return aclCreateBoolArray(reinterpret_cast(boolList.data()), boolList.size()); + } + + aclFloatArray *ConvertAttr(const std::vector &floatList) { + static 
const auto aclCreateFloatArray = GET_ACLNN_COMMON_META_FUNC(aclCreateFloatArray); + CHECK_IF_NULL(aclCreateFloatArray); + return aclCreateFloatArray(floatList.data(), floatList.size()); + } +}; + +// Convert inputs to aclnn op inputs. +struct AclnnConverter { + template + constexpr auto Convert(const Args &...args) { + return std::make_tuple(Convert(args)...); + } + + aclTensor *Convert(const ir::TensorPtr &tensor) { return tensorConverter_.ConvertTensor(tensor); } + + // Scalar value such as int64, double, float, etc. + template >> + T Convert(T value) { + return value; + } + + const char *Convert(const std::string &str) { return str.c_str(); } + + void *Convert(const ir::TuplePtr &tuple) { + if (tuple->Size() == 0) { + LOG_OUT << "tuple is empty"; + return nullptr; + } + auto firstElement = (*tuple)[kIndex0]; + // Not support tuple in tuple and the element type in the tuple must be the same. + if (firstElement->IsTensor()) { + std::vector tensorList; + TupleToTensorList(*tuple, &tensorList); + return tensorConverter_.ConvertTensor(tensorList); + } else if (firstElement->IsInt()) { + std::vector intList; + TupleToIntList(*tuple, &intList); + return attrConverter_.ConvertAttr(intList); + } else if (firstElement->IsFloat()) { + std::vector floatList; + TupleToFloatList(*tuple, &floatList); + return attrConverter_.ConvertAttr(floatList); + } else if (firstElement->IsBool()) { + std::vector boolList; + TupleToBoolList(*tuple, &boolList); + return attrConverter_.ConvertAttr(boolList); + } else { + LOG_EXCEPTION << "Invalid element type in tuple: " << tuple; + } + } + + private: + TensorConverter tensorConverter_; + AttrConverter attrConverter_; +}; +} // namespace ops +} // namespace mrt +#endif // __OPS_ASCEND_ACLNN_UTILS_ACLNN_CONVERTER_H__ diff --git a/inferrt/src/ops/ascend/aclnn/utils/aclnn_deleter.h b/inferrt/src/ops/ascend/aclnn/utils/aclnn_deleter.h new file mode 100644 index 0000000000000000000000000000000000000000..ed57794bc6efb7b3fc6e852739f7b2d146378fa5 --- 
/dev/null +++ b/inferrt/src/ops/ascend/aclnn/utils/aclnn_deleter.h @@ -0,0 +1,89 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OPS_ASCEND_ACLNN_UTILS_ACLNN_DELETER_H__ +#define __OPS_ASCEND_ACLNN_UTILS_ACLNN_DELETER_H__ + +#include +#include + +#include "ops/ascend/aclnn/utils/opapi_lib_loader.h" + +namespace mrt { +namespace ops { +struct AclnnDeleter { + static void Release(aclTensor *tensor) { + static const auto aclDestroyTensor = GET_ACLNN_COMMON_META_FUNC(aclDestroyTensor); + CHECK_IF_NULL(aclDestroyTensor); + aclDestroyTensor(tensor); + } + + static void Release(aclTensorList *tensorList) { + static const auto aclDestroyTensorList = GET_ACLNN_COMMON_META_FUNC(aclDestroyTensorList); + CHECK_IF_NULL(aclDestroyTensorList); + aclDestroyTensorList(tensorList); + } + + static void Release(aclIntArray *intList) { + static const auto aclDestroyIntArray = GET_ACLNN_COMMON_META_FUNC(aclDestroyIntArray); + CHECK_IF_NULL(aclDestroyIntArray); + aclDestroyIntArray(intList); + } + + static void Release(aclBoolArray *boolList) { + static const auto aclDestroyBoolArray = GET_ACLNN_COMMON_META_FUNC(aclDestroyBoolArray); + CHECK_IF_NULL(aclDestroyBoolArray); + aclDestroyBoolArray(boolList); + } + + static void Release(aclFloatArray *floatList) { + static const auto aclDestroyFloatArray = GET_ACLNN_COMMON_META_FUNC(aclDestroyFloatArray); + CHECK_IF_NULL(aclDestroyFloatArray); + 
aclDestroyFloatArray(floatList); + } + + template + static void Release(T value) { + (void)value; + } + + template + static void CallRelease(const Tuple &t, std::index_sequence) { + (Release(std::get(t)), ...); + } + + template + static void ReleaseConvertedParams(const Tuple &t) { + static constexpr auto size = std::tuple_size::value; + CallRelease(t, std::make_index_sequence{}); + } + + static void ReleaseExecutor(aclOpExecutor *executor) { + static const auto aclDestroyAclOpExecutor = GET_ACLNN_COMMON_META_FUNC(aclDestroyAclOpExecutor); + if (aclDestroyAclOpExecutor == nullptr) { + LOG_OUT << "aclDestroyAclOpExecutor is nullptr"; + return; + } + auto ret = aclDestroyAclOpExecutor(executor); + if (ret != 0) { + LOG_EXCEPTION << "aclDestroyAclOpExecutor failed"; + } + } +}; +} // namespace ops +} // namespace mrt + +#endif // __OPS_ASCEND_ACLNN_UTILS_ACLNN_DELETER_H__ diff --git a/inferrt/src/ops/ascend/aclnn/utils/aclnn_executor.cc b/inferrt/src/ops/ascend/aclnn/utils/aclnn_executor.cc new file mode 100644 index 0000000000000000000000000000000000000000..061df9640d4a99ab7af441a64e4519c746f78881 --- /dev/null +++ b/inferrt/src/ops/ascend/aclnn/utils/aclnn_executor.cc @@ -0,0 +1,25 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include "ops/ascend/aclnn/utils/aclnn_executor.h" + +namespace mrt { +namespace ops { +std::unique_ptr AclnnExecutor::converter_ = std::make_unique(); +} // namespace ops +} // namespace mrt diff --git a/inferrt/src/ops/ascend/aclnn/utils/aclnn_executor.h b/inferrt/src/ops/ascend/aclnn/utils/aclnn_executor.h new file mode 100644 index 0000000000000000000000000000000000000000..578953cab6c2bdc1bf83d1b15e02af5c444c170e --- /dev/null +++ b/inferrt/src/ops/ascend/aclnn/utils/aclnn_executor.h @@ -0,0 +1,182 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __OPS_ASCEND_ACLNN_UTILS_ACLNN_EXECUTOR_H__ +#define __OPS_ASCEND_ACLNN_UTILS_ACLNN_EXECUTOR_H__ + +#include +#include +#include +#include + +#include "ops/ascend/aclnn/utils/aclnn_hash.h" +#include "ops/ascend/aclnn/utils/aclnn_cache.h" +#include "ops/ascend/aclnn/utils/opapi_lib_loader.h" + +namespace mrt { +namespace ops { +inline constexpr const char *kNameGetWorkspaceSize = "GetWorkspaceSize"; +using RunOpFunc = int (*)(void *, uint64_t, aclOpExecutor *, const aclrtStream); +class AclnnExecutor { + public: + explicit AclnnExecutor(const std::string &&opApiName) : opApiName_(std::move(opApiName)) { + getWorkspaceSizeApiName_ = opApiName_ + kNameGetWorkspaceSize; + AclnnInit(); + } + ~AclnnExecutor() { AclnnFinalize(); } + + template + void GetWorkspaceSize(uint64_t *workspaceSize, const OpClass *op, const Args &...args) { + // no cache + if (cacheCapacity_ == 0) { + GetWorkspaceSizeWithoutCacheList(workspaceSize, args...); + return; + } + + // TODO: with cache list + // hashId_ = CalcAclnnHash(opName_, args...); + + LOG_ERROR << "No cache list"; + } + + template + void GetWorkspaceSizeWithoutCacheList(uint64_t *workspaceSize, const Args &...args) { + if (isExecutorRepeatable_) { + *workspaceSize = workspaceSize_; + return; + } + + const auto getWorkspaceSizeFuncPtr = GET_ACLNN_OP_FUNC(getWorkspaceSizeApiName_); + if (getWorkspaceSizeFuncPtr == nullptr) { + LOG_EXCEPTION << "Api " << getWorkspaceSizeApiName_ << " is not in " << kNameOpApiLib; + } + + aclOpExecutor *opExecutor = nullptr; + auto convertedParams = converter_->Convert(args..., workspaceSize, &opExecutor); + auto getWorkspaceSizeFunc = ConvertToOpApiFunc(convertedParams, getWorkspaceSizeFuncPtr); + CHECK_IF_NULL(getWorkspaceSizeFunc); + auto ret = CallOpApiFunc(getWorkspaceSizeFunc, convertedParams); + if (ret != 0) { + LOG_EXCEPTION << "Call " << getWorkspaceSizeApiName_ << " failed"; + } + SetExecutorRepeatable(opExecutor); + if (isExecutorRepeatable_) { + workspaceSize_ = 
*workspaceSize; + // cache the params + cacheEntry_ = ir::MakeIntrusive>>( + CacheProcessor(std::move(convertedParams), opExecutor)); + LOG_OUT << "Cache the params for op[" << opApiName_ << "] success"; + return; + } + AclnnDeleter::ReleaseConvertedParams(convertedParams); + AclnnDeleter::ReleaseExecutor(opExecutor); + LOG_OUT << "Release the params and executor for op[" << opApiName_ << "] success"; + } + + template + void Launch(void *workspace, size_t workspaceSize, void *stream, const Args &...args) { + if (isExecutorRepeatable_) { + LaunchOpWithCache(workspace, workspaceSize, stream, args...); + } else { + LaunchOpWithoutCache(workspace, workspaceSize, stream, args...); + } + } + + template + void LaunchOpWithCache(void *workspace, size_t workspaceSize, void *stream, const Args &...args) { + // update tensor addr + AddrUpdater::CallUpdateAddr(cacheEntry_, args...); + // run op + const auto opApiFuncPtr = GET_ACLNN_OP_FUNC(opApiName_); + if (opApiFuncPtr == nullptr) { + LOG_EXCEPTION << "Api " << opApiName_ << " is not in " << kNameOpApiLib; + } + auto opApiFunc = reinterpret_cast(opApiFuncPtr); + auto opApiFuncRet = opApiFunc(workspace, workspaceSize, cacheEntry_->GetExecutor(), stream); + if (opApiFuncRet != 0) { + LOG_EXCEPTION << "Call " << opApiName_ << " failed"; + } + // release params only + cacheEntry_->Release(CacheReleaseType::RELEASE_PARAMS); + } + + template + void LaunchOpWithoutCache(void *workspace, size_t workspaceSize, void *stream, const Args &...args) { + // convert args and generate aclOpExecutor + const auto getWorkspaceSizeFuncPtr = GET_ACLNN_OP_FUNC(getWorkspaceSizeApiName_); + + if (getWorkspaceSizeFuncPtr == nullptr) { + LOG_EXCEPTION << "Api " << getWorkspaceSizeApiName_ << " is not in " << kNameOpApiLib; + } + uint64_t workspaceSizeTmp = 0; + aclOpExecutor *opExecutor = nullptr; + auto convertedParams = converter_->Convert(args..., &workspaceSizeTmp, &opExecutor); + auto getWorkspaceSizeFunc = ConvertToOpApiFunc(convertedParams, 
getWorkspaceSizeFuncPtr); + CHECK_IF_NULL(getWorkspaceSizeFunc); + auto getWorkspaceSizeRet = CallOpApiFunc(getWorkspaceSizeFunc, convertedParams); + if (getWorkspaceSizeRet != 0) { + LOG_EXCEPTION << "Call " << getWorkspaceSizeApiName_ << " failed"; + } + + // run op + const auto opApiFuncPtr = GET_ACLNN_OP_FUNC(opApiName_); + if (opApiFuncPtr == nullptr) { + LOG_EXCEPTION << "Api " << opApiName_ << " is not in " << kNameOpApiLib; + } + auto opApiFunc = reinterpret_cast(opApiFuncPtr); + auto opApiFuncRet = opApiFunc(workspace, workspaceSize, opExecutor, stream); + if (opApiFuncRet != 0) { + LOG_EXCEPTION << "Call " << opApiName_ << " failed"; + } + + // release params and executor + AclnnDeleter::ReleaseConvertedParams(convertedParams); + AclnnDeleter::ReleaseExecutor(opExecutor); + } + + void SetExecutorRepeatable(aclOpExecutor *executor) { + static const auto aclSetAclOpExecutorRepeatable = GET_ACLNN_COMMON_META_FUNC(aclSetAclOpExecutorRepeatable); + if (aclSetAclOpExecutorRepeatable == nullptr) { + LOG_OUT << "aclSetAclOpExecutorRepeatable is nullptr, which means the executor is not repeatable for op[" + << opApiName_ << "]"; + isExecutorRepeatable_ = false; + return; + } + auto ret = aclSetAclOpExecutorRepeatable(executor); + if (ret != 0) { + LOG_OUT << "aclSetAclOpExecutorRepeatable failed, which means the executor is not repeatable for op[" + << opApiName_ << "]"; + isExecutorRepeatable_ = false; + return; + } + isExecutorRepeatable_ = true; + LOG_OUT << "Set executor repeatable for op[" << opApiName_ << "] success"; + } + + private: + std::string opApiName_; + std::string getWorkspaceSizeApiName_; + static std::unique_ptr converter_; + CacheEntryPtr cacheEntry_{nullptr}; + bool isExecutorRepeatable_{false}; + uint64_t workspaceSize_{0}; + uint64_t hashId_{0}; + size_t cacheCapacity_{0}; +}; + +} // namespace ops +} // namespace mrt +#endif // __OPS_ASCEND_ACLNN_UTILS_ACLNN_EXECUTOR_H__ diff --git a/inferrt/src/ops/ascend/aclnn/utils/aclnn_hash.cc 
b/inferrt/src/ops/ascend/aclnn/utils/aclnn_hash.cc new file mode 100644 index 0000000000000000000000000000000000000000..46d8bf72f7391c9f5abaec97ca4beb029d09810a --- /dev/null +++ b/inferrt/src/ops/ascend/aclnn/utils/aclnn_hash.cc @@ -0,0 +1,80 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "ops/ascend/aclnn/utils/aclnn_hash.h" +#include "ops/ascend/aclnn/utils/aclnn_converter.h" + +namespace mrt { +namespace ops { +constexpr size_t kSizeFive = 5; +void GatherHash(const ir::TensorPtr &tensor) { + if (tensor == nullptr || tensor->Dtype().value == ir::DataType::Type::Unknown) { + MemcpyToBuf("None", kSizeFive); + return; + } + + // shape + const auto &shape = tensor->Shape(); + if (!shape.empty()) { + MemcpyToBuf(shape.data(), tensor->Dim() * sizeof(int64_t)); + } + + // dtype + auto dtype = tensor->Dtype().value; + MemcpyToBuf(&dtype, sizeof(int8_t)); + + // strides + const auto &strides = tensor->Strides(); + if (!strides.empty()) { + MemcpyToBuf(strides.data(), strides.size() * sizeof(int64_t)); + } + + // offset + auto offset = tensor->StorageOffset(); + MemcpyToBuf(&offset, sizeof(int64_t)); +} + +void GatherHash(const ir::TuplePtr &tuple) { + if (tuple->Size() == 0) { + LOG_OUT << "tuple is empty"; + return; + } + auto firstElement = (*tuple)[kIndex0]; + // Not support tuple in tuple and the element type in the tuple must be the same. 
+ if (firstElement->IsTensor()) { + std::vector tensorList; + TupleToTensorList(*tuple, &tensorList); + GatherHash(tensorList); + } else if (firstElement->IsInt()) { + std::vector intList; + TupleToIntList(*tuple, &intList); + GatherHash(intList); + } else if (firstElement->IsFloat()) { + std::vector floatList; + TupleToFloatList(*tuple, &floatList); + GatherHash(floatList); + } else if (firstElement->IsBool()) { + std::vector boolList; + TupleToBoolList(*tuple, &boolList); + GatherHash(boolList); + } else { + LOG_EXCEPTION << "Invalid element type in tuple: " << tuple; + } +} +} // namespace ops +} // namespace mrt diff --git a/inferrt/src/ops/cpu/aten/test_aten.cc b/inferrt/src/ops/ascend/aclnn/utils/aclnn_hash.h similarity index 35% rename from inferrt/src/ops/cpu/aten/test_aten.cc rename to inferrt/src/ops/ascend/aclnn/utils/aclnn_hash.h index 294905561774a9918fb819cd5d2356bae1a850e8..0db3f9f2084f957f7739e834e60fac5a853e5a71 100644 --- a/inferrt/src/ops/cpu/aten/test_aten.cc +++ b/inferrt/src/ops/ascend/aclnn/utils/aclnn_hash.h @@ -14,48 +14,56 @@ * limitations under the License. */ +#ifndef __OPS_ASCEND_ACLNN_UTILS_ACLNN_HASH_H__ +#define __OPS_ASCEND_ACLNN_UTILS_ACLNN_HASH_H__ + +#include #include -#include "ops/cpu/aten/test_aten.h" -// This file need to be deleted in the future. 
+#include "ir/value/value.h" +#include "ops/ascend/aclnn/utils/hash_buf.h" + namespace mrt { namespace ops { -void TestAtenKernel::Init() { - CHECK_IF_NULL(node_); - input_.clear(); - for (auto &input : node_->inputs) { - CHECK_IF_NULL(input->output); - input_.emplace_back(input->output.get()); - } - CHECK_IF_NULL(node_->output); - output_ = node_->output.get(); -} +void GatherHash(const ir::TensorPtr &tensor); -void TestAtenKernel::InferShape() { - CHECK_IF_NULL(operator_); - node_->output = ir::MakeIntrusive(ir::MakeIntrusive( - std::vector{-1}, ir::DataType::Type::Float32, hardware::Device(hardware::DeviceType::CPU, 0))); - Init(); - LOG_OUT << "Begin InferShape for operator [" << ToStr(node_->op) << "], input=" << input_ << ", output=" << output_; - if (operator_->InferShape(input_, output_) != SUCCESS) { - LOG_EXCEPTION << "Infer shape failed for operator " << ToStr(node_->op); +inline void GatherHash(const std::vector &tensorList) { + for (auto &tensor : tensorList) { + GatherHash(tensor); } } -void TestAtenKernel::Resize() { - // null +void GatherHash(const ir::TuplePtr &tuple); + +// Gather scalar, int64_t/bool/float/double, etc. +template +void GatherHash(const T &value) { + MemcpyToBuf(&value, sizeof(T)); } -void TestAtenKernel::Launch() { - CHECK_IF_NULL(operator_); - Init(); - LOG_OUT << "Begin Launch for operator [" << ToStr(node_->op) << "], input=" << input_ << ", output=" << output_; - if (operator_->Launch(input_, nullptr, 0, output_, nullptr) != SUCCESS) { - LOG_EXCEPTION << "Launch operator " << ToStr(node_->op) << " failed"; - } +// Gather vector scalar. 
+template +void GatherHash(const std::vector &values) { + MemcpyToBuf(values.data(), values.size() * sizeof(T)); } -DART_REGISTER_KERNEL_LIB("TestAten", TestAtenKernelLib); +inline void GatherHash(const std::string &str) { MemcpyToBuf(str.c_str(), str.size()); } +inline void GatherHash() {} + +template +void GatherHash(const T &arg, const Args &...args) { + GatherHash(arg); + GatherHash(args...); +} + +template +uint64_t CalcAclnnHash(const std::string &opName, const Args &...args) { + gHashOffset = 0; + GatherHash(opName, args...); + return CalcHashId(); +} } // namespace ops } // namespace mrt + +#endif // __OPS_ASCEND_ACLNN_UTILS_ACLNN_HASH_H__ diff --git a/inferrt/src/ops/ascend/aclnn/utils/hash_buf.cc b/inferrt/src/ops/ascend/aclnn/utils/hash_buf.cc new file mode 100644 index 0000000000000000000000000000000000000000..d806ecd534fb20372e804050141355fbdcb38e75 --- /dev/null +++ b/inferrt/src/ops/ascend/aclnn/utils/hash_buf.cc @@ -0,0 +1,195 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ops/ascend/aclnn/utils/hash_buf.h" + +namespace mrt { +namespace ops { +thread_local char gHashBuf[gHashBufSize]; +thread_local int gHashOffset = 0; +constexpr int grShift33Bits = 33; +constexpr uint64_t MIX_STEP1 = 18397679294719823053LLU; +constexpr uint64_t MIX_STEP2 = 14181476777654086739LLU; + +namespace { +inline uint64_t RotatingLeft(uint64_t x, uint8_t n) { return (x << n) | (x >> (64 - n)); } + +inline uint64_t Mixture(uint64_t x) { + // constants step1(18397679294719823053) and step2(14181476777654086739) are used to allow + // hash values to be more evenly distributed after multiplication. + x ^= x >> grShift33Bits; + x *= MIX_STEP1; + x ^= x >> grShift33Bits; + x *= MIX_STEP2; + x ^= x >> grShift33Bits; + + return x; +} + +void GenHashTmp(const uint64_t *blocks, const int blockNum, const uint32_t seed, uint64_t *rhas, uint64_t *rhax) { + CHECK_IF_NULL(blocks); + + // use 9782798678568883157 and 5545529020109919103 for blocking and obfuscation of input data + const uint64_t c1 = 9782798678568883157LLU; + const uint64_t c2 = 5545529020109919103LLU; + + uint64_t has = seed; + uint64_t hax = seed; + for (int i = 0; i < blockNum; i++) { + int even_num = 2; + uint64_t tmp1 = blocks[i * even_num]; + uint64_t tmp2 = blocks[i * even_num + 1]; + + int8_t bits31 = 31; + tmp1 *= c1; + tmp1 = RotatingLeft(tmp1, bits31); + tmp1 *= c2; + has ^= tmp1; + + int8_t bits27 = 27; + has = RotatingLeft(has, bits27); + has += hax; + // increase randomness by mul by 5 and adding a constant + has = has * 5 + 1390208809; + + int8_t bits33 = 33; + tmp2 *= c2; + tmp2 = RotatingLeft(tmp2, bits33); + tmp2 *= c1; + hax ^= tmp2; + + hax = RotatingLeft(hax, bits31); + hax += has; + // increase randomness by mul by 5 and adding a constant + hax = hax * 5 + 944331445; + } + + *rhas = has; + *rhax = hax; +} +} // namespace + +uint64_t GenHash(const void *key, const int len, const uint32_t seed) { + const uint8_t *data = (const uint8_t *)key; + // the length of each block 
is 16 bytes + const int blockNum = len / 16; + // has and hax are literal appromix to hash, and hax is the return value of this function. + uint64_t has = seed; + uint64_t hax = seed; + + // use 9782798678568883157 and 5545529020109919103 for blocking and obfuscation of input data + const uint64_t c1 = 9782798678568883157LLU; + const uint64_t c2 = 5545529020109919103LLU; + + const uint64_t *blocks = (const uint64_t *)(data); + + // update hax + GenHashTmp(blocks, blockNum, seed, &has, &hax); + + // the length of each block is 16 bytes + const uint8_t *tail = (const uint8_t *)(data + blockNum * 16); + uint64_t t1 = 0; + uint64_t t2 = 0; + // because the size of a block is 16, different offsets are calculated for tail blocks + // for different sizes + switch (static_cast(len) & 15) { + case 15: + t2 ^= ((uint64_t)tail[14]) << 48; + [[fallthrough]]; + {} + case 14: + t2 ^= ((uint64_t)tail[13]) << 40; + [[fallthrough]]; + {} + case 13: + t2 ^= ((uint64_t)tail[12]) << 32; + [[fallthrough]]; + {} + case 12: + t2 ^= ((uint64_t)tail[11]) << 24; + [[fallthrough]]; + {} + case 11: + t2 ^= ((uint64_t)tail[10]) << 16; + [[fallthrough]]; + {} + case 10: + t2 ^= ((uint64_t)tail[9]) << 8; + [[fallthrough]]; + {} + case 9: + t2 ^= ((uint64_t)tail[8]) << 0; + t2 *= c2; + t2 = RotatingLeft(t2, 33); + t2 *= c1; + hax ^= t2; + [[fallthrough]]; + {} + case 8: + t1 ^= ((uint64_t)tail[7]) << 56; + [[fallthrough]]; + {} + case 7: + t1 ^= ((uint64_t)tail[6]) << 48; + [[fallthrough]]; + {} + case 6: + t1 ^= ((uint64_t)tail[5]) << 40; + [[fallthrough]]; + {} + case 5: + t1 ^= ((uint64_t)tail[4]) << 32; + [[fallthrough]]; + {} + case 4: + t1 ^= ((uint64_t)tail[3]) << 24; + [[fallthrough]]; + {} + case 3: + t1 ^= ((uint64_t)tail[2]) << 16; + [[fallthrough]]; + {} + case 2: + t1 ^= ((uint64_t)tail[1]) << 8; + [[fallthrough]]; + {} + case 1: + t1 ^= ((uint64_t)tail[0]) << 0; + t1 *= c1; + t1 = RotatingLeft(t1, 31); + t1 *= c2; + has ^= t1; + [[fallthrough]]; + {} + default: { + } + } + + has ^= 
static_cast(len); + hax ^= static_cast(len); + + has += hax; + hax += has; + + has = Mixture(has); + hax = Mixture(hax); + + has += hax; + hax += has; + return hax; +} +} // namespace ops +} // namespace mrt diff --git a/inferrt/src/ops/ascend/aclnn/utils/hash_buf.h b/inferrt/src/ops/ascend/aclnn/utils/hash_buf.h new file mode 100644 index 0000000000000000000000000000000000000000..8b18acf942e99c9552972bc87d6e7ad5580bcef3 --- /dev/null +++ b/inferrt/src/ops/ascend/aclnn/utils/hash_buf.h @@ -0,0 +1,64 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __OPS_ASCEND_ACLNN_UTILS_HASH_BUF_H__ +#define __OPS_ASCEND_ACLNN_UTILS_HASH_BUF_H__ + +#include + +#include "common/common.h" +#include "include/securec.h" + +namespace mrt { +namespace ops { +inline constexpr int gHashBufSize = 8192; +inline constexpr int gHashBufMaxSize = gHashBufSize + 1024; +extern thread_local char gHashBuf[gHashBufSize]; +extern thread_local int gHashOffset; + +inline void MemcpyToBuf(const void *data, size_t size) { + if (size == 0) { + return; + } + if (MS_UNLIKELY(static_cast(gHashOffset) > SIZE_MAX - size)) { + LOG_ERROR << "Hash buf is overflow."; + return; + } + if (gHashOffset + size >= gHashBufSize) { + gHashOffset = gHashBufMaxSize; + return; + } + auto ret = memcpy_sp(gHashBuf + gHashOffset, gHashBufSize - gHashOffset, data, size); + if (ret != EOK) { + LOG_EXCEPTION << "Failed to memcpy!"; + } + gHashOffset += size; +} + +uint64_t GenHash(const void *key, const int len, const uint32_t seed = 0xdeadb0d7); + +inline uint64_t CalcHashId() { + if (gHashOffset == gHashBufMaxSize) { + return 0; + } + uint64_t hashId = GenHash(gHashBuf, gHashOffset); + return hashId; +} + +} // namespace ops +} // namespace mrt + +#endif // __OPS_ASCEND_ACLNN_UTILS_HASH_BUF_H__ diff --git a/inferrt/src/ops/ascend/aclnn/utils/opapi_lib_loader.cc b/inferrt/src/ops/ascend/aclnn/utils/opapi_lib_loader.cc new file mode 100644 index 0000000000000000000000000000000000000000..debe3ae28c759849eddd9628394f2ed4edd1aff2 --- /dev/null +++ b/inferrt/src/ops/ascend/aclnn/utils/opapi_lib_loader.cc @@ -0,0 +1,134 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include "ops/ascend/aclnn/utils/opapi_lib_loader.h" +#include "hardware/ascend/res_manager/symbol_interface/symbol_utils.h" + +namespace mrt { +namespace ops { +namespace { +void LoadCommonMetaFuncApi() { + LOAD_COMMON_META_FUNC(aclCreateTensor); + LOAD_COMMON_META_FUNC(aclCreateScalar); + LOAD_COMMON_META_FUNC(aclCreateIntArray); + LOAD_COMMON_META_FUNC(aclCreateFloatArray); + LOAD_COMMON_META_FUNC(aclCreateBoolArray); + LOAD_COMMON_META_FUNC(aclCreateTensorList); + + LOAD_COMMON_META_FUNC(aclDestroyTensor); + LOAD_COMMON_META_FUNC(aclDestroyScalar); + LOAD_COMMON_META_FUNC(aclDestroyIntArray); + LOAD_COMMON_META_FUNC(aclDestroyFloatArray); + LOAD_COMMON_META_FUNC(aclDestroyBoolArray); + LOAD_COMMON_META_FUNC(aclDestroyTensorList); + LOAD_COMMON_META_FUNC(aclDestroyAclOpExecutor); + + LOAD_COMMON_META_FUNC(aclnnInit); + LOAD_COMMON_META_FUNC(aclnnFinalize); + + LOAD_COMMON_META_FUNC(aclSetAclOpExecutorRepeatable); + + LOAD_COMMON_META_FUNC(aclSetTensorAddr); + LOAD_COMMON_META_FUNC(aclSetDynamicTensorAddr); +} +} // namespace + +static bool isLoaded = false; +static bool isAclnnInit = false; +static std::mutex initMutex; +static std::shared_mutex rwOpApiMutex; +// handler -> libPath +std::unordered_map libHandlers; + +void LoadOpApiLib() { + if (isLoaded) { + return; + } + const std::string ascendPath = device::ascend::GetAscendPath(); + const std::vector dependLibs = {"libdummy_tls.so", "libnnopbase.so"}; + std::unique_lock writeLock(rwOpApiMutex); + for (const auto &depLib : 
dependLibs) { + (void)GetOpApiLibHandler(ascendPath + "lib64/" + depLib); + } + auto opApiLibPath = ascendPath + kNameOpApiLib; + auto handler = GetOpApiLibHandler(opApiLibPath); + if (handler != nullptr) { + LOG_OUT << "Load lib " << opApiLibPath << " success"; + (void)libHandlers.emplace(handler, opApiLibPath); + } + LoadCommonMetaFuncApi(); + isLoaded = true; + LOG_OUT << "Load opapi lib success"; +} + +void *GetAclnnOpApiFunc(const char *apiName) { + // apiName -> api + static thread_local std::unordered_map opapiCache; + auto iter = opapiCache.find(std::string(apiName)); + if (iter != opapiCache.end()) { + LOG_OUT << "OpApi " << apiName << " hit cache"; + return iter->second; + } + std::shared_lock readLock(rwOpApiMutex); + if (libHandlers.size() == 0) { + readLock.unlock(); + LoadOpApiLib(); + } + for (auto &libHandler : libHandlers) { + auto apiFunc = GetOpApiFuncFromLib(libHandler.first, libHandler.second.c_str(), apiName); + if (apiFunc != nullptr) { + (void)opapiCache.emplace(std::string(apiName), apiFunc); + LOG_OUT << "Get OpApiFunc [" << apiName << "] from " << libHandler.second; + return apiFunc; + } + } + LOG_OUT << "Dlsym " << apiName << " failed"; + (void)opapiCache.emplace(std::string(apiName), nullptr); + return nullptr; +} + +void AclnnInit() { + std::lock_guard lock(initMutex); + if (isAclnnInit) { + return; + } + static const auto aclnnInit = GET_ACLNN_COMMON_META_FUNC(aclnnInit); + CHECK_IF_NULL(aclnnInit); + auto ret = aclnnInit(nullptr); + CHECK_IF_FAIL(ret == 0); + isAclnnInit = true; + LOG_OUT << "Aclnn init success"; +} + +void AclnnFinalize() { + if (!isAclnnInit) { + return; + } + static const auto aclnnFinalize = GET_ACLNN_COMMON_META_FUNC(aclnnFinalize); + CHECK_IF_NULL(aclnnFinalize); + auto ret = aclnnFinalize(); + CHECK_IF_FAIL(ret == 0); + isAclnnInit = false; + LOG_OUT << "Aclnn finalize success"; +} +} // namespace ops +} // namespace mrt diff --git a/inferrt/src/ops/ascend/aclnn/utils/opapi_lib_loader.h 
b/inferrt/src/ops/ascend/aclnn/utils/opapi_lib_loader.h new file mode 100644 index 0000000000000000000000000000000000000000..9a2a084d11f97bcc25216bf1486072b9c4f8896d --- /dev/null +++ b/inferrt/src/ops/ascend/aclnn/utils/opapi_lib_loader.h @@ -0,0 +1,98 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OPS_ASCEND_ACLNN_UTILS_OPAPI_LIB_LOADER_H__ +#define __OPS_ASCEND_ACLNN_UTILS_OPAPI_LIB_LOADER_H__ + +#include +#include +#include +#include +#include +#include +#include + +#include "common/common.h" +#include "ops/ascend/aclnn/utils/aclnn_common_meta.h" + +namespace mrt { +namespace ops { +inline constexpr const char *kNameOpApiLib = "lib64/libopapi.so"; +extern std::unordered_map libHandlers; + +void LoadOpApiLib(); +void AclnnInit(); +void AclnnFinalize(); +void *GetAclnnOpApiFunc(const char *apiName); + +inline void *GetOpApiLibHandler(const std::string &libPath) { + auto handler = dlopen(libPath.c_str(), RTLD_LAZY); + if (handler == nullptr) { + LOG_OUT << "Dlopen " << libPath << " failed!" << dlerror(); + } + return handler; +} + +inline void *GetOpApiFuncFromLib(void *handler, const char *libName, const char *apiName) { + CHECK_IF_NULL(handler); + auto func = dlsym(handler, apiName); + if (func == nullptr) { + LOG_OUT << "Dlsym " << apiName << " from " << libName << " failed!" 
<< dlerror(); + } + return func; +} + +template +T LoadCommonMetaApi(const char *apiName) { + for (auto &libHandler : libHandlers) { + T apiFunc = reinterpret_cast(GetOpApiFuncFromLib(libHandler.first, libHandler.second.c_str(), apiName)); + if (apiFunc == nullptr) { + LOG_OUT << "Get CommonMetaApi [" << apiName << "] failed, libPath: " << libHandler.second; + } + return apiFunc; + } + return nullptr; +} + +#define LOAD_COMMON_META_FUNC(name) name##_ = LoadCommonMetaApi<_##name##FuncPtr>(kName##name##_) + +template +auto ConvertToOpApiFunc(const Tuple ¶ms, void *opApiAddr, std::index_sequence) { + using OpApiFunc = int (*)(typename std::decay(params))>::type...); + auto func = reinterpret_cast(opApiAddr); + return func; +} + +template +auto ConvertToOpApiFunc(const Tuple ¶ms, void *opApiAddr) { + static constexpr auto size = std::tuple_size::value; + return ConvertToOpApiFunc(params, opApiAddr, std::make_index_sequence{}); +} + +template +auto CallOpApiFunc(Function f, Tuple t, std::index_sequence) { + return f(std::get(t)...); +} + +template +auto CallOpApiFunc(Function f, Tuple t) { + static constexpr auto size = std::tuple_size::value; + return CallOpApiFunc(f, t, std::make_index_sequence{}); +} + +} // namespace ops +} // namespace mrt +#endif // __OPS_ASCEND_ACLNN_UTILS_OPAPI_LIB_LOADER_H__ diff --git a/inferrt/src/ops/op_base/op_mul.cc b/inferrt/src/ops/op_base/op_mul.cc new file mode 100644 index 0000000000000000000000000000000000000000..00f6a6dcf8e8e575e73e52b7db15e463d1f954c2 --- /dev/null +++ b/inferrt/src/ops/op_base/op_mul.cc @@ -0,0 +1,33 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "ops/op_base/op_mul.h" +#include "ops/op_base/utils.h" + +namespace mrt { +namespace ops { +OpsErrorCode OpMul::InferShape(const std::vector &input, ir::Value *output) { + CHECK_IF_FAIL(input.size() == kInputSize2); + const auto &input0 = input[kIndex0]->ToTensor(); + const auto &input1 = input[kIndex1]->ToTensor(); + CalBroadCastShape(input0->Shape(), input1->Shape(), &(output->ToTensor()->Shape())); + output->ToTensor()->Resize(); + return SUCCESS; +} +} // namespace ops +} // namespace mrt diff --git a/inferrt/src/ops/op_base/op_mul.h b/inferrt/src/ops/op_base/op_mul.h new file mode 100644 index 0000000000000000000000000000000000000000..00ef0e8846f2a1214504d3d4c868350616e0719e --- /dev/null +++ b/inferrt/src/ops/op_base/op_mul.h @@ -0,0 +1,35 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __OPS_OP_BASE_OP_MUL_H__ +#define __OPS_OP_BASE_OP_MUL_H__ + +#include + +#include "ops/operator.h" + +namespace mrt { +namespace ops { +class OpMul : public Operator { + public: + OpMul() = default; + ~OpMul() override = default; + + OpsErrorCode InferShape(const std::vector &input, ir::Value *output) override; +}; +} // namespace ops +} // namespace mrt +#endif // __OPS_OP_BASE_OP_MUL_H__ diff --git a/inferrt/src/ops/op_base/utils.cc b/inferrt/src/ops/op_base/utils.cc new file mode 100644 index 0000000000000000000000000000000000000000..cf848905ed5aca296bc77e4942f8725b4459aaa2 --- /dev/null +++ b/inferrt/src/ops/op_base/utils.cc @@ -0,0 +1,50 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ops/op_base/utils.h" + +namespace mrt { +namespace ops { +void CalBroadCastShape(const std::vector &xShape, const std::vector &yShape, + std::vector *broadcastShape) { + if (xShape == yShape) { + *broadcastShape = xShape; + return; + } + + auto xLength = xShape.size(); + auto yLength = yShape.size(); + auto res = xLength > yLength; + size_t maxLen = res ? xLength : yLength; + size_t minLen = res ? yLength : xLength; + const std::vector &maxShape = res ? xShape : yShape; + const std::vector &minShape = res ? 
yShape : xShape; + + *broadcastShape = maxShape; + auto lengthDiff = maxLen - minLen; + for (size_t i = 0; i < minLen; ++i) { + auto dsti = lengthDiff + i; + if (maxShape[dsti] == 1) { + (*broadcastShape)[dsti] = minShape[i]; + } else if (maxShape[dsti] != minShape[i] && minShape[i] != 1) { + LOG_EXCEPTION << "xShape[" << xLength + i << "] or yShape[" << yLength + i << "] must be when they are not equal" + << ", but got xShape=" << ir::ShapeToString(xShape) << ", yShape=" << ir::ShapeToString(yShape); + } + } +} + +} // namespace ops +} // namespace mrt diff --git a/inferrt/src/ops/op_base/utils.h b/inferrt/src/ops/op_base/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..05d55563300a28c6e65225fc7eb9a294ccbec701 --- /dev/null +++ b/inferrt/src/ops/op_base/utils.h @@ -0,0 +1,31 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __OPS_OP_BASE_OP_UTILS_H__ +#define __OPS_OP_BASE_OP_UTILS_H__ + +#include + +#include "common/common.h" +#include "ir/tensor/tensor.h" + +namespace mrt { +namespace ops { +void CalBroadCastShape(const std::vector &xShape, const std::vector &yShape, std::vector *broadcastShape); +} // namespace ops +} // namespace mrt + +#endif // __OPS_OP_BASE_OP_UTILS_H__ diff --git a/inferrt/src/ops/op_register.cc b/inferrt/src/ops/op_register.cc index 7ec20906b5cd296984ba2be8980e90c934725960..8c95c4cb75fa35ef56e1fe33e68b862060b5e03a 100644 --- a/inferrt/src/ops/op_register.cc +++ b/inferrt/src/ops/op_register.cc @@ -14,9 +14,48 @@ * limitations under the License. */ +#include +#include +#include +#include +#include +#include + #include "ops/op_register.h" +#include "common/logger.h" +#include "common/dynamic_lib_loader.h" namespace mrt { namespace ops { +bool LoadOpLib(const std::string &opLibPrefix, std::stringstream *errMsg) { + static std::unique_ptr dynamicLibLoader = std::make_unique(); + CHECK_IF_NULL(dynamicLibLoader); + CHECK_IF_NULL(errMsg); + DIR *dir = opendir(dynamicLibLoader->GetDynamicLibFilePath().c_str()); + if (dir == nullptr) { + *errMsg << "Open Op Lib dir failed, file path:" << dynamicLibLoader->GetDynamicLibFilePath() << std::endl; + return false; + } + struct dirent *entry; + std::set opLibs; + while ((entry = readdir(dir)) != nullptr) { + std::string opLibName = entry->d_name; + if (opLibName.find(opLibPrefix) == std::string::npos) { + continue; + } + if (opLibName.find_first_of(".") == std::string::npos) { + continue; + } + opLibs.insert(opLibName); + } + for (const auto &opLibName : opLibs) { + if (!dynamicLibLoader->LoadDynamicLib(opLibName, errMsg)) { + return false; + } + } + (void)closedir(dir); + return true; +} + OpFactoryBase *OpFactoryBase::GetOpFactory(const std::string_view &name) { auto iter = OpFactoryMap().find(name); if (iter == OpFactoryMap().end()) { @@ -40,5 +79,6 @@ OpFactoryBase::OpFactoryMapType 
&OpFactoryBase::OpFactoryMap() { static OpFactoryBase::OpFactoryMapType factoryMap; return factoryMap; } + } // namespace ops } // namespace mrt diff --git a/inferrt/src/ops/op_register.h b/inferrt/src/ops/op_register.h index c078d75dac997cb87dce300e03feade0b630a89b..6909496b219b6bb86d91996dd11d58c48320daea 100644 --- a/inferrt/src/ops/op_register.h +++ b/inferrt/src/ops/op_register.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "common/logger.h" @@ -58,6 +59,9 @@ struct OpFactoryTraits { static constexpr std::string_view value = kCPUOpFactory; }; +// Function for loading op libs. +DA_API bool LoadOpLib(const std::string &opLibPrefix, std::stringstream *errMsg); + class DA_API OpFactoryBase { using OpFactoryMapType = std::unordered_map>; @@ -90,7 +94,31 @@ class OpFactory : public OpFactoryBase { factoryBase = OpFactoryBase::CreateOpFactory(OpFactoryTraits::value, std::make_unique>()); } - return *static_cast *>(factoryBase); + auto *opFactory = static_cast *>(factoryBase); + CHECK_IF_NULL(opFactory); + opFactory->LoadOpPlugin(); + return *opFactory; + } + + void LoadOpPlugin() { + if (isPluginLoaded_) { + return; + } + isPluginLoaded_ = true; + std::stringstream errMsg; + if constexpr (std::is_same_v) { + if (!LoadOpLib("libops_ascend", &errMsg)) { + LOG_EXCEPTION << "Load Ascend Op Lib failed, error message: " << errMsg.str(); + } + } else if constexpr (std::is_same_v) { + if (!LoadOpLib("libkernel_aten", &errMsg)) { + LOG_EXCEPTION << "Load CPU Op Lib failed, error message: " << errMsg.str(); + } + } else if constexpr (std::is_same_v) { + LOG_OUT << "Unknown Op Factory, skip load Op Lib, error message: " << errMsg.str(); + } else { + LOG_EXCEPTION << "Got invalid OpFactoryType, only supports AscendOpFactory, CPUOpFactory and UnknownOpFactory."; + } } void Register(const std::string &opName, CreatorFunc &&creator) { @@ -120,15 +148,20 @@ class OpFactory : public OpFactoryBase { private: std::unordered_map opCreatorsMap_; + bool 
isPluginLoaded_ = false; }; template class OpRegistrar { public: - explicit OpRegistrar(const std::string &opName, std::function()> creator) { - OpFactory::GetInstance().Register(opName, std::move(creator)); + explicit OpRegistrar(const std::string &&opName, std::function()> &&creator) + : opName_(std::move(opName)) { + OpFactory::GetInstance().Register(opName_, std::move(creator)); } - ~OpRegistrar() = default; + ~OpRegistrar() { OpFactory::GetInstance().UnRegister(opName_); } + + private: + std::string opName_; }; #define MRT_REG_OP(OP_NAME, OP_CLASS, DEVICE_NAME) \ diff --git a/inferrt/src/ops/operator.h b/inferrt/src/ops/operator.h index 9c98d0db0cf62ce5a43e89401fe86d2208f8ffe7..5df6a416e0c486640b37d77424d2306b09fb889a 100644 --- a/inferrt/src/ops/operator.h +++ b/inferrt/src/ops/operator.h @@ -79,7 +79,7 @@ class Operator { * @return OpsErrorCode Error code indicating success or failure of workspace calculation. */ virtual OpsErrorCode CalcWorkspace(const std::vector &input, const ir::Value *output, - size_t *workspace_size) { + size_t *workspaceSize) { return SUCCESS; } diff --git a/inferrt/src/pybind/mrt/pybind11_ir.cc b/inferrt/src/pybind/mrt/pybind11_ir.cc index 34492c565c88a6806f264c5d005112d235c63c71..543d37b0abee108b91fafcd516b7b475b38d2da9 100644 --- a/inferrt/src/pybind/mrt/pybind11_ir.cc +++ b/inferrt/src/pybind/mrt/pybind11_ir.cc @@ -74,7 +74,7 @@ PYBIND11_MODULE(_mrt_ir, m) { .def("is_bool", &ir::Value::IsBool) .def("is_string", &ir::Value::IsString) .def("is_none", &ir::Value::IsNone) - .def("to_tensor", &ir::Value::ToTensor) + .def("to_tensor", static_cast(&ir::Value::ToTensor)) .def("to_tuple", &ir::Value::ToTuple) .def("to_double", &ir::Value::ToDouble) .def("to_int", &ir::Value::ToInt) diff --git a/tests/st/check/check_aclnn_op.py b/tests/st/check/check_aclnn_op.py new file mode 100644 index 0000000000000000000000000000000000000000..35d2a8cb23a0f9f531a8f64600e8aa30ccc37478 --- /dev/null +++ b/tests/st/check/check_aclnn_op.py @@ -0,0 +1,17 @@ 
+import torch +from mrt.torch import backend + +def foo(x, y): + return torch.mul(x, y) + +opt_foo = torch.compile(foo, backend=backend) + +x = torch.randn(2, 2) +y = torch.randn(2, 2) +x_npu = x.npu() +y_npu = y.npu() +bar = foo(x_npu, y_npu) +opt_bar = opt_foo(x_npu, y_npu) + +assert torch.equal(opt_bar, bar), f"\nopt_bar={opt_bar}\nbar={bar}" +print("The result is correct. Launch aclnn [mul] successfully.") diff --git a/tests/st/check/run_test.sh b/tests/st/check/run_test.sh index 2f708458aef074a3df30e331ff689097f15a8565..d902e568b948eae2de1c987c7323a3ebec557974 100644 --- a/tests/st/check/run_test.sh +++ b/tests/st/check/run_test.sh @@ -1,8 +1,4 @@ CURRENT_PATH=$(dirname $(dirname $(dirname $(dirname $(realpath "$0"))))) -INFERRT_PATH=$CURRENT_PATH -BUILD_DIR=$CURRENT_PATH/build -export DART_KERNEL_LIB_PATH=$BUILD_DIR/inferrt/src/ops/cpu/aten/libkernel_aten.so -export DART_KERNEL_LIB_NAME=Aten python $CURRENT_PATH/tests/st/check/check_backend.py export MRT_ENABLE_PIPELINE="on" diff --git a/third_party/patch/securec/securec.patch001 b/third_party/patch/securec/securec.patch001 new file mode 100644 index 0000000000000000000000000000000000000000..bcda1eac538f795275192d83d34e13b1fa80e624 --- /dev/null +++ b/third_party/patch/securec/securec.patch001 @@ -0,0 +1,108 @@ +diff -uprN securec_original/CMakeLists.txt securec_modified/CMakeLists.txt +--- securec_original/CMakeLists.txt 1970-01-01 08:00:00.000000000 +0800 ++++ securec_modified/CMakeLists.txt 2022-12-17 19:32:50.338347300 +0800 +@@ -0,0 +1,43 @@ ++cmake_minimum_required(VERSION 3.14.0) ++project(securec) ++include(CMakePackageConfigHelpers) ++include(GNUInstallDirs) ++ ++set(CMAKE_VERBOSE_MAKEFILE on) ++if(NOT MSVC) ++ if(CMAKE_SYSTEM_NAME MATCHES "Windows") ++ add_compile_definitions(SECUREC_ONLY_DECLARE_MEMSET) ++ SET(CMAKE_C_FLAGS "$ENV{CFLAGS} -fno-inline-functions -fno-omit-frame-pointer \ ++ -fstack-protector-all -fPIC -D_FORTIFY_SOURCE=2 -O2") ++ elseif(NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") # 
mac and lite packages using clang ++ SET(CMAKE_C_FLAGS "$ENV{CFLAGS} -fno-inline-functions -fno-omit-frame-pointer \ ++ -fstack-protector-all -fPIC -D_FORTIFY_SOURCE=2 -O2") ++ else() ++ SET(CMAKE_C_FLAGS "$ENV{CFLAGS} -fPIC -Wl,-z,relro,-z,now,-z,noexecstack \ ++ -Wno-nullability-completeness -fno-inline-functions -fno-omit-frame-pointer \ ++ -fstack-protector-all -fPIC -D_FORTIFY_SOURCE=2 -O2 -D_LIBCPP_INLINE_VISIBILITY='' \ ++ -D'_LIBCPP_EXTERN_TEMPLATE(...)='") ++ endif() ++ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) ++ #add flags ++ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -I/usr/local/include -Werror") ++endif() ++ ++include_directories(./include) ++add_subdirectory(src) ++ ++set(csec_INSTALL_LIB_DIR "lib") ++set(csec_INSTALL_INCLUDE_DIR "include") ++install( ++ TARGETS securec ++ DESTINATION ${csec_INSTALL_LIB_DIR} ++ COMPONENT securec ++) ++file(GLOB_RECURSE csec_HEADERS ++ ${CMAKE_SOURCE_DIR}/include/* ++) ++install( ++ FILES ${csec_HEADERS} ++ DESTINATION ${csec_INSTALL_INCLUDE_DIR} ++ COMPONENT securec ++) +diff -uprN securec_original/Makefile securec_modified/Makefile +--- securec_original/Makefile 2021-05-08 09:50:50.000000000 +0800 ++++ securec_modified/Makefile 1970-01-01 08:00:00.000000000 +0800 +@@ -1,44 +0,0 @@ +-PROJECT=libboundscheck.so +- +-CC?=gcc +- +-OPTION = -fPIC +-OPTION += -fstack-protector-all +-OPTION += -D_FORTIFY_SOURCE=2 -O2 +-OPTION += -Wformat=2 -Wfloat-equal -Wshadow +-OPTION += -Wconversion +-OPTION += -Wformat-security +-OPTION += -Wextra +-OPTION += --param ssp-buffer-size=4 +-OPTION += -Warray-bounds +-OPTION += -Wpointer-arith +-OPTION += -Wcast-qual +-OPTION += -Wstrict-prototypes +-OPTION += -Wmissing-prototypes +-OPTION += -Wstrict-overflow=1 +-OPTION += -Wstrict-aliasing=2 +-OPTION += -Wswitch -Wswitch-default +- +-CFLAG = -Wall -DNDEBUG -O2 $(OPTION) +- +-SOURCES=$(wildcard src/*.c) +- +-OBJECTS=$(patsubst %.c,%.o,$(SOURCES)) +- +-.PHONY:clean +- +-CFLAG += -Iinclude +-LD_FLAG = -fPIC -s -Wl,-z,relro,-z,now,-z,noexecstack 
-fstack-protector-all +- +-$(PROJECT): $(OBJECTS) +- mkdir -p lib +- $(CC) -shared -o lib/$@ $(patsubst %.o,obj/%.o,$(notdir $(OBJECTS))) $(LD_FLAG) +- @echo "finish $(PROJECT)" +- +-.c.o: +- @mkdir -p obj +- $(CC) -c $< $(CFLAG) -o obj/$(patsubst %.c,%.o,$(notdir $<)) +- +-clean: +- -rm -rf obj lib +- @echo "clean up" +diff -uprN securec_original/src/CMakeLists.txt securec_modified/src/CMakeLists.txt +--- securec_original/src/CMakeLists.txt 1970-01-01 08:00:00.000000000 +0800 ++++ securec_modified/src/CMakeLists.txt 2022-12-17 19:15:47.358347300 +0800 +@@ -0,0 +1,9 @@ ++if(CMAKE_SYSTEM_NAME MATCHES "Windows") ++ list(APPEND SECUREC_SRCS "memset_s.c") ++else() ++ aux_source_directory(. SECUREC_SRCS) ++endif() ++add_library(securec STATIC ${SECUREC_SRCS}) ++if(CMAKE_SYSTEM_NAME MATCHES "Windows") ++ target_compile_definitions(securec PRIVATE -DSECUREC_DLL_IMPORT) ++endif()