12 Star 79 Fork 28

MegEngine / MegEngine

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
CMakeLists.txt 59.42 KB
一键复制 编辑 原始数据 按行查看 历史
MegEngine Team 提交于 2024-01-08 11:51 . fix(ci): fix relocation overflow
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596
# Oldest CMake release this script supports.
cmake_minimum_required(VERSION 3.15.2)

# Print the active generator; any generator other than Ninja only draws a warning.
message(STATUS "CMAKE_GENERATOR: ${CMAKE_GENERATOR}")
if(NOT ${CMAKE_GENERATOR} STREQUAL "Ninja")
  message(WARNING "CMAKE_GENERATOR NOT EQUAL Ninja, which we do not recommend")
endif()

# Pulled in ahead of project(); presumably this is what defines MGB_VER_STRING, which
# project() consumes below -- TODO confirm against cmake/FetchMegBrainVersion.cmake.
include(cmake/FetchMegBrainVersion.cmake)

project(
  MegEngine
  LANGUAGES C CXX
  VERSION ${MGB_VER_STRING})

# Project-wide defaults: project-local find modules, CMP0048 default, PIC for every
# target and a compile_commands.json export.
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
set(CMAKE_POLICY_DEFAULT_CMP0048 NEW)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Strict C++14 (no compiler extensions) as the baseline language level.
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 14)
# Everywhere except MSVC/Windows/Apple, replace the C++ static-archive rules so that
# ar and ranlib are invoked with their "D" modifier (deterministic mode in GNU
# binutils).
if(NOT (MSVC OR APPLE OR WIN32))
  set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Dqc <TARGET> <LINK_FLAGS> <OBJECTS>")
  set(CMAKE_CXX_ARCHIVE_APPEND "<CMAKE_AR> Dq <TARGET> <LINK_FLAGS> <OBJECTS>")
  set(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -D <TARGET>")
endif()
# Stock CMake modules used throughout this file.
include(CheckCXXCompilerFlag)
include(CheckIPOSupported)
include(CMakeDependentOption)
include(GNUInstallDirs)

# Record in CXX_SUPPORT_WCLASS_MEMACCESS whether the compiler accepts
# -Wclass-memaccess.
check_cxx_compiler_flag(-Wclass-memaccess CXX_SUPPORT_WCLASS_MEMACCESS)
# Target architecture. AUTO is resolved further down from CMAKE_SYSTEM_PROCESSOR, and
# cross-compiling toolchains overwrite it.
set(MGE_ARCH
    AUTO
    CACHE STRING "Architecture on which MegEngine to be built.")
# Values offered by cmake-gui/ccmake. riscv64 is listed because the cross-compiling
# logic in this file sets MGE_ARCH to it for RISC-V toolchains.
set_property(
  CACHE MGE_ARCH
  PROPERTY STRINGS
           AUTO
           x86_64
           i386
           armv7
           aarch64
           riscv64
           naive
           fallback)

# Name of the export set that installed MegEngine targets are attached to.
set(MGE_EXPORT_TARGETS MegEngine-targets)
# Convert the colon-separated LD_LIBRARY_PATH / LIBRARY_PATH environment variables
# (read at configure time) into CMake lists. The environment value is quoted so that
# it reaches string(REPLACE) as a single argument: unquoted, a value containing ';'
# would be split into several inputs that string(REPLACE) concatenates without a
# separator.
if(NOT "$ENV{LD_LIBRARY_PATH}" STREQUAL "")
  string(REPLACE ":" ";" ALTER_LD_LIBRARY_PATHS "$ENV{LD_LIBRARY_PATH}")
else()
  set(ALTER_LD_LIBRARY_PATHS "")
endif()
if(NOT "$ENV{LIBRARY_PATH}" STREQUAL "")
  string(REPLACE ":" ";" ALTER_LIBRARY_PATHS "$ENV{LIBRARY_PATH}")
else()
  set(ALTER_LIBRARY_PATHS "")
endif()
# ---- User-facing build switches ------------------------------------------------------
# Several of these are overridden later in this file depending on platform and on each
# other; the values here are only the defaults.

# JIT backends.
option(MGE_WITH_JIT "Build MegEngine with JIT." ON)
option(MGE_WITH_JIT_MLIR "Build MegEngine with MLIR JIT." OFF)
option(MGE_WITH_HALIDE "Build MegEngine with Halide JIT" OFF)

# Binary size / profiling.
option(MGE_WITH_MIDOUT_PROFILE "Build MegEngine with Midout profile." OFF)
option(
  MGE_WITH_MINIMUM_SIZE
  "Swith off MGE_ENABLE_RTTI、MGE_ENABLE_EXCEPTIONS、MGE_ENABLE_LOGGING and switch on MGE_INFERENCE_ONLY so that compile minimum load_and_run."
  OFF)
option(MGE_PROFILE_COMPILE_TIME "help profile compile time per file" OFF)

# Floating-point features.
option(MGE_ARMV8_2_FEATURE_FP16 "Enable armv8.2-a+fp16 support" OFF)
option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF)

# CUDA and related NVIDIA libraries.
option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON)
option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON)
option(MGE_WITH_TRT "Build MegEngine with TensorRT." ON)
option(MGE_WITH_CUDA_STUB "Build MegEngine with CUDA stub." ON)
option(MGE_WITH_NVRTC_STUB "Build MegEngine with NVRTC stub." OFF)
option(MGE_WITH_CUDNN_SHARED "Build MegEngine with CUDNN shared." ON)
option(MGE_WITH_CUBLAS_SHARED "Build MegEngine with CUBLAS shared." OFF)

# Other accelerators and compute libraries.
option(MGE_WITH_CAMBRICON "Build MegEngine with Cambricon support" OFF)
option(MGE_WITH_ATLAS "Build MegEngine with Atlas support" OFF)
option(MGE_WITH_ROCM "Enable ROCM support" OFF)
option(MGE_WITH_MKLDNN "Enable Intel MKL_DNN support," ON)

# Components and library flavour.
option(MGE_WITH_LITE "Build MGE with lite" ON)
option(MGE_USE_SYSTEM_LIB "Build MegEngine with system libraries." OFF)
option(MGB_WITH_FLATBUFFERS "Build MegBrain with FlatBuffers serialization support." ON)
option(BUILD_SHARED_LIBS "Build shared libraries" ON)
option(MGE_WITH_DISTRIBUTED "Build with distributed support" ON)
option(MGE_BUILD_IMPERATIVE_RT "Build _imperative_rt Python Module " ON)
option(MGE_INFERENCE_ONLY "Build inference only library." OFF)
option(MGE_WITH_CUSTOM_OP "Build with Custom op" OFF)
option(MGE_WITH_LARGE_ARCHIVE "Enable big archive link support" OFF)

# C++ runtime features and diagnostics.
option(MGE_ENABLE_RTTI "Build with RTTI" ON)
option(MGE_ENABLE_LOGGING "Build with logging" ON)
option(MGE_ENABLE_EXCEPTIONS "Build with exceptions" ON)
option(MGE_DEBUG_UTIL "Enable debug utility" ON)
option(MGE_BUILD_WITH_ASAN "Enable build with ASAN, need compiler support" OFF)

# Tests, benchmarks and repository maintenance.
option(MGE_WITH_TEST "Enable test for MegEngine." OFF)
option(MGE_WITH_BENCHMARK "Enable DNN BENCHMARK" OFF)
option(MGE_SYNC_THIRD_PARTY "help sync third_party submodule" OFF)
# When requested, prefix every compile rule with "cmake -E time" so each translation
# unit reports how long it took to build.
if(MGE_PROFILE_COMPILE_TIME)
  set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "cmake -E time")
endif()

# CUPTI is only offered when CUDA and the imperative runtime are both enabled and the
# build is not for MSVC/Windows; otherwise the option is forced OFF.
# TODO: add windows support
cmake_dependent_option(
  MGE_WITH_CUPTI "Build with CUPTI" OFF
  "MGE_WITH_CUDA;MGE_BUILD_IMPERATIVE_RT;NOT MSVC;NOT WIN32" OFF)
# Mirror the resolved option value under the MGB_ name.
set(MGB_CUPTI ${MGE_WITH_CUPTI})
if(MSVC OR WIN32)
  # FIXME: statically linking the Windows VC runtime of some Visual Studio versions
  # causes runtime issues on certain call paths, for example _imperative_rt.pyd -->
  # megengine_shared.dll, where the C API flush cannot find its fd argument. The root
  # cause is unknown, so as a workaround the VC runtime is linked dynamically by
  # default. It is still linked statically when
  # MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP/MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2 is set,
  # so use lite_static_all_in_one (lite/CMakeLists.txt) in Windows XP environments
  # whenever possible. If the VC runtime is not installed in your environment, refer
  # to:
  # https://docs.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist?view=msvc-160
  option(MGE_STATIC_LINK_WITH_VC_RUNTIME
         "Enable mge static link with Windows vc runtime" OFF)
  option(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP "Enable deploy inference on Windows xp" OFF)

  # MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2 is the special mode for Windows XP SP2
  # (32bit).
  #
  # Internal behavior:
  #
  # 1. MGB_HAVE_THREAD is forced to 0, so only a single thread is supported.
  # 2. Some features are disabled, e.g. MGB_ENABLE_JSON and the var sanity check. This
  #    rarely matters; to use those features for debugging, run the same model in a
  #    non-XP-SP2 environment such as Win7 or XP SP3 (built without
  #    MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2).
  # 3. Only MegEngine (load_and_run) and the MegEngineLite API are supported on XP
  #    SP2. Some debug utilities, e.g. megbrain_test/megdnn_test, cannot run, mostly
  #    because of the gtest sources.
  #
  # Notes for SDK callers:
  #
  # 1. Because mutexes are removed, when you start several MegEngine instances in the
  #    same process with MSVC's own API (e.g. CreateThread), call the MegEngine API
  #    (init/run) as serially as possible, and do not use std::thread,
  #    std::mutex or std::this_thread_id on the SDK caller side!
  #
  # To check whether a dll/exe can be deployed on Windows XP SP2, see
  # scripts/misc/check_windows_xp_sp2_deploy.py
  option(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2
         "Enable deploy inference on Windows xp sp2" OFF)

  # A PE file linked by LLVM lld cannot run in a Windows XP environment, so link.exe
  # is forced; it is always located under
  # Microsoft Visual Studio/*/*/VC/Tools/MSVC/*/bin/*/*/link.exe
  set(CMAKE_LINKER "link.exe")

  if(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP OR MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
    set(MGE_STATIC_LINK_WITH_VC_RUNTIME ON)
    message(
      STATUS "Force set MGE_STATIC_LINK_WITH_VC_RUNTIME ON when build for Windows XP")

    # XP builds must be 32bit, inference-only and without CUDA.
    if(NOT ${MGE_ARCH} STREQUAL "i386")
      message(FATAL_ERROR "only support 32bit when build for Windows xp")
    endif()
    if(NOT MGE_INFERENCE_ONLY)
      message(FATAL_ERROR "only support inference when build for Windows xp")
    endif()
    if(MGE_WITH_CUDA)
      message(FATAL_ERROR "do not support CUDA when build for Windows xp")
    endif()

    # Windows XP SP3 has a threading issue; work around it.
    string(APPEND CMAKE_CXX_FLAGS " /D_WIN32_WINNT=0x0501 /Zc:threadSafeInit-")
    string(APPEND CMAKE_C_FLAGS " /D_WIN32_WINNT=0x0501 /Zc:threadSafeInit-")

    # Subsystem version for Windows XP.
    add_link_options("/SUBSYSTEM:CONSOLE,5.01")
    # Some old libraries (for example mkl for xp) use legacy stdio, so
    # legacy_stdio_definitions is force-linked.
    add_link_options("/DEFAULTLIB:legacy_stdio_definitions.lib")

    if(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
      string(APPEND CMAKE_CXX_FLAGS " -D__DEPLOY_ON_XP_SP2__=1")
      string(APPEND CMAKE_C_FLAGS " -D__DEPLOY_ON_XP_SP2__=1")
    endif()
  else()
    # Every other Windows build defines _WIN32_WINNT as 0x0601.
    string(APPEND CMAKE_CXX_FLAGS " /D_WIN32_WINNT=0x0601")
    string(APPEND CMAKE_C_FLAGS " /D_WIN32_WINNT=0x0601")
  endif()
endif()
# Windows builds always link cuDNN statically, whatever the user asked for.
if(MSVC OR WIN32)
  message(STATUS "windows force cudnn static link")
  set(MGE_WITH_CUDNN_SHARED OFF)
endif()

# MGE_WITH_ANY_CUDA_STUB is ON when at least one of the CUDA / NVRTC stubs is enabled.
set(MGE_WITH_ANY_CUDA_STUB OFF)
if(MGE_WITH_NVRTC_STUB OR MGE_WITH_CUDA_STUB)
  set(MGE_WITH_ANY_CUDA_STUB ON)
endif()
# Midout profiling: define MIDOUT_PROFILING for C and C++, turn the minimum-size mode
# off and keep RTTI on. Not available on WIN32, where configuration aborts.
if(MGE_WITH_MIDOUT_PROFILE)
  message(
    STATUS
      "build with MIDOUT PROFILE and force set MGE_WITH_MINIMUM_SIZE off and force rtti ON"
  )
  string(APPEND CMAKE_CXX_FLAGS " -DMIDOUT_PROFILING")
  string(APPEND CMAKE_C_FLAGS " -DMIDOUT_PROFILING")
  set(MGE_WITH_MINIMUM_SIZE OFF)
  set(MGE_ENABLE_RTTI ON)
  if(WIN32)
    message(FATAL_ERROR "do not support midout at WIN32")
  endif()
endif()
# Header that is force-included into every C/C++ translation unit (see below).
set(BIN_REDUCE ${PROJECT_SOURCE_DIR}/src/bin_reduce_cmake.h)

# Minimum-size mode: no RTTI, logging or exceptions, inference-only library.
if(MGE_WITH_MINIMUM_SIZE)
  message(STATUS "build with MGE_WITH_MINIMUM_SIZE bin_reduce header is: ${BIN_REDUCE}")
  set(MGE_INFERENCE_ONLY ON)
  set(MGE_ENABLE_EXCEPTIONS OFF)
  set(MGE_ENABLE_LOGGING OFF)
  set(MGE_ENABLE_RTTI OFF)
  # MGE_WITH_MINIMUM_SIZE triggers unused-parameter warnings, so silence them.
  string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-parameter")
  string(APPEND CMAKE_C_FLAGS " -Wno-unused-parameter")
endif()

# Force-include the bin_reduce header, except when profiling with midout or building
# on WIN32.
if(NOT (MGE_WITH_MIDOUT_PROFILE OR WIN32))
  string(APPEND CMAKE_CXX_FLAGS " -include ${BIN_REDUCE}")
  string(APPEND CMAKE_C_FLAGS " -include ${BIN_REDUCE}")
endif()
# Probe for -fuse-ld=gold; CXX_SUPPORT_GOLD is read later when the common linker flags
# are selected.
check_cxx_compiler_flag(-fuse-ld=gold CXX_SUPPORT_GOLD)

if(NOT APPLE)
  # Checking CXX_FUNCTION_DATA_GC_SECTIONS_SUPPORT on APPLE will lead to a cmake crash,
  # hence the guard above.
  check_cxx_compiler_flag("-ffunction-sections -fdata-sections -Wl,--gc-sections"
                          CXX_FUNCTION_DATA_GC_SECTIONS_SUPPORT)
  if(CXX_FUNCTION_DATA_GC_SECTIONS_SUPPORT)
    # Put every function/data item in its own section and let the linker drop the
    # unreferenced ones.
    string(APPEND CMAKE_CXX_FLAGS " -ffunction-sections -fdata-sections")
    string(APPEND CMAKE_C_FLAGS " -ffunction-sections -fdata-sections")
    string(APPEND CMAKE_EXE_LINKER_FLAGS " -Wl,--gc-sections")
    string(APPEND CMAKE_SHARED_LINKER_FLAGS " -Wl,--gc-sections")

    # Additionally try -Wl,-z,nocopyreloc -Wl,--icf=all to reduce the ELF size.
    # -fuse-ld=gold is not compatible with -Wl,-z,nocopyreloc -Wl,--icf=all, so
    # icf=all (through lld) is only enabled on ANDROID/OHOS; elsewhere icf=safe is
    # used when the default linker accepts it.
    check_cxx_compiler_flag("-Wl,-z,nocopyreloc -Wl,--icf=all -fuse-ld=lld"
                            LINKER_SUPPORT_Z_NOCOPYRELOC_ICF_ALL)
    check_cxx_compiler_flag("-Wl,-z,nocopyreloc -Wl,--icf=safe"
                            LINKER_SUPPORT_Z_NOCOPYRELOC_ICF_SAFE_NO_LLD)
    if(LINKER_SUPPORT_Z_NOCOPYRELOC_ICF_ALL AND (ANDROID OR OHOS))
      message(STATUS "icf is supported in this compiler")
      string(APPEND CMAKE_EXE_LINKER_FLAGS
             " -Wl,-z,nocopyreloc -Wl,--icf=all -fuse-ld=lld")
      string(APPEND CMAKE_SHARED_LINKER_FLAGS
             " -Wl,-z,nocopyreloc -Wl,--icf=all -fuse-ld=lld")
    elseif(LINKER_SUPPORT_Z_NOCOPYRELOC_ICF_SAFE_NO_LLD)
      message(STATUS "icf=safe is supported in this compiler without lld")
      string(APPEND CMAKE_EXE_LINKER_FLAGS " -Wl,-z,nocopyreloc -Wl,--icf=safe")
      string(APPEND CMAKE_SHARED_LINKER_FLAGS " -Wl,-z,nocopyreloc -Wl,--icf=safe")
      # On Linux, pass -fPIC explicitly to both C and C++ compiles in this mode.
      if(CMAKE_SYSTEM_NAME MATCHES "Linux")
        add_compile_options($<$<COMPILE_LANGUAGE:C>:-fPIC>
                            $<$<COMPILE_LANGUAGE:CXX>:-fPIC>)
      endif()
    endif()
  endif()
endif()
# Ask CMake whether the toolchain supports IPO/LTO; diagnostics go to output_info.
check_ipo_supported(RESULT IS_LTO_SUPPORT OUTPUT output_info)
# LLVM on Windows reports LTO support but does not accept -flto=full at the link
# stage, so WIN32 is excluded.
if(IS_LTO_SUPPORT AND NOT WIN32)
  message(STATUS "lto is supported in this compiler")
  string(APPEND CMAKE_EXE_LINKER_FLAGS " -flto=full")
  string(APPEND CMAKE_SHARED_LINKER_FLAGS " -flto=full")
else()
  message(STATUS "lto is not supported in this compiler")
endif()
# Apple builds are always static (required for the xcode framework).
if(APPLE)
  set(BUILD_SHARED_LIBS OFF)
  message(STATUS "build static for xcode framework require")
endif()

# Building against system libraries turns off static CUDA linking.
if(MGE_USE_SYSTEM_LIB)
  set(MGE_CUDA_USE_STATIC OFF)
endif()

# FlatBuffers support switches on FBS serialization.
if(MGB_WITH_FLATBUFFERS)
  set(MGB_ENABLE_FBS_SERIALIZATION ON)
endif()

# On OHOS, silence warnings about unused command-line arguments for C++.
if(OHOS)
  string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-command-line-argument")
endif()
# Cross compiling (a toolchain file is in use): point flatc at the host-built binary
# and derive MGE_ARCH from whichever toolchain-specific variable is available.
#
# All architecture variables are expanded inside quotes. Unquoted, an unset variable
# (e.g. ANDROID_ARCH) expands to nothing and if() fails with a parse error instead of
# reaching the intended FATAL_ERROR message below.
if(CMAKE_TOOLCHAIN_FILE)
  message(STATUS "We are cross compiling.")
  set(FLATBUFFERS_FLATC_EXECUTABLE
      "${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")
  message(
    STATUS "config FLATBUFFERS_FLATC_EXECUTABLE to: ${FLATBUFFERS_FLATC_EXECUTABLE}")

  if(ANDROID_TOOLCHAIN_ROOT)
    # ANDROID_ARCH_NAME, when provided, takes precedence over ANDROID_ARCH.
    if(NOT "${ANDROID_ARCH_NAME}" STREQUAL "")
      set(ANDROID_ARCH ${ANDROID_ARCH_NAME})
    endif()
    if("${ANDROID_ARCH}" STREQUAL "arm")
      set(MGE_ARCH "armv7")
    elseif("${ANDROID_ARCH}" STREQUAL "arm64")
      set(MGE_ARCH "aarch64")
    else()
      message(FATAL_ERROR "DO NOT SUPPORT ANDROID ARCH NOW")
    endif()
  elseif(CMAKE_SYSTEM_NAME STREQUAL "OHOS")
    if("${OHOS_ARCH}" STREQUAL "armeabi-v7a")
      set(MGE_ARCH "armv7")
    elseif("${OHOS_ARCH}" STREQUAL "arm64-v8a")
      set(MGE_ARCH "aarch64")
    else()
      message(FATAL_ERROR "DO NOT SUPPORT OHOS ARCH NOW")
    endif()
  elseif(IOS_TOOLCHAIN_ROOT)
    # armv7, armv7k and armv7s map to armv7; arm64 and arm64e map to aarch64.
    if("${IOS_ARCH}" MATCHES "^armv7[ks]?$")
      set(MGE_ARCH "armv7")
    elseif("${IOS_ARCH}" MATCHES "^arm64e?$")
      set(MGE_ARCH "aarch64")
    else()
      message(FATAL_ERROR "Unsupported IOS_ARCH.")
    endif()
  elseif(RISCV_TOOLCHAIN_ROOT)
    set(MGE_ARCH "riscv64")
  elseif(NOT "${ARM_CROSS_BUILD_ARCH}" STREQUAL "")
    set(MGE_ARCH ${ARM_CROSS_BUILD_ARCH})
  else()
    message(FATAL_ERROR "Unknown cross-compiling settings.")
  endif()
  message(STATUS "CONFIG MGE_ARCH TO ${MGE_ARCH}")
endif()
# Resolve MGE_ARCH=AUTO from the processor CMake reports for the target system.
# Variables are quoted so that an empty MGE_ARCH or CMAKE_SYSTEM_PROCESSOR ends in the
# "Unknown machine architecture" error (or skips detection) rather than an if() parse
# error.
if("${MGE_ARCH}" STREQUAL "AUTO")
  if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(x86_64|AMD64)$")
    set(MGE_ARCH "x86_64")
  elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(i386|i686)$")
    set(MGE_ARCH "i386")
  elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(aarch64|arm64)$")
    # Must be tested before the generic "^arm" rule below, which would also match
    # arm64.
    set(MGE_ARCH "aarch64")
  elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^arm")
    set(MGE_ARCH "armv7")
  else()
    message(FATAL_ERROR "Unknown machine architecture for MegEngine.")
  endif()
endif()
# Single-config generators with no build type chosen default to RelWithDebInfo.
if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
  message(STATUS "Setting build type to 'RelWithDebInfo' as none was specified.")
  set(CMAKE_BUILD_TYPE RelWithDebInfo)
endif()

# Release builds for non-x86_64 targets drop RTTI unless tests or midout profiling need
# it. CMAKE_BUILD_TYPE is quoted because it stays empty under multi-config generators
# (the branch above deliberately leaves it alone there); unquoted, the empty expansion
# makes if() fail to parse.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release"
   AND NOT MGE_WITH_TEST
   AND NOT "${MGE_ARCH}" STREQUAL "x86_64"
   AND NOT MGE_WITH_MIDOUT_PROFILE)
  set(MGE_ENABLE_RTTI OFF)
  message(
    STATUS
      "disable MGE_ENABLE_RTTI when Release/NON-x86_64/NON-MGE_WITH_MIDOUT_PROFILE mode!!"
  )
endif()
# Platform-specific compiler flags.
#
# * Windows (clang-cl only): selects the MSVC runtime flavour, wires up ASAN when
#   requested, assembles WIN_FLAGS and disables the JIT/distributed features that do
#   not build on Windows.
# * Everything else: sets the warning level and the per-configuration optimization
#   flags, with ASAN variants when requested.
#
# CMAKE_BUILD_TYPE, CMAKE_C_COMPILER_ID and MGE_ARCH are always expanded inside quotes:
# CMAKE_BUILD_TYPE in particular is empty under multi-config generators, and an
# unquoted empty expansion makes if() fail to parse.
if(MSVC OR WIN32)
  # for cmake after 3.15.2
  cmake_policy(SET CMP0091 NEW)
  set(CMAKE_OBJECT_PATH_MAX 300)

  if(MGE_BUILD_WITH_ASAN)
    set(MGE_STATIC_LINK_WITH_VC_RUNTIME ON)
    message(
      STATUS
        "Force set MGE_STATIC_LINK_WITH_VC_RUNTIME ON when build for Windows MGE_BUILD_WITH_ASAN"
    )
  endif()

  # Compose the runtime name: MultiThreaded[Debug][DLL]. "Debug" is added for Debug
  # builds, "DLL" when the VC runtime is linked dynamically.
  set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded")
  if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
    string(APPEND CMAKE_MSVC_RUNTIME_LIBRARY "Debug")
  endif()
  if(NOT MGE_STATIC_LINK_WITH_VC_RUNTIME)
    string(APPEND CMAKE_MSVC_RUNTIME_LIBRARY "DLL")
  endif()

  add_compile_definitions(NOMINMAX=1 _USE_MATH_DEFINES=1 WIN32=1)

  message(STATUS "into windows build CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
  if(NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" AND NOT "${CMAKE_C_COMPILER_ID}"
                                                       STREQUAL "Clang-cl")
    message(
      FATAL_ERROR
        "only support clang-cl for windows build, pls check detail: scripts/cmake-build/BUILD_README.md"
    )
  endif()

  # On Windows you need to append
  # VS_PATH/VC/Tools/Llvm/x64/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows and
  # VS_PATH/VC/Tools/Llvm/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows to the
  # PATH environment variable.
  if(MGE_BUILD_WITH_ASAN)
    message(
      WARNING
        "please do (set)export ASAN_OPTIONS=windows_hook_rtl_allocators=true when run test after build finish, caused by we link asan dll!!"
    )
    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
      message(
        WARNING
          "Windows AddressSanitizer doesn't support linking with debug runtime libraries yet, which means do not support CMAKE_BUILD_TYPE=Debug"
      )
      message(
        FATAL_ERROR
          "Please build with RelWithDebInfo or Release by : EXTRA_CMAKE_ARGS=\"-DMGE_BUILD_WITH_ASAN=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo ...\""
      )
    endif()
    if("$ENV{VS_PATH}" STREQUAL "")
      message(
        FATAL_ERROR
          "can not find VS_PATH, please export Visual Studio root dir to VS_PATH env")
    endif()

    # Pick the ASAN import library, its runtime thunk and the LLVM sub-directory that
    # holds them for the target architecture. Both thunk names carry the .lib suffix
    # so the two architectures are spelled consistently.
    if("${MGE_ARCH}" STREQUAL "x86_64")
      set(WINDOWS_ASAN_DLL_NAME "clang_rt.asan_dynamic-x86_64.lib")
      set(WINDOWS_ASAN_RUNTIME_THUNK_NAME
          "clang_rt.asan_dynamic_runtime_thunk-x86_64.lib")
      set(WINDOWS_ASAN_PATH_SUFFIXES
          "VC/Tools/Llvm/x64/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows")
    elseif("${MGE_ARCH}" STREQUAL "i386")
      set(WINDOWS_ASAN_DLL_NAME "clang_rt.asan_dynamic-i386.lib")
      set(WINDOWS_ASAN_RUNTIME_THUNK_NAME
          "clang_rt.asan_dynamic_runtime_thunk-i386.lib")
      set(WINDOWS_ASAN_PATH_SUFFIXES
          "VC/Tools/Llvm/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows")
    else()
      message(FATAL_ERROR "unsupport asan ARCH: ${MGE_ARCH} on Windows")
    endif()

    find_path(
      ASAN_DLL_PATH
      NAMES ${WINDOWS_ASAN_DLL_NAME}
      HINTS $ENV{VS_PATH}
      PATH_SUFFIXES ${WINDOWS_ASAN_PATH_SUFFIXES}
      DOC "Windows asan library path")
    # find_path() stores ASAN_DLL_PATH-NOTFOUND on failure, which if() treats as
    # false.
    if(NOT ASAN_DLL_PATH)
      message(FATAL_ERROR "can not find asan dll, please upgrade you LLVM")
    endif()
    message(STATUS "Windows asan dll path: ${ASAN_DLL_PATH}")
    link_directories(${ASAN_DLL_PATH})
    link_libraries(${WINDOWS_ASAN_DLL_NAME})
    link_libraries(${WINDOWS_ASAN_RUNTIME_THUNK_NAME})

    set(WIN_FLAGS "/Od -DNDEBUG -fsanitize=address")
    # LLVM ASAN on Windows does not take effect at /O2, and /O2 is the RELWITHDEBINFO
    # default, so the per-configuration flags are overridden.
    set(CMAKE_C_FLAGS_RELWITHDEBINFO "/Zi /Od /Ob1 /DNDEBUG")
    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Zi /Od /Ob1 /DNDEBUG")
    set(CMAKE_C_FLAGS_RELEASE "/Zi /Od /Ob1 /DNDEBUG")
    set(CMAKE_CXX_FLAGS_RELEASE "/Zi /Od /Ob1 /DNDEBUG")
  else()
    set(WIN_FLAGS "/O2")
  endif()

  # Add flags that enable SSE instruction optimization for x86 and define the AVX
  # macros so that AVX code compiles.
  #
  # If your CPU is from the cascadelake series, you can additionally enable, for
  # performance:
  #   set(WIN_FLAGS "${WIN_FLAGS} -march=cascadelake -mtune=cascadelake")
  #   set(WIN_FLAGS "${WIN_FLAGS} -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vnni")
  string(
    APPEND
    WIN_FLAGS
    " -msse4.2 -D_AVX_ -D_AVX2_ -D__AVX__ -D__AVX2__ -D__FMA__"
    # Warnings that must not be promoted to errors in the Windows build.
    " -Wno-error=implicit-int-conversion -Wno-error=double-promotion"
    " -Wno-error=zero-as-null-pointer-constant -Wno-error=implicit-int-conversion"
    " -Wno-error=float-conversion -Wno-error=shadow-field -Wno-error=covered-switch-default"
    " -Wno-error=deprecated -Wno-error=documentation -Wno-error=unreachable-code-break"
    " /DWIN32 -Wno-macro-redefined /wd4819"
    " /D_CRT_SECURE_NO_DEPRECATE /D_CRT_SECURE_NO_WARNINGS /DNOGDI /D_USE_MATH_DEFINES /bigobj"
    " /Zm500 /EHs /wd4351 /wd4291 /wd4250 /wd4996 /wd4819 -Wno-inconsistent-dllimport"
  )
  string(APPEND CMAKE_C_FLAGS " ${WIN_FLAGS}")
  string(APPEND CMAKE_CXX_FLAGS " ${WIN_FLAGS}")

  # FIXME: fix halide/mlir JIT backends on windows
  message(STATUS "disable halide and mlir jit backends on windows host build...")
  set(MGE_WITH_HALIDE OFF)
  set(MGE_WITH_JIT_MLIR OFF)
  # TODO: implement ExecutableHelperImpl@src/jit/impl/utils.cpp for Windows, then
  # enable the base jit on Windows
  message(STATUS "disable base jit on windows host build...")
  set(MGE_WITH_JIT OFF)
  # FIXME: fix MegRay on windows
  message(STATUS "Disable distributed build on windows host build...")
  set(MGE_WITH_DISTRIBUTED OFF)

  if("${MGE_ARCH}" STREQUAL "i386" AND "${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
    # https://docs.microsoft.com/en-us/cpp/build/reference/z7-zi-zi-debug-information-format?view=msvc-170
    # Workaround for error LNK1318
    message(
      STATUS
        "force use full symbolic debugging with build for 32bit for Windows with Debug mode"
    )
    set(CMAKE_C_FLAGS_DEBUG "/Z7")
    set(CMAKE_CXX_FLAGS_DEBUG "/Z7")
  endif()
else()
  string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra")

  # Non-Windows general Debug flags.
  if(MGE_BUILD_WITH_ASAN)
    set(CMAKE_C_FLAGS_DEBUG "-O0 -g -fsanitize=address -fno-omit-frame-pointer")
    set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fsanitize=address -fno-omit-frame-pointer")
  else()
    set(CMAKE_C_FLAGS_DEBUG "-O0 -g")
    set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g")
  endif()

  # Non-Windows general optimized flags.
  if(MGE_BUILD_WITH_ASAN)
    set(OPTIMIZE_LEVEL "-g -O0 -DNDEBUG -fsanitize=address -fno-omit-frame-pointer")
  elseif(ANDROID)
    set(OPTIMIZE_LEVEL "-g -Ofast -DNDEBUG")
  elseif(OHOS)
    set(OPTIMIZE_LEVEL "-g -Ofast -DNDEBUG")
  else()
    set(OPTIMIZE_LEVEL "-g -O3 -DNDEBUG")
  endif()

  # Remove the finite-math-only optimization that -Ofast implies: clang has a
  # different runtime finite-math logic. The issue has not been seen with g++, but to
  # keep one set of build flags -fno-finite-math-only is added whenever the compiler
  # supports it.
  check_cxx_compiler_flag("-fno-finite-math-only" CXX_NO_FINITE_MATH_ONLY_SUPPORT)
  if(CXX_NO_FINITE_MATH_ONLY_SUPPORT)
    message(STATUS "force add -fno-finite-math-only for this compiler")
    string(APPEND OPTIMIZE_LEVEL " -fno-finite-math-only")
  endif()

  # Release and RelWithDebInfo share the same optimization flags.
  set(CMAKE_C_FLAGS_RELEASE "${OPTIMIZE_LEVEL}")
  set(CMAKE_CXX_FLAGS_RELEASE "${OPTIMIZE_LEVEL}")
  set(CMAKE_C_FLAGS_RELWITHDEBINFO "${OPTIMIZE_LEVEL}")
  set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${OPTIMIZE_LEVEL}")

  # Some gnu (gcc) compilers have runtime issues with -static -libasan, and when the
  # target is big clang's ld takes a very long time with -static-libsan, so the
  # dynamic asan is used by default. On ANDROID asan.so depends on log, so the log
  # library is broadcast through link_libraries to every target MegEngine depends
  # on, for example the flatc target.
  if(MGE_BUILD_WITH_ASAN)
    if(ANDROID)
      link_libraries(log)
    elseif(OHOS)
      link_libraries(hilog_ndk.z)
    endif()
  endif()
endif()
# With CUDA enabled, load the cuDNN module. Linking cuDNN >= 8.0.0 statically (static
# CUDA and no shared cuDNN) automatically turns on large-archive support.
if(MGE_WITH_CUDA)
  include(cmake/cudnn.cmake)
  if(MGE_CUDA_USE_STATIC
     AND NOT MGE_WITH_CUDNN_SHARED
     AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0")
    message(WARNING "Static link CUDNN8 will auto enable MGE_WITH_LARGE_ARCHIVE=ON")
    set(MGE_WITH_LARGE_ARCHIVE ON)
  endif()
endif()
# Choose the linker flag shared by executables, shared libraries and modules:
# -mcmodel=large for large archives, otherwise gold where it is supported and the
# platform/architecture allows it.
if(MGE_WITH_LARGE_ARCHIVE)
  message(STATUS "Set -mcmodel=large and disable -fuse-ld=gold")
  set(MGE_COMMON_LINKER_FLAGS "-mcmodel=large")
elseif(
  CXX_SUPPORT_GOLD
  AND NOT (ANDROID
           OR OHOS
           OR APPLE
           OR MSVC
           OR WIN32)
  AND NOT ${MGE_ARCH} STREQUAL "riscv64")
  # MGE_WITH_LARGE_ARCHIVE is known to be false in this branch.
  message(STATUS "Using GNU gold linker.")
  set(MGE_COMMON_LINKER_FLAGS "-fuse-ld=gold")
endif()
string(APPEND CMAKE_SHARED_LINKER_FLAGS " ${MGE_COMMON_LINKER_FLAGS}")
string(APPEND CMAKE_MODULE_LINKER_FLAGS " ${MGE_COMMON_LINKER_FLAGS}")
string(APPEND CMAKE_EXE_LINKER_FLAGS " ${MGE_COMMON_LINKER_FLAGS}")
# The x86 CPU JIT backends only support MLIR for now, and the MLIR runtime is not
# supported on XP SP2, so JIT is switched off for that target.
if(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
  message(WARNING "disable MGE_WITH_JIT when build for windows xp sp2")
  set(MGE_WITH_JIT OFF)
endif()

# Without the base JIT neither the Halide nor the MLIR backend can stay enabled.
if(NOT MGE_WITH_JIT)
  if(MGE_WITH_HALIDE)
    message(WARNING "MGE_WITH_HALIDE is set to OFF with MGE_WITH_JIT disabled")
    set(MGE_WITH_HALIDE OFF)
  endif()
  if(MGE_WITH_JIT_MLIR)
    message(WARNING "MGE_WITH_JIT_MLIR is set to OFF with MGE_WITH_JIT disabled")
    set(MGE_WITH_JIT_MLIR OFF)
  endif()
endif()

# FIXME: at present the LLVM that Halide depends on conflicts with the LLVM that MLIR
# depends on. This should be fixed in subsequent versions. Until then Halide cannot be
# combined with the imperative runtime or with the MLIR JIT.
if(MGE_BUILD_IMPERATIVE_RT AND MGE_WITH_HALIDE)
  message(FATAL_ERROR "cannot use HALIDE when building IMPERATIVE_RT")
endif()
if(MGE_WITH_JIT_MLIR AND MGE_WITH_HALIDE)
  message(FATAL_ERROR "cannot use HALIDE with MGE_WITH_JIT_MLIR enabled")
endif()
# Locate nvcc and enable the CUDA language at C++14.
if(MGE_WITH_CUDA)
  # FIXME: check_language(CUDA) fails in sbsa mode! Details:
  # https://gitlab.kitware.com/cmake/cmake/-/issues/20676
  # When cross compiling, the host compiler for nvcc is pinned to the C++ compiler
  # before the language check runs.
  if(CMAKE_TOOLCHAIN_FILE)
    set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
    message(
      WARNING
        "force set CMAKE_CUDA_HOST_COMPILER to CMAKE_CXX_COMPILER when nvcc sbsa mode!!"
    )
  endif()

  include(CheckLanguage)
  check_language(CUDA)

  if(NOT CMAKE_CUDA_COMPILER)
    if(CMAKE_TOOLCHAIN_FILE)
      # Remove this once CMake fixes nvcc sbsa detection.
      set(CMAKE_CUDA_COMPILER "nvcc")
      message(WARNING "force set CMAKE_CUDA_COMPILER to nvcc when nvcc sbsa mode!!")
    else()
      message(FATAL_ERROR "CUDA compiler not found in PATH")
    endif()
  endif()

  find_package(CUDA)
  enable_language(CUDA)
  set(CMAKE_CUDA_STANDARD 14)
  set(CMAKE_CUDA_STANDARD_REQUIRED ON)
endif()
# The MLIR JIT backend cannot be cross-built yet.
if(CMAKE_TOOLCHAIN_FILE)
  # TODO: fix the cross build of mlir-linalg-ods-gen to enable cross builds with MLIR
  message(
    STATUS
      "Disable MLIR jit backends support, as we do not support cross build MLIR module caused by mlir-linalg-ods-gen, if you really need this, try build at host env, for example Android termux env for android, arm-linux env for arm with linux board"
  )
  set(MGE_WITH_JIT_MLIR OFF)
endif()

# TensorRT and Halide are switched off when CUDA is not enabled.
if(NOT MGE_WITH_CUDA)
  message(STATUS "Disable TensorRT support and disable HALIDE, as CUDA is not enabled.")
  set(MGE_WITH_TRT OFF)
  set(MGE_WITH_HALIDE OFF)
endif()
find_package(PythonInterp 3 REQUIRED)

# NOTICE: PYTHON3_EXECUTABLE_WITHOUT_VERSION is meant only for targets that do not
# depend on the Python API. PURPOSE: reuse target objects when switching the python3
# version. Falls back to PYTHON_EXECUTABLE when python3 cannot be found in the PATH
# environment.
set(PYTHON3_IN_ENV "python3")
find_program(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON3_IN_ENV})
if(NOT PYTHON3_EXECUTABLE_WITHOUT_VERSION)
  message(
    STATUS
      "fallback ${PYTHON_EXECUTABLE} as PYTHON3_EXECUTABLE_WITHOUT_VERSION,\
target which depend on PYTHON3_EXECUTABLE_WITHOUT_VERSION will be rebuild when switch python3"
  )
  set(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON_EXECUTABLE})
else()
  # Keep the bare command name rather than the absolute path that was found.
  message(STATUS "use ${PYTHON3_IN_ENV} as PYTHON3_EXECUTABLE_WITHOUT_VERSION")
  set(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON3_IN_ENV})
endif()
# Prefer the -pthread flag when locating the thread library.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads)

# When Threads resolves to plain "-pthread" and CUDA is enabled, rewrite the imported
# target's compile options so CUDA sources receive the flag as -Xcompiler=-pthread and
# every other language receives -pthread directly.
if(NOT "${CMAKE_THREAD_LIBS_INIT}" STREQUAL "")
  if(${CMAKE_THREAD_LIBS_INIT} STREQUAL "-pthread" AND MGE_WITH_CUDA)
    set_target_properties(
      Threads::Threads
      PROPERTIES
        INTERFACE_COMPILE_OPTIONS
        "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-pthread>;$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-pthread>"
    )
  endif()
endif()
# BLAS backend selection (MKL or OpenBLAS).
set(MGE_BLAS
    MKL
    CACHE STRING "BLAS implementaion used by MegEngine.")
set_property(CACHE MGE_BLAS PROPERTY STRINGS MKL OpenBLAS)

# Optional user override of the CUDA -gencode flags; empty means no override.
set(MGE_CUDA_GENCODE
    ""
    CACHE STRING "Overwrite -gencode specifications for CUDA")

# Default the CUDA host compiler to the C++ compiler. This must use ${...}: the
# make-style $(CMAKE_CXX_COMPILER) is not expanded by CMake and would store the
# literal text "$(CMAKE_CXX_COMPILER)" instead of the compiler path.
if(NOT CMAKE_CUDA_HOST_COMPILER)
  set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
endif()
# Translate the RTTI / exceptions switches into compiler flags.
if(NOT MGE_ENABLE_RTTI)
  string(APPEND CMAKE_CXX_FLAGS " -fno-rtti")
endif()
if(NOT MGE_ENABLE_EXCEPTIONS)
  string(APPEND CMAKE_CXX_FLAGS " -fno-exceptions")
  # Some lite macros trigger format-security errors when exceptions are disabled.
  add_definitions(-Wno-format-security)
endif()

# The imperative runtime, Android and OHOS builds raise the C++ standard to 17.
if(MGE_BUILD_IMPERATIVE_RT
   OR ANDROID
   OR OHOS)
  message(STATUS "config cxx standard to 17.")
  set(CMAKE_CXX_STANDARD 17)
endif()
# Distributed support is kept only when at least one of CUDA, ROCm or
# Cambricon is enabled. The option names are passed to if() directly instead
# of as unquoted ${...} expansions, so an unset option is treated as false
# rather than producing a malformed expression. The message now names all
# three backends that the condition checks.
if(NOT MGE_WITH_CUDA
   AND NOT MGE_WITH_ROCM
   AND NOT MGE_WITH_CAMBRICON)
  message(
    STATUS
      "Disable distributed support, as CUDA, ROCm and Cambricon are all disabled.")
  set(MGE_WITH_DISTRIBUTED OFF)
endif()
# An inference-only build drops distributed support and the imperative_rt
# python module.
if(MGE_INFERENCE_ONLY)
  message(STATUS "Disable distributed support for inference only build.")
  message(STATUS "Disable imperative_rt python module for inference only build.")
  set(MGE_WITH_DISTRIBUTED OFF)
  set(MGE_BUILD_IMPERATIVE_RT OFF)
endif()
# Pull in the helper modules for the bundled third-party dependencies.
# NOTE: every include(cmake/*) must stay below the third-party sync step,
# which is the first thing done here.
if(MGE_SYNC_THIRD_PARTY)
include(cmake/third_party_sync.cmake)
endif()
# gtest is only needed when tests are built.
if(MGE_WITH_TEST)
include(cmake/gtest.cmake)
endif()
# gflags is included unconditionally.
include(cmake/gflags.cmake)
# llvm-project is included for the MLIR JIT and for the imperative runtime.
if(MGE_WITH_JIT_MLIR OR MGE_BUILD_IMPERATIVE_RT)
include(cmake/llvm-project.cmake)
endif()
# Building the imperative runtime forces custom-op support on.
if(MGE_BUILD_IMPERATIVE_RT)
set(MGE_WITH_CUSTOM_OP ON)
endif()
# Distributed builds include the protobuf and zmq modules.
if(MGE_WITH_DISTRIBUTED)
include(cmake/protobuf.cmake)
include(cmake/zmq.cmake)
endif()
if(MGB_WITH_FLATBUFFERS)
include(cmake/flatbuffers.cmake)
endif()
if(MGE_WITH_CUPTI)
include(cmake/cupti.cmake)
endif()
if(MGE_WITH_CUDA)
# Expose the CUDA toolkit headers to every target.
include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})

# Collect the host-implicit CUDA link directories, skipping any directory
# whose last path component is "stubs".
foreach(_cuda_link_dir ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
  get_filename_component(_NAME ${_cuda_link_dir} NAME)
  if(${_NAME} STREQUAL "stubs")
    continue()
  endif()
  list(APPEND CUDA_LINK_DIRECTORIES ${_cuda_link_dir})
endforeach()
link_directories(${CUDA_LINK_DIRECTORIES})
# Per-configuration nvcc optimisation flags.
set(CMAKE_CUDA_FLAGS_DEBUG "-O0 -g")
set(CMAKE_CUDA_FLAGS_RELEASE "-O3")
set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O3 -g")
set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Os")

if(MSVC OR WIN32)
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin -compress-all")
  # Host-compiler (cl.exe) options that nvcc forwards via --compiler-options.
  set(CCBIN_FLAG
      "${CCBIN_FLAG} /wd4819 /wd4334 /wd4267 /wd4002 /wd4244 /wd4068 /std:c++14 /bigobj"
  )
  # CMAKE_BUILD_TYPE is quoted: an empty value (e.g. with multi-config
  # generators) would otherwise make this if() expression malformed.
  if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
    set(CCBIN_FLAG "${CCBIN_FLAG} -D_ITERATOR_DEBUG_LEVEL=2 -MTd")
  endif()
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options \" ${CCBIN_FLAG} \" ")
else()
  # NOTE: on this branch CMAKE_CUDA_FLAGS is assigned, not appended to.
  set(CMAKE_CUDA_FLAGS "-Xcompiler -Wall,-Wextra -Xfatbin -compress-all")
endif()

# Mirror the host RTTI / exception settings for the CUDA host compiler.
if(NOT MGE_ENABLE_RTTI)
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-rtti")
endif()
if(NOT MGE_ENABLE_EXCEPTIONS)
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-exceptions")
endif()
# Choose the default nvcc -gencode list unless the user supplied one.
# MEGDNN_THREADS_512 is set to 1 for a user-supplied list and to 0 when the
# defaults below are used.
if(MGE_CUDA_GENCODE)
  set(MEGDNN_THREADS_512 1)
else()
  if(NOT (${MGE_ARCH} STREQUAL "x86_64"
          OR ${MGE_ARCH} STREQUAL "i386"
          OR ${MGE_ARCH} STREQUAL "aarch64"))
    message(FATAL_ERROR "Unsupported CUDA host arch.")
  endif()
  set(MEGDNN_THREADS_512 0)

  # _mge_sm_list: architectures that get SASS (code=sm_XX).
  # _mge_ptx_arch: architecture that additionally gets PTX (code=compute_XX);
  # left empty when no PTX entry is emitted.
  set(_mge_sm_list "")
  set(_mge_ptx_arch "")

  # ON windows platform, static library just a shell, always fallback to DLL
  if(MGE_WITH_CUDA
     AND MGE_CUDA_USE_STATIC
     AND NOT MSVC
     AND NOT WIN32
     AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0"
     AND (NOT MGE_WITH_CUDNN_SHARED))
    message(
      WARNING
        "Static link CUDNN8 with many sm is unworkable, we only enable sm61 sm70 sm75 by default, and enable MGE_WITH_LARGE_ARCHIVE=ON"
    )
    set(_mge_sm_list 61 70 75)
  elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.8.0")
    set(_mge_sm_list 61 70 75 80 86 89 90)
    set(_mge_ptx_arch 90)
  elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.1.0")
    set(_mge_sm_list 61 70 75 80 86)
    set(_mge_ptx_arch 86)
  elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.0.0")
    set(_mge_sm_list 61 70 75 80)
    set(_mge_ptx_arch 80)
  elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.0.0")
    set(_mge_sm_list 52 60 61 70 75)
    set(_mge_ptx_arch 75)
  elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "9.0.0")
    set(_mge_sm_list 52 60 61 70)
    set(_mge_ptx_arch 70)
  else()
    set(_mge_sm_list 35 52 60 61)
    set(_mge_ptx_arch 61)
  endif()

  foreach(_mge_sm IN LISTS _mge_sm_list)
    string(APPEND MGE_CUDA_GENCODE
           " -gencode arch=compute_${_mge_sm},code=sm_${_mge_sm}")
  endforeach()
  if(NOT "${_mge_ptx_arch}" STREQUAL "")
    string(APPEND MGE_CUDA_GENCODE
           " -gencode arch=compute_${_mge_ptx_arch},code=compute_${_mge_ptx_arch}")
  endif()

  unset(_mge_sm_list)
  unset(_mge_ptx_arch)
endif()
# Hand the selected -gencode arguments to nvcc.
string(APPEND CMAKE_CUDA_FLAGS " ${MGE_CUDA_GENCODE}")

# The TensorRT helper module is only included when TensorRT is enabled.
if(MGE_WITH_TRT)
  include(cmake/tensorrt.cmake)
endif()
# Populate MGE_CUDA_LIBS with the TensorRT / cuDNN / CUDA toolkit libraries.
# The first branch handles MGE_CUDA_USE_STATIC (static toolkit libraries where
# available); the else() branch appends the shared-library names instead.
# Entries are appended in a fixed order; keep that order when editing.
if(MGE_CUDA_USE_STATIC)
# --- TensorRT ---
if(MGE_WITH_TRT)
if(MSVC OR WIN32)
message(STATUS "windows TRT_LIBRARY: ${TRT_LIBRARY}")
list(APPEND MGE_CUDA_LIBS ${TRT_LIBRARY} ${TRT_PLUGIN_LIBRARY})
else()
# TensorRT major version below 8 is wrapped in --whole-archive; 8 and above
# is appended plainly.
if(TensorRT_VERSION_MAJOR GREATER_EQUAL 8)
list(APPEND MGE_CUDA_LIBS libnvinfer libnvinfer_plugin)
else()
list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer libnvinfer_plugin
-Wl,--no-whole-archive)
endif()
endif()
# TensorRT 7 additionally needs the myelin libraries.
if(TensorRT_VERSION_MAJOR STREQUAL 7)
message(STATUS "handle trt myelin lib after trt7")
list(APPEND MGE_CUDA_LIBS libmyelin_compiler libmyelin_executor
libmyelin_pattern_runtime libmyelin_pattern_library)
endif()
endif()
# --- cuDNN ---
# cuDNN 7.5.0 gets a --whole-archive workaround on non-Windows hosts (see the
# status message below for the reason).
if("${CUDNN_VERSION}" STREQUAL "7.5.0")
if(MSVC OR WIN32)
message(STATUS "windows CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
list(APPEND MGE_CUDA_LIBS ${CUDNN_LIBRARY})
else()
message(
STATUS
"cudnn 7.5.0 has bug in cudnnConvolutionBiasActivationForward, need --whole-archive to workaround, ref https://docs.nvidia.com/deeplearning/cudnn/release-notes/rel_7xx.html"
)
list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libcudnn -Wl,--no-whole-archive)
endif()
else()
if(MSVC OR WIN32)
message(STATUS "windows CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
list(APPEND MGE_CUDA_LIBS ${CUDNN_LIBRARY})
else()
list(APPEND MGE_CUDA_LIBS libcudnn)
endif()
endif()
# --- cusolver / curand / cudart / cusparse ---
if(MSVC OR WIN32)
list(APPEND MGE_CUDA_LIBS cusolver.lib curand.lib cudart_static.lib cusparse.lib)
else()
list(
APPEND
MGE_CUDA_LIBS
cusolver_static
curand_static
culibos
cudart_static
cusparse_static)
endif()
# --- cuBLAS (MGE_WITH_CUBLAS_SHARED selects the shared library) ---
if(MSVC OR WIN32)
list(APPEND MGE_CUDA_LIBS cublas.lib)
else()
if(MGE_WITH_CUBLAS_SHARED)
list(APPEND MGE_CUDA_LIBS cublas)
else()
list(APPEND MGE_CUDA_LIBS cublas_static)
endif()
endif()
# --- cuBLASLt: only appended for CUDA compiler version >= 10.1.0 ---
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.1.0"
OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.1.0")
if(MSVC OR WIN32)
list(APPEND MGE_CUDA_LIBS cublasLt.lib)
else()
if(MGE_WITH_CUBLAS_SHARED)
list(APPEND MGE_CUDA_LIBS cublasLt)
else()
list(APPEND MGE_CUDA_LIBS cublasLt_static culibos)
endif()
endif()
endif()
# --- lapack_static: CUDA compiler version >= 10.0.0 on non-Windows hosts ---
if((${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.0.0"
OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.0.0")
AND NOT MSVC
AND NOT WIN32)
# mark all symbols from liblapack_static.a as weak to avoid duplicated definition
# with mkl
find_library(LAPACK_STATIC_PATH lapack_static
HINTS ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
if(NOT LAPACK_STATIC_PATH)
message(FATAL_ERROR "liblapack_static.a not found")
endif()
# The weakened copy is written into the build directory.
set(LAPACK_STATIC_COPY_PATH ${CMAKE_CURRENT_BINARY_DIR}/liblapack_static_copy.a)
# add a target that run objcopy
add_custom_command(
OUTPUT ${LAPACK_STATIC_COPY_PATH}
COMMAND ${CMAKE_OBJCOPY} -w -W* ${LAPACK_STATIC_PATH} ${LAPACK_STATIC_COPY_PATH}
VERBATIM)
add_custom_target(lapack_static_weak_target DEPENDS ${LAPACK_STATIC_COPY_PATH})
# create a library named "lapack_static_weak"
add_library(lapack_static_weak STATIC IMPORTED GLOBAL)
add_dependencies(lapack_static_weak lapack_static_weak_target)
set_target_properties(lapack_static_weak PROPERTIES IMPORTED_LOCATION
${LAPACK_STATIC_COPY_PATH})
# Both the imported target and the archive path itself are appended.
list(APPEND MGE_CUDA_LIBS lapack_static_weak ${LAPACK_STATIC_COPY_PATH})
endif()
else()
# Shared-library variant.
if(MGE_WITH_TRT)
list(APPEND MGE_CUDA_LIBS libnvinfer libnvinfer_plugin)
if(TensorRT_VERSION_MAJOR STREQUAL 7)
message(STATUS "handle trt myelin lib after trt7")
list(APPEND MGE_CUDA_LIBS libmyelin)
endif()
endif()
list(APPEND MGE_CUDA_LIBS libcudnn)
# NOTE(review): cublas / cusolver / curand are only appended for CUDA compiler
# version >= 10.1.0 here - confirm that older toolkits are not expected.
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.1.0"
OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.1.0")
list(APPEND MGE_CUDA_LIBS cublasLt cusolver cublas curand)
endif()
list(APPEND MGE_CUDA_LIBS cudart)
endif()
# Append the CUDA driver and NVRTC libraries unless the corresponding stub is
# requested, then the stub library itself and a few system libraries.
# Link against the real CUDA driver library when the driver stub is not used.
if(NOT MGE_WITH_CUDA_STUB)
if(MSVC OR WIN32)
list(APPEND MGE_CUDA_LIBS cuda.lib)
else()
list(APPEND MGE_CUDA_LIBS cuda)
endif()
endif()
# Link against the real NVRTC library when the NVRTC stub is not used.
if(NOT MGE_WITH_NVRTC_STUB)
if(MSVC OR WIN32)
list(APPEND MGE_CUDA_LIBS nvrtc.lib)
else()
list(APPEND MGE_CUDA_LIBS nvrtc)
endif()
endif()
# When any stub is requested, build dnn/cuda-stub and append it.
if(MGE_WITH_ANY_CUDA_STUB)
add_subdirectory(dnn/cuda-stub)
list(APPEND MGE_CUDA_LIBS cuda-stub)
endif()
# NOTE(review): on Windows this appends nvrtc.lib unconditionally (a second
# time when MGE_WITH_NVRTC_STUB is off), while other platforms append
# nvToolsExt - confirm the Windows entry is intended.
if(MSVC OR WIN32)
list(APPEND MGE_CUDA_LIBS nvrtc.lib)
else()
list(APPEND MGE_CUDA_LIBS nvToolsExt)
endif()
# NOTE(review): these set() calls interpolate the list into a string, so
# " -lrt" / " -ldl" become part of the LAST list element rather than separate
# elements - confirm consumers rely on this before changing to list(APPEND).
set(MGE_CUDA_LIBS "${MGE_CUDA_LIBS} -lrt")
if(UNIX)
set(MGE_CUDA_LIBS "${MGE_CUDA_LIBS} -ldl")
endif()
endif()
# ########## add_subdirectory() calls belong below this line ##########

# cpuinfo runtime check: offered on x86_64 / i386 / armv7 / aarch64, except on
# APPLE hosts and Windows XP SP2 inference deployments.
if((${MGE_ARCH} STREQUAL "x86_64"
    OR ${MGE_ARCH} STREQUAL "i386"
    OR ${MGE_ARCH} STREQUAL "armv7"
    OR ${MGE_ARCH} STREQUAL "aarch64")
   AND NOT APPLE
   AND NOT MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
  option(MGE_ENABLE_CPUINFO "Build cpuinfo library for check runtime." ON)
  if(MGE_ENABLE_CPUINFO)
    message(STATUS "Enable cpuinfo runtime check and little kernel optimize.")
    add_definitions(-DMGB_ENABLE_CPUINFO_CHECK)
    include(cmake/cpuinfo.cmake)
  endif()
endif()
# Cambricon backend: headers and libraries are taken from $NEUWARE_HOME.
if(MGE_WITH_CAMBRICON)
  include_directories("$ENV{NEUWARE_HOME}/include")
  link_directories("$ENV{NEUWARE_HOME}/lib64")

  include(cmake/Cambricon/bang.cmake)
  include(cmake/Cambricon/cnrt.cmake)
  include(cmake/Cambricon/cndev.cmake)
  include(cmake/Cambricon/cndrv.cmake)
  list(APPEND MGE_CAMBRICON_LIBS libcnrt libcndev cnmlrt libcndrv)

  # CNRT newer than 5.0.0 uses cnnl / cnlight / magicmind; otherwise cnml.
  if(CNRT_VERSION_STRING VERSION_GREATER "5.0.0")
    include(cmake/Cambricon/cnnl.cmake)
    include(cmake/Cambricon/cnlight.cmake)
    include(cmake/Cambricon/magicmind.cmake)
    list(APPEND MGE_CAMBRICON_LIBS
         libcnnl libcnnl_extra libcnlight libmagicmind libmagicmind_runtime)
  else()
    include(cmake/cnml.cmake)
    list(APPEND MGE_CAMBRICON_LIBS libcnml)
  endif()

  set(MGE_CAMBRICON_LIBS "${MGE_CAMBRICON_LIBS}")
endif()
# ROCm backend.
if(MGE_WITH_ROCM)
  include(cmake/rocm.cmake)
endif()

# Atlas backend: build the stub under dnn/atlas-stub and record it in
# MGE_ATLAS_LIBS.
if(MGE_WITH_ATLAS)
  add_subdirectory(dnn/atlas-stub)
  list(APPEND MGE_ATLAS_LIBS atlas-stub)
  set(MGE_ATLAS_LIBS "${MGE_ATLAS_LIBS}")
  set(MGB_ATLAS ${MGE_WITH_ATLAS})
endif()
# Use ccache as the compiler launcher when it is installed.
find_program(CCACHE_BIN ccache)
if(CCACHE_BIN)
  set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_BIN})
  # The CUDA launcher is only set for CMake 3.10.0 or newer.
  if(MGE_WITH_CUDA AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.10.0")
    message(STATUS "Using ccache as CMAKE_CUDA_COMPILER_LAUNCHER")
    set(CMAKE_CUDA_COMPILER_LAUNCHER ${CCACHE_BIN})
  endif()
endif()
# x86 hosts: include the BLAS implementation selected through MGE_BLAS.
if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386")
  if(${MGE_BLAS} STREQUAL "MKL")
    include(cmake/mkl.cmake)
    set(MGE_BLAS_LIBS libmkl)
  elseif(${MGE_BLAS} STREQUAL "OpenBLAS")
    include(cmake/OpenBLAS.cmake)
    set(MGE_BLAS_LIBS libopenblas)
  else()
    message(FATAL_ERROR "Unknown BLAS implementation ${MGE_BLAS}")
  endif()
endif()

# MKL-DNN is only built for x86_64.
if(MGE_WITH_MKLDNN AND ${MGE_ARCH} STREQUAL "x86_64")
  include(cmake/MKL_DNN.cmake)
  set(MEGDNN_X86_WITH_MKL_DNN 1)
endif()
# RTTI: name mangling is enabled exactly when RTTI is disabled.
if(NOT MGE_ENABLE_RTTI)
  set(MEGDNN_ENABLE_MANGLING 1)
  set(MEGDNN_ENABLE_RTTI 0)
else()
  set(MEGDNN_ENABLE_MANGLING 0)
  set(MEGDNN_ENABLE_RTTI 1)
endif()
set(MGB_VERBOSE_TYPEINFO_NAME ${MGE_ENABLE_RTTI})

# Logging: JSON support follows the logging switch.
set(MGB_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
set(MEGDNN_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
set(MGB_ENABLE_JSON ${MGE_ENABLE_LOGGING})

# Exceptions
if(NOT MGE_ENABLE_EXCEPTIONS)
  message(
    STATUS
      "Exceptions disabled; MegEngine would kill itself when it is supposed to throw an exception."
  )
endif()
set(MGB_ENABLE_EXCEPTION ${MGE_ENABLE_EXCEPTIONS})
set(MEGDNN_ENABLE_EXCEPTIONS ${MGE_ENABLE_EXCEPTIONS})
# JIT: Halide is configured as a non-shared library when it is enabled.
if(MGE_WITH_JIT AND MGE_WITH_HALIDE)
  set(HALIDE_SHARED_LIBRARY OFF CACHE BOOL "Build as a shared library")
  include(cmake/Halide.cmake)
endif()

# cpp_redis follows the project-wide exception setting.
if(NOT MGE_ENABLE_EXCEPTIONS)
  add_compile_definitions(CPP_REDIS_ENABLE_EXCEPTION=0)
else()
  add_compile_definitions(CPP_REDIS_ENABLE_EXCEPTION=1)
endif()
include(cmake/cpp_redis.cmake)
# Thread settings forced on macOS hosts; JIT, Halide and MLIR are switched
# off there as well.
if(APPLE)
  set(THREADS_PREFER_PTHREAD_FLAG ON)
  set(CMAKE_THREAD_LIBS_INIT "-lpthread")
  set(CMAKE_HAVE_THREADS_LIBRARY 1)
  set(CMAKE_USE_PTHREADS_INIT 1)
  set(CMAKE_USE_WIN32_THREADS_INIT 0)
  message(STATUS "disable jit, halide and mlir on macos host build...")
  set(MGE_WITH_JIT OFF)
  set(MGE_WITH_JIT_MLIR OFF)
  set(MGE_WITH_HALIDE OFF)
endif()

# riscv64: thread configuration is forced because CMake detection fails.
if(${MGE_ARCH} STREQUAL "riscv64")
  set(THREADS_PREFER_PTHREAD_FLAG ON)
  set(CMAKE_THREAD_LIBS_INIT "-lpthread")
  set(CMAKE_HAVE_THREADS_LIBRARY 1)
  set(CMAKE_USE_PTHREADS_INIT 1)
  set(CMAKE_USE_WIN32_THREADS_INIT 0)
  message(STATUS "force config thread when build riscv64, as CMAKE detect failed")
endif()
# Copy the JIT switches into the MGB_* variables.
set(MGB_JIT ${MGE_WITH_JIT})
set(MGB_JIT_HALIDE ${MGE_WITH_HALIDE})
set(MGB_JIT_MLIR ${MGE_WITH_JIT_MLIR})

# Consumers may override the MGB_C_OPR_INIT_FUNC symbol through
# CUSTOM_C_OPR_INIT_FUNC.
if(NOT "${CUSTOM_C_OPR_INIT_FUNC}" STREQUAL "")
  add_compile_definitions(MGB_C_OPR_INIT_FUNC=${CUSTOM_C_OPR_INIT_FUNC})
  message(STATUS "override MGB_C_OPR_INIT_FUNC to ${CUSTOM_C_OPR_INIT_FUNC}")
endif()

set(MGB_CUSTOM_OP ${MGE_WITH_CUSTOM_OP})
# Thread variables forced on Windows.
if(MSVC OR WIN32)
  set(THREADS_PREFER_PTHREAD_FLAG ON)
  set(CMAKE_HAVE_THREADS_LIBRARY 1)
  set(CMAKE_USE_PTHREADS_INIT 1)
  set(CMAKE_USE_WIN32_THREADS_INIT 1)
endif()

# MGB_HAVE_THREAD is set when any thread implementation was detected, or when
# building for Android / OHOS.
if(CMAKE_THREAD_LIBS_INIT
   OR CMAKE_HAVE_LIBC_PTHREAD
   OR CMAKE_USE_WIN32_THREADS_INIT
   OR ANDROID
   OR OHOS)
  set(MGB_HAVE_THREAD 1)
endif()

# Windows XP SP2 deployments run without threads and without JSON.
if((MSVC OR WIN32) AND MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
  message(STATUS "disable MGB_HAVE_THREAD/MGB_ENABLE_JSON when DEPLOY ON XP SP2")
  set(MGB_HAVE_THREAD 0)
  set(MGB_ENABLE_JSON 0)
endif()

if(MGE_WITH_TEST)
  # use intra-op multi threads
  set(MEGDNN_ENABLE_MULTI_THREADS 1)
endif()
# Benchmark support
if(MGE_WITH_BENCHMARK)
  set(MEGDNN_WITH_BENCHMARK ${MGE_WITH_BENCHMARK})
endif()

# Copy each backend switch into its MGB_* / MEGDNN_WITH_* variables.
# CUDA
set(MEGDNN_WITH_CUDA ${MGE_WITH_CUDA})
set(MGB_CUDA ${MGE_WITH_CUDA})
# ROCm
set(MEGDNN_WITH_ROCM ${MGE_WITH_ROCM})
set(MGB_ROCM ${MGE_WITH_ROCM})
# Cambricon
set(MEGDNN_WITH_CAMBRICON ${MGE_WITH_CAMBRICON})
set(MGB_CAMBRICON ${MGE_WITH_CAMBRICON})
# Debug info: assertion locations and the debug utilities are enabled for the
# Debug and RelWithDebInfo build types only.
# CMAKE_BUILD_TYPE is quoted so that an empty value (e.g. with multi-config
# generators) evaluates to false instead of producing a malformed expression.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug" OR "${CMAKE_BUILD_TYPE}" STREQUAL
                                             "RelWithDebInfo")
  set(MGB_ASSERT_LOC 1)
  set(MGB_ENABLE_DEBUG_UTIL 1)
else()
  set(MGB_ASSERT_LOC 0)
  set(MGB_ENABLE_DEBUG_UTIL 0)
endif()

# The debug utilities are always off for Windows i386 builds.
if(MSVC OR WIN32)
  if("${MGE_ARCH}" STREQUAL "i386")
    set(MGB_ENABLE_DEBUG_UTIL 0)
    message(STATUS "disable MGB_ENABLE_DEBUG_UTIL at Windows i386 build")
  endif()
endif()
# TensorRT
set(MGB_ENABLE_TENSOR_RT ${MGE_WITH_TRT})

# Slim serving is only selected for Windows XP SP2 deployments
# (inference needs jit now; this also matches the bazel build logic).
if(NOT MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
  set(MGB_BUILD_SLIM_SERVING 0)
else()
  set(MGB_BUILD_SLIM_SERVING 1)
endif()

# Gradient support is dropped only for inference-only builds without tests.
if(MGE_WITH_TEST OR NOT MGE_INFERENCE_ONLY)
  set(MGB_ENABLE_GRAD 1)
else()
  set(MGB_ENABLE_GRAD 0)
endif()

# Distributed communication
set(MGB_ENABLE_OPR_MM ${MGE_WITH_DISTRIBUTED})
# MGE_ARCH related flags

# Naive (reference) backend
if(MGE_ARCH STREQUAL "naive")
  set(MEGDNN_NAIVE 1)
  message(STATUS "MEGDNN_NAIVE is enabled; MegDNN performance is degraded.")
endif()

# x86 family: BLAS macro, bit-width macros and compiler flags.
if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
  if(MGE_BLAS STREQUAL "MKL")
    set(MEGDNN_X86_WITH_MKL 1)
  elseif(MGE_BLAS STREQUAL "OpenBLAS")
    set(MEGDNN_X86_WITH_OPENBLAS 1)
  endif()

  set(MEGDNN_X86 1)
  if(MGE_ARCH STREQUAL "x86_64")
    set(MEGDNN_X86_64 1)
    set(MEGDNN_64_BIT 1)
  else()
    set(MEGDNN_X86_32 1)
  endif()

  # The GCC-style flags are skipped for MSVC.
  if(NOT MSVC)
    if(MGE_ARCH STREQUAL "x86_64")
      string(APPEND CMAKE_CXX_FLAGS " -m64")
    else()
      string(APPEND CMAKE_CXX_FLAGS " -m32")
    endif()
    string(APPEND CMAKE_CXX_FLAGS " -msse4.2 -mfpmath=sse")
  endif()
endif()
# dotprod is not enabled by default on APPLE (cpuinfo has some problems
# there). Elsewhere, with a Clang C compiler, probe whether the C++ compiler
# accepts -march=armv8.2-a+dotprod and enable MGB_ENABLE_DOT if it does.
# The compiler id is quoted so that an empty value cannot break the if().
if(NOT APPLE AND "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
  check_cxx_compiler_flag("-march=armv8.2-a+dotprod" CXX_COMPILER_SUPPORT_DOT)
  if(CXX_COMPILER_SUPPORT_DOT)
    message(STATUS "Enable dotprod feature in armv8.2-a using MGB_ENABLE_DOT")
    set(MGB_ENABLE_DOT 1)
  endif()
endif()
# armv7 flags
if(MGE_ARCH STREQUAL "armv7")
  set(MEGDNN_ARMV7 1)
  set(MARCH "-march=armv7-a")
  if(ANDROID OR OHOS)
    string(APPEND CMAKE_CXX_FLAGS " -mfloat-abi=softfp -mfpu=neon")
  endif()
  # -funsafe-math-optimizations enables NEON auto-vectorization: NEON is not
  # fully IEEE 754 compatible, so GCC does not auto-vectorize with it by
  # default.
  string(APPEND CMAKE_CXX_FLAGS " -funsafe-math-optimizations")
endif()
# aarch64 flags
if(MGE_ARCH STREQUAL "aarch64")
  set(MEGDNN_AARCH64 1)
  set(MEGDNN_64_BIT 1)
  set(MARCH "-march=armv8-a")
  set(MGB_AARCH64 1)

  if(MGE_ARMV8_2_FEATURE_FP16)
    message(STATUS "Enable fp16 feature support in armv8.2")
    # The option name is passed to if() directly: with the previous unquoted
    # ${...} form an unset option collapsed the test to `if(NOT)`, which is
    # false, so FP16 NEON was silently left disabled.
    if(NOT MGE_DISABLE_FLOAT16)
      set(MEGDNN_ENABLE_FP16_NEON 1)
    endif()
    set(MARCH "-march=armv8.2-a+fp16")
  endif()

  # Quoted so that an empty CMAKE_BUILD_TYPE cannot break the expression.
  if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
    message(
      WARNING
        "aarch64 ld will add -mfix-cortex-a53-843419 and -mfix-cortex-a53-835769 by default.\
when build with DEBUG build type,ld will take about 14min+, for save link time(14min->1min), \
you may open below flags if not deploy on arm a53 platform, or just build release type!"
    )
    # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mno-fix-cortex-a53-843419
    # -mno-fix-cortex-a53-835769")
  endif()

  if(MGE_WITH_CUDA)
    message(STATUS "check compiler version...")
    if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.5)
      message(FATAL_ERROR "gcc version must >= 7.5 when build with cuda")
    endif()
    # The CUDA libraries are built with the Cortex-A53 errata fixes disabled,
    # so the same flags are forced here to avoid link errors.
    message(
      WARNING
        "cuda libs build with -mno-fix-cortex-a53-843419 -mno-fix-cortex-a53-835769, so force disable it to avoid link error"
    )
    set(CMAKE_CXX_FLAGS
        "${CMAKE_CXX_FLAGS} -mno-fix-cortex-a53-843419 -mno-fix-cortex-a53-835769")
  endif()
endif()
# riscv64 flags
if(MGE_ARCH STREQUAL "riscv64")
  set(MEGDNN_64_BIT 1)
  set(MEGDNN_RISCV64 1)
endif()

# Apply whichever -march value was selected (may be empty).
string(APPEND CMAKE_CXX_FLAGS " ${MARCH}")
set(MGE_VERSION_SCRIPT ${PROJECT_SOURCE_DIR}/src/version.ld
    CACHE INTERNAL "Path to linker version script")

# Capture the full hash of the current git commit.
execute_process(
  COMMAND git log -1 --format=%H
  WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
  OUTPUT_VARIABLE GIT_FULL_HASH
  OUTPUT_STRIP_TRAILING_WHITESPACE)

# Generate megbrain_build_config.h, which defines the macros needed by both
# megbrain and dnn. Settings that change often (see cuda_sm_gen.h.in and
# git_full_hash_header.h.in) are deliberately kept out of
# megbrain_build_config.h.in, because changing them would dirty this file.
configure_file(src/megbrain_build_config.h.in
               ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h
        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

if(MGE_WITH_CUDA)
  configure_file(src/cuda_sm_gen.h.in
                 ${CMAKE_CURRENT_BINARY_DIR}/genfiles/cuda_sm_gen.h)
  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/genfiles/cuda_sm_gen.h
          DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()

configure_file(src/git_full_hash_header.h.in
               ${CMAKE_CURRENT_BINARY_DIR}/genfiles/git_full_hash_header.h)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/genfiles/git_full_hash_header.h
        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
add_subdirectory(dnn)

# Generate megbrain/opr/param_defs.h from the python parameter definitions
# and expose it through the mgb_opr_param_defs INTERFACE library.
list(APPEND MGB_OPR_PARAM_DEFS_SRCS
     ${CMAKE_CURRENT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py)
set(MGB_OPR_PARAM_DEFS_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/dnn/scripts/gen_param_defs.py)
set(MGB_OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/opr/include/)
file(MAKE_DIRECTORY ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr)

# Build rule: run the generator script over the definition sources.
add_custom_command(
  OUTPUT ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
  COMMAND
    ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${MGB_OPR_PARAM_DEFS_SCRIPT}
    ${MGB_OPR_PARAM_DEFS_SRCS} ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
  DEPENDS ${MGB_OPR_PARAM_DEFS_SRCS} ${MGB_OPR_PARAM_DEFS_SCRIPT}
  VERBATIM)

list(APPEND MGB_OPR_PARAM_DEFS_OUTS
     ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h)
list(APPEND MGB_OPR_PARAM_DEFS_INC ${MGB_OPR_PARAM_DEFS_OUT_DIR})
install(FILES ${MGB_OPR_PARAM_DEFS_OUTS}
        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/megbrain/opr/)

# _mgb_opr_param_defs drives the generation; the INTERFACE library depends on
# it and carries the include directories.
add_custom_target(_mgb_opr_param_defs DEPENDS ${MGB_OPR_PARAM_DEFS_OUTS})
add_library(mgb_opr_param_defs INTERFACE)
target_include_directories(
  mgb_opr_param_defs
  INTERFACE $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
            $<BUILD_INTERFACE:${MGB_OPR_PARAM_DEFS_INC}>)
add_dependencies(mgb_opr_param_defs _mgb_opr_param_defs)
install(TARGETS mgb_opr_param_defs EXPORT ${MGE_EXPORT_TARGETS})
# TableGen inputs for the MLIR JIT and the imperative runtime.
if(MGE_WITH_JIT_MLIR OR MGE_BUILD_IMPERATIVE_RT)
  # generate param_defs.td
  set(MGE_GENFILE_DIR ${PROJECT_BINARY_DIR}/src/genfiles)
  set(MGE_GEN_IR_DIR ${PROJECT_BINARY_DIR}/src/core/include/megbrain/ir)
  set(OPR_PARAM_DEFS_SRCS ${MGE_GENFILE_DIR}/opr_param_defs.py)
  set(OPR_PARAM_DEFS_SCRIPT ${PROJECT_SOURCE_DIR}/dnn/scripts/gen_tablegen.py)
  set(OPR_PARAM_DEFS_OUT ${MGE_GEN_IR_DIR}/param_defs.td)

  # Build the combined definition file: a copy of dnn's opr_param_defs.py with
  # mgb_opr_param_defs.py appended to it.
  file(COPY ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py
       DESTINATION ${MGE_GENFILE_DIR})
  file(READ ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py CONTENTS)
  # CONTENTS is quoted: unquoted, any ';' in the file would split the text
  # into separate arguments and the semicolons would be lost on append.
  file(APPEND ${OPR_PARAM_DEFS_SRCS} "${CONTENTS}")
  file(MAKE_DIRECTORY ${MGE_GEN_IR_DIR})

  add_custom_command(
    OUTPUT ${OPR_PARAM_DEFS_OUT}
    COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT}
            ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_OUT}
    DEPENDS ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py
            ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py
            ${OPR_PARAM_DEFS_SCRIPT}
    VERBATIM)

  # mlir tblgen sources
  set(MGE_IR_DIR ${PROJECT_SOURCE_DIR}/src/core/include/megbrain/ir)
  set(MGE_IR_INCLUDE_DIRS ${MLIR_LLVM_INCLUDE_DIR} ${MGE_IR_DIR} ${MGE_GEN_IR_DIR})
  # Turn every include directory into a "-I<dir>" argument.
  list(TRANSFORM MGE_IR_INCLUDE_DIRS PREPEND "-I")
  file(GLOB_RECURSE MGE_IR_TDS ${MGE_IR_DIR}/*.td)
  add_custom_target(param_defs_tblgen DEPENDS ${OPR_PARAM_DEFS_OUT})
endif()
# Distributed builds: force MegRay's cache options to follow the MegEngine
# backend switches, then add MegRay itself.
if(MGE_WITH_DISTRIBUTED)
  set(MEGRAY_WITH_NCCL ${MGE_WITH_CUDA} CACHE BOOL "Override MegRay option" FORCE)
  set(MEGRAY_WITH_SHM ${MGE_WITH_CUDA} CACHE BOOL "Override MegRay option" FORCE)
  set(MEGRAY_WITH_RCCL ${MGE_WITH_ROCM} CACHE BOOL "Override MegRay option" FORCE)
  set(MEGRAY_WITH_CNCL ${MGE_WITH_CAMBRICON} CACHE BOOL "Override MegRay option" FORCE)
  set(MEGRAY_CUDA_GENCODE ${MGE_CUDA_GENCODE}
      CACHE STRING "Overwrite MegRay CUDA -gencode specifications" FORCE)
  add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/MegRay)
endif()
# Core sources
add_subdirectory(src)

# Imperative python runtime
if(MGE_BUILD_IMPERATIVE_RT)
  add_subdirectory(imperative)
  message(STATUS "Enable imperative python wrapper runtime")
endif()

# Tests are only added when RTTI is enabled as well.
if(MGE_WITH_TEST AND MGE_ENABLE_RTTI)
  add_subdirectory(test)
endif()
# "develop" target: symlinks the built python extension module, version.py,
# the custom-op headers and the shared library into the source tree's python
# package directory.
if(TARGET _imperative_rt)
  add_custom_target(
    develop
    COMMAND
      ${CMAKE_COMMAND} -E create_symlink
      ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
      ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
    COMMAND
      ${CMAKE_COMMAND} -E create_symlink
      ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/version.py
      ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/version.py
    COMMAND
      ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/src/custom/include
      ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/include
    COMMAND ${CMAKE_COMMAND} -E make_directory
            ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/lib
    COMMAND
      ${CMAKE_COMMAND} -E create_symlink
      ${CMAKE_CURRENT_BINARY_DIR}/src/$<TARGET_FILE_NAME:${MGE_SHARED_LIB}>
      ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/lib/$<TARGET_FILE_NAME:${MGE_SHARED_LIB}>
    DEPENDS ${develop_depends}
    VERBATIM)
  add_dependencies(develop _imperative_rt)

  # generate stub file for _imperative_rt
  # The probe exits with 0 only when mypy is importable and passes the
  # version assertion below.
  execute_process(
    COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} -c
            "import mypy.version; assert mypy.version.__version__ >= '0.982'"
    RESULT_VARIABLE NOT_HAVING_MYPY_STUBGEN)
  # RESULT_VARIABLE may hold an error string instead of an exit code when the
  # process cannot be started; comparing with EQUAL 0 treats that as "mypy
  # unavailable" instead of expanding free-form text into the if() expression.
  if(NOT_HAVING_MYPY_STUBGEN EQUAL 0)
    add_custom_command(
      TARGET develop
      POST_BUILD
      COMMAND
        ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} -c "from mypy.stubgen import main; main()"
        -p ${PACKAGE_NAME}.core.${MODULE_NAME} -o
        ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python
      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python
      VERBATIM)
  endif()
endif()
# Configure and install the pkg-config file. Unlike the Config.cmake modules
# this is not relocatable (and not really portable), because two dependencies
# ship no pkg-config description: FlatBuffers and MKL-DNN.
if(MGE_USE_SYSTEM_MKLDNN)
  set(MGE_PKGCONFIG_LIBS_PRIVATE "-ldnnl")
endif()
if(MGE_USE_SYSTEM_OPENBLAS)
  string(APPEND MGE_PKGCONFIG_LIBS_PRIVATE " -lopenblas")
endif()

configure_file(cmake/megengine.pc.in ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc @ONLY)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc
        DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
# CMake package export. Skipped when MGE_WITH_DISTRIBUTED is on, because
# MegRay is not ready to be exported.
if(NOT MGE_WITH_DISTRIBUTED)
  include(CMakePackageConfigHelpers)
  set(MGE_INSTALL_CMAKEDIR ${CMAKE_INSTALL_LIBDIR}/cmake/MegEngine)

  # Generate the package config file and its version file.
  configure_package_config_file(
    cmake/MegEngineConfig.cmake.in
    ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake
    INSTALL_DESTINATION ${MGE_INSTALL_CMAKEDIR})
  write_basic_package_version_file(
    ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfigVersion.cmake
    VERSION ${MGB_VER_STRING}
    COMPATIBILITY SameMajorVersion)

  # Install the export set next to the generated config files.
  install(EXPORT ${MGE_EXPORT_TARGETS} DESTINATION ${MGE_INSTALL_CMAKEDIR})
  install(
    FILES ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake
          ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfigVersion.cmake
    DESTINATION ${MGE_INSTALL_CMAKEDIR})
endif()
# MLIR command-line tools are only built with the MLIR JIT.
if(MGE_WITH_JIT_MLIR)
  add_subdirectory(tools/mlir/mgb-opt)
  add_subdirectory(tools/mlir/mgb-file-check)
endif()
# Warn when cuDNN >= 8 is linked statically: that setup only works with a
# reduced set of SM architectures (see the message text).
if(MGE_WITH_CUDA
   AND MGE_CUDA_USE_STATIC
   AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0"
   AND (NOT MGE_WITH_CUDNN_SHARED))
  # The warning is emitted three times, as before - presumably so that it is
  # not overlooked in the configure log (NOTE(review): confirm intent).
  # The suggested MGE_CUDA_GENCODE value now carries "-gencode" in front of
  # every arch entry; the earlier text omitted it before the second one.
  foreach(_mge_warn_round RANGE 1 3)
    message(
      WARNING
        "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75\" "
    )
  endforeach()
endif()
# MegEngine Lite
if(MGE_WITH_LITE)
  add_subdirectory(lite)
endif()

# Android builds get a warning about thread_local and dlopen/dlclose.
if(ANDROID)
  message(
    WARNING
      "MegEngine project use thread_local, if you want to deploy MegEngine at dlopen/dlclose scene, please build with c++_shared by -DANDROID_STL=c++_shared, detail at https://github.com/android-ndk/ndk/issues/789 for example: EXTRA_CMAKE_ARGS=\" -DANDROID_STL=c++_shared\" ./scripts/cmake-build/cross_build_android_arm_inference.sh "
  )
endif()
C++
1
https://gitee.com/MegEngine/MegEngine.git
git@gitee.com:MegEngine/MegEngine.git
MegEngine
MegEngine
MegEngine
master

搜索帮助