diff --git a/inc/external/acl/acl_rt.h b/inc/external/acl/acl_rt.h index 7d6455b164380208c0570449b1bffddf56495440..38dfc2528f93c8b891d759653c286aa6ef959b68 100644 --- a/inc/external/acl/acl_rt.h +++ b/inc/external/acl/acl_rt.h @@ -87,6 +87,12 @@ typedef enum aclrtGroupAttr { ACL_GROUP_GROUPID_INT } aclrtGroupAttr; +typedef enum aclrtFloatOverflowMode { + ACL_RT_OVERFLOW_MODE_SATURATION = 0, + ACL_RT_OVERFLOW_MODE_INFNAN, + ACL_RT_OVERFLOW_MODE_UNDEF, +} aclrtFloatOverflowMode; + typedef struct tagRtGroupInfo aclrtGroupInfo; typedef struct rtExceptionInfo aclrtExceptionInfo; @@ -1028,6 +1034,48 @@ ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, si */ ACL_FUNC_VISIBILITY aclError aclrtSetOpWaitTimeout(uint32_t timeout); +/** + * @ingroup AscendCL + * @brief enable or disable overflow switch on some stream + * @param stream [IN] set overflow switch on this stream + * @param flag [IN] 0 : disable 1 : enable + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSetStreamOverflowSwitch(aclrtStream stream, uint32_t flag); + +/** + * @ingroup AscendCL + * @brief get overflow switch on some stream + * @param stream [IN] get overflow switch on this stream + * @param flag [OUT] current overflow switch, 0 : disable others : enable + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetStreamOverflowSwitch(aclrtStream stream, uint32_t *flag); + +/** + * @ingroup AscendCL + * @brief set saturation mode + * @param mode [IN] target saturation mode + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSetDeviceSatMode(aclrtFloatOverflowMode mode); + +/** + * @ingroup AscendCL + * @brief get saturation mode + * @param mode [OUT] current saturation mode + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetDeviceSatMode(aclrtFloatOverflowMode *mode); + #ifdef __cplusplus } #endif diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py index 63ad87065c42ab585cad7260b075e72dab58e7a7..2c5d9a107c0391ea3694da205016599a76338349 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py @@ -226,3 +226,7 @@ def get_rdma_cache(data_type, shape, name="rdma_w"): with npu_scope.npu_mem_type_scope(): return variable_scope.get_variable(name=name, shape=shape, dtype=data_type, initializer=init_ops.zeros_initializer()) + + +def set_device_sat_mode(mode): + return tf_adapter.SetDeviceSatMode(mode) diff --git a/tf_adapter/swig/ge_plugin.i b/tf_adapter/swig/ge_plugin.i index bb666ab62d200402c694432956660a86b226bc43..61b5ade1114b723e73a3f7b03550005acb0e29f4 100644 --- a/tf_adapter/swig/ge_plugin.i +++ b/tf_adapter/swig/ge_plugin.i @@ -51,6 +51,10 @@ extern int32_t RdmaInitAndRegister(const std::vector &var_info, extern int32_t GetVarAddrAndSize(const std::string &var_name, uint64_t &base_addr, uint64_t &var_size); extern int32_t MallocSharedMem(const ge::TensorInfo &tensor_info, uint64_t &dev_addr, uint64_t &memory_size); + +extern int32_t SetDeviceSatMode(uint32_t mode); + +extern int32_t GetDeviceSatMode(); %} %template(var_info_vec) std::vector; @@ -120,3 +124,7 @@ extern int32_t RdmaInitAndRegister(const std::vector &var_info, extern int32_t GetVarAddrAndSize(const std::string &var_name, uint64_t &base_addr, uint64_t &var_size); extern int32_t MallocSharedMem(const ge::TensorInfo 
&tensor_info, uint64_t &dev_addr, uint64_t &memory_size); + +extern int32_t SetDeviceSatMode(uint32_t mode); + +extern int32_t GetDeviceSatMode(); diff --git a/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc index 277d68aecd22a777b1bdc04cc54e4eb50893d5cc..197aa4c3343e3617a1cda345e05c3c0b5e835c84 100644 --- a/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc +++ b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc @@ -24,6 +24,8 @@ #include "tf_adapter/common/adapter_logger.h" namespace { + constexpr uint32_t kDeviceSatModeLimit = 2U; + std::uint32_t deviceSatMode = 2U; std::mutex aclChannleMutex; std::map aclChannleMap; std::map aclDataTypeStrMap = @@ -592,3 +594,24 @@ aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *inputs, aclmd } return ACL_SUCCESS; } + +aclError aclrtSetDeviceSatMode(aclrtFloatOverflowMode mode) { + if (mode != ACL_RT_OVERFLOW_MODE_SATURATION && mode != ACL_RT_OVERFLOW_MODE_INFNAN) { + deviceSatMode = 2U; + return ACL_ERROR_INVALID_PARAM; + } + deviceSatMode = static_cast<std::uint32_t>(mode); + return ACL_SUCCESS; +} + +aclError aclrtGetDeviceSatMode(aclrtFloatOverflowMode *mode) { + // Reject a null output pointer instead of dereferencing it below. 
+ if (mode == nullptr) { + return ACL_ERROR_INVALID_PARAM; + } + if (deviceSatMode >= kDeviceSatModeLimit) { + return ACL_ERROR_FAILURE; + } + *mode = static_cast<aclrtFloatOverflowMode>(deviceSatMode); + return ACL_SUCCESS; +} diff --git a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc index 754faf743f5a7cd95f592f1735e046b006c0b6d4..b53da180075ee08ffff95965643f3285439798e5 100644 --- a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc +++ b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc @@ -127,6 +127,16 @@ TEST_F(GePluginTest, MallocSharedMemOKTest) { int32_t ret = MallocSharedMem(tensor_info, dev_addr, memory_size); EXPECT_EQ(ret, 0); } +TEST_F(GePluginTest, SetDeviceSatModeTest) { + uint32_t mode = 1U; + int32_t ret = SetDeviceSatMode(mode); + EXPECT_EQ(ret, 0); + EXPECT_EQ(GetDeviceSatMode(), 1); + mode = 
2U; + ret = SetDeviceSatMode(mode); + EXPECT_EQ(ret, -1); + EXPECT_EQ(GetDeviceSatMode(), -1); +} TEST_F(GePluginTest, NpuCloseTest) { std::map init_options; init_options["ge.jobType"] = "1"; @@ -159,4 +169,4 @@ TEST_F(GePluginTest, RdmaInitAndRegisterOKTest) { EXPECT_EQ(ret, 0); } } -} // end tensorflow \ No newline at end of file +} // end tensorflow diff --git a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc index bb0bf3c4ee5e7079d7f5c8ba5f69520d4027df43..7b1bbba005ac3a7f7ffcb39eeba1c904a1ae1b06 100644 --- a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc +++ b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc @@ -122,6 +122,16 @@ TEST_F(GePluginTest, MallocSharedMemOKTest) { int32_t ret = MallocSharedMem(tensor_info, dev_addr, memory_size); EXPECT_EQ(ret, 0); } +TEST_F(GePluginTest, SetDeviceSatModeTest) { + uint32_t mode = 1U; + int32_t ret = SetDeviceSatMode(mode); + EXPECT_EQ(ret, 0); + EXPECT_EQ(GetDeviceSatMode(), 1); + mode = 2U; + ret = SetDeviceSatMode(mode); + EXPECT_EQ(ret, -1); + EXPECT_EQ(GetDeviceSatMode(), -1); +} TEST_F(GePluginTest, NpuCloseTest) { std::map init_options; init_options["ge.jobType"] = "1"; @@ -155,4 +165,4 @@ TEST_F(GePluginTest, RdmaInitAndRegisterOKTest) { } } -} // end tensorflow \ No newline at end of file +} // end tensorflow diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc index cd8c8e5d7654f0937bcf9d4de8ef357519ec1a10..86532ef359f59b358da66dd5c56a6bcf457e4579 100644 --- a/tf_adapter/util/ge_plugin.cc +++ b/tf_adapter/util/ge_plugin.cc @@ -27,6 +27,7 @@ #include "tf_adapter/common/adapter_logger.h" #include "tf_adapter/common/common.h" #include "tf_adapter/util/npu_attrs.h" +#include "acl/acl_rt.h" #include "tf_adapter/util/npu_plugin.h" #include "aoe_tuning_api.h" using AoeFinalizeFunc = Aoe::AoeStatus (*)(); @@ -478,4 +479,26 @@ int32_t MallocSharedMem(const ge::TensorInfo &tensor_info, uint64_t &dev_addr, u return 0; } 
-std::atomic_int GePlugin::graph_counter_ = {0}; \ No newline at end of file +int32_t SetDeviceSatMode(uint32_t mode) { + aclError ret = aclrtSetDeviceSatMode(static_cast<aclrtFloatOverflowMode>(mode)); + if (ret != ACL_SUCCESS) { + ADP_LOG(ERROR) << "[GePlugin] set device sat mode failed, ret : " << ToString(ret); + LOG(ERROR) << "[GePlugin] set device sat mode failed, ret : " << ToString(ret); + return -1; + } + ADP_LOG(INFO) << "[GePlugin] set device sat mode success."; + return 0; +} + +int32_t GetDeviceSatMode() { + aclrtFloatOverflowMode floatOverflowMode = ACL_RT_OVERFLOW_MODE_UNDEF; + aclError ret = aclrtGetDeviceSatMode(&floatOverflowMode); + if (ret != ACL_SUCCESS) { + ADP_LOG(ERROR) << "[GePlugin] get device sat mode failed, ret : " << ToString(ret); + LOG(ERROR) << "[GePlugin] get device sat mode failed, ret : " << ToString(ret); + return -1; + } + ADP_LOG(INFO) << "[GePlugin] get device sat mode success."; + return static_cast<int32_t>(floatOverflowMode); +} +std::atomic_int GePlugin::graph_counter_ = {0}; diff --git a/tf_adapter/util/npu_plugin.h b/tf_adapter/util/npu_plugin.h index c2a4f2fbd42ff737728b5fd378f844e97b6496d1..a76ea92b9a94b7d096130388515d58915d710186 100644 --- a/tf_adapter/util/npu_plugin.h +++ b/tf_adapter/util/npu_plugin.h @@ -50,4 +50,7 @@ int32_t GetVarAddrAndSize(const std::string &var_name, uint64_t &base_addr, uint int32_t MallocSharedMem(const ge::TensorInfo &tensor_info, uint64_t &dev_addr, uint64_t &memory_size); +int32_t SetDeviceSatMode(uint32_t mode); + +int32_t GetDeviceSatMode(); #endif // TENSORFLOW_NPU_PLUGIN_H_ diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index 9d50a7687d4529b076e0e44b4156823cb48f4cf9..4eee72f637d74c099f4a1a5c42f11ecd8413bdb6 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -40,6 +40,7 @@ #include "framework/omg/parser/parser_api.h" #include "ge/ge_api.h" +#include "acl/acl_rt.h" #include 
"npu_aoe.h" #include "npu_device_register.h" #include "npu_global.h" @@ -119,6 +120,7 @@ const std::map kConfigurableOptions = { namespace { std::unordered_set npu_specify_ops_cache; +constexpr uint32_t kDeviceSatModeLimit = 2U; } namespace npu { void ParseGlobalOptions(int device_index, const std::map &user_options, @@ -284,6 +286,29 @@ PYBIND11_MODULE(_npu_device_backends, m) { << ", it will take effect in the next training loop"; }); + (void)m.def("SetDeviceSatMode", [](uint32_t mode) { + if (mode > kDeviceSatModeLimit) { + LOG(ERROR) << "overflow mode is invalid: " << mode; + return; + } + // The lambda returns nothing to Python, so a failed set is reported through the log. + aclError ret = aclrtSetDeviceSatMode(static_cast<aclrtFloatOverflowMode>(mode)); + if (ret != ACL_SUCCESS) { + LOG(ERROR) << "set device sat mode failed, mode: " << mode; + } + }); + + (void)m.def("GetDeviceSatMode", []() -> std::int32_t { + aclrtFloatOverflowMode mode = ACL_RT_OVERFLOW_MODE_UNDEF; + aclError ret = aclrtGetDeviceSatMode(&mode); + if (ret != ACL_SUCCESS) { + LOG(ERROR) << "get device sat mode failed"; + return -1; + } + LOG(INFO) << "get deviceSatMode success"; + return static_cast<std::int32_t>(mode); + }); + (void)m.def("RunContextOptionsSetMemoryOptimizeOptions", &RunContextOptionsSetMemoryOptimizeOptions); (void)m.def("CleanRunContextOptions", &CleanRunContextOptions); (void)m.def("RunContextOptionsSetGraphParallelOptions", &RunContextOptionsSetGraphParallelOptions); diff --git a/tf_adapter_2.x/python/npu_device/__init__.py b/tf_adapter_2.x/python/npu_device/__init__.py index 99207552ba1f1a416909977ec93cd6f9e595c594..1089a95ec75bd00c43c89c0dc07ff3d82d33f00c 100644 --- a/tf_adapter_2.x/python/npu_device/__init__.py +++ b/tf_adapter_2.x/python/npu_device/__init__.py @@ -21,6 +21,8 @@ from npu_device.npu_device import gen_npu_ops from npu_device.npu_device import global_options from npu_device.npu_device import set_npu_loop_size from npu_device.npu_device import npu_run_context +from npu_device.npu_device import set_device_sat_mode +from npu_device.npu_device import get_device_sat_mode from npu_device.utils.scope import keep_dtype_scope from npu_device.utils.scope import npu_recompute_scope 
diff --git a/tf_adapter_2.x/python/npu_device/npu_device.py b/tf_adapter_2.x/python/npu_device/npu_device.py index d3412cd74016759a7279034f3452a91a31b51bc7..472a810204e84ca0aae7473007bc514bf159d988 100644 --- a/tf_adapter_2.x/python/npu_device/npu_device.py +++ b/tf_adapter_2.x/python/npu_device/npu_device.py @@ -64,6 +64,14 @@ def set_npu_loop_size(loop_size): _npu_device_backends.SetNpuLoopSize(loop_size) +def set_device_sat_mode(mode): + _npu_device_backends.SetDeviceSatMode(mode) + + +def get_device_sat_mode(): + return _npu_device_backends.GetDeviceSatMode() + + _global_options = None _global_options_lock = threading.Lock() diff --git a/tf_adapter_2.x/tests/st/adapter2_st.py b/tf_adapter_2.x/tests/st/adapter2_st.py index 55ea54778a7f04b808f69f06450575d0b4b474a4..f107b51ec611d461bd47f7b893b9f36de848526d 100644 --- a/tf_adapter_2.x/tests/st/adapter2_st.py +++ b/tf_adapter_2.x/tests/st/adapter2_st.py @@ -23,7 +23,8 @@ os.environ['ASCEND_OPP_PATH'] = 'non-existed-path' import npu_device from npu_device.npu_device import stupid_repeat - +from npu_device.npu_device import set_device_sat_mode +from npu_device.npu_device import get_device_sat_mode import unittest import tensorflow as tf from tensorflow.python.eager import context @@ -70,6 +71,12 @@ def foo_cpu_add_(v): class Adapter2St(unittest.TestCase): + def test_set_device_sat_mode(self): + set_device_sat_mode(2) + self.assertEqual(get_device_sat_mode(), -1) + set_device_sat_mode(1) + self.assertEqual(get_device_sat_mode(), 1) + def test_mix_resource(self): with context.device("/job:localhost/replica:0/task:0/device:CPU:0"): x = tf.Variable(1) diff --git a/tf_adapter_2.x/tests/stub/acl_stub.cpp b/tf_adapter_2.x/tests/stub/acl_stub.cpp index 9f68453153ac8f4b2fc11bf013f981666c7b814d..8d1c7393b2d3553f00d3476fc71487d6deab7216 100644 --- a/tf_adapter_2.x/tests/stub/acl_stub.cpp +++ b/tf_adapter_2.x/tests/stub/acl_stub.cpp @@ -23,6 +23,11 @@ limitations under the License. 
#include "acl/acl_rt.h" #include "acl/acl_tdt.h" +namespace { +const uint32_t kDeviceSatModeLimit = 2U; +std::uint32_t deviceSatMode = 2U; +} + struct aclopAttr {}; struct aclDataBuffer {}; struct aclTensorDesc {}; @@ -197,6 +202,26 @@ aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *d return ACL_ERROR_NONE; } +aclError aclrtSetDeviceSatMode(aclrtFloatOverflowMode mode) { + if (mode != ACL_RT_OVERFLOW_MODE_SATURATION && mode != ACL_RT_OVERFLOW_MODE_INFNAN) { + deviceSatMode = 2U; + return ACL_ERROR_INVALID_PARAM; + } + deviceSatMode = static_cast<std::uint32_t>(mode); + return ACL_ERROR_NONE; +} + +aclError aclrtGetDeviceSatMode(aclrtFloatOverflowMode *mode) { + // Reject a null output pointer instead of dereferencing it below. + if (mode == nullptr) { + return ACL_ERROR_INVALID_PARAM; + } + if (deviceSatMode >= kDeviceSatModeLimit) { + return ACL_ERROR_FAILURE; + } + *mode = static_cast<aclrtFloatOverflowMode>(deviceSatMode); + return ACL_ERROR_NONE; +} #ifdef __cplusplus } -#endif \ No newline at end of file +#endif