From 6e8cad3b0ee671b643a5c21107b9d5a7d90e3868 Mon Sep 17 00:00:00 2001 From: c00420053 Date: Mon, 28 Nov 2022 17:48:11 +0800 Subject: [PATCH] support tf_adapter overflow set inf-nan mode --- inc/external/acl/acl_rt.h | 48 +++++++++++++++++++ .../npu_bridge/estimator/npu/npu_plugin.py | 4 ++ tf_adapter/swig/ge_plugin.i | 8 ++++ .../depends/ascendcl/src/ascendcl_stub.cc | 19 ++++++++ .../tests/st/util/testcase/ge_plugin_test.cc | 12 ++++- .../tests/ut/util/testcase/ge_plugin_test.cc | 12 ++++- tf_adapter/util/ge_plugin.cc | 25 +++++++++- tf_adapter/util/npu_plugin.h | 3 ++ .../npu_device/core/npu_wrapper.cpp | 21 ++++++++ tf_adapter_2.x/python/npu_device/__init__.py | 2 + .../python/npu_device/npu_device.py | 8 ++++ tf_adapter_2.x/tests/st/adapter2_st.py | 9 +++- tf_adapter_2.x/tests/stub/acl_stub.cpp | 23 ++++++++- 13 files changed, 189 insertions(+), 5 deletions(-) diff --git a/inc/external/acl/acl_rt.h b/inc/external/acl/acl_rt.h index 7d6455b16..38dfc2528 100644 --- a/inc/external/acl/acl_rt.h +++ b/inc/external/acl/acl_rt.h @@ -87,6 +87,12 @@ typedef enum aclrtGroupAttr { ACL_GROUP_GROUPID_INT } aclrtGroupAttr; +typedef enum aclrtFloatOverflowMode { + ACL_RT_OVERFLOW_MODE_SATURATION = 0, + ACL_RT_OVERFLOW_MODE_INFNAN, + ACL_RT_OVERFLOW_MODE_UNDEF, +} aclrtFloatOverflowMode; + typedef struct tagRtGroupInfo aclrtGroupInfo; typedef struct rtExceptionInfo aclrtExceptionInfo; @@ -1028,6 +1034,48 @@ ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, si */ ACL_FUNC_VISIBILITY aclError aclrtSetOpWaitTimeout(uint32_t timeout); +/** + * @ingroup AscendCL + * @brief enable or disable overflow switch on some stream + * @param stream [IN] set overflow switch on this stream + * @param flag [IN] 0 : disable 1 : enable + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSetStreamOverflowSwitch(aclrtStream stream, uint32_t flag); + +/** + * @ingroup AscendCL + * @brief get overflow switch on some stream + * @param stream [IN] get overflow switch on this stream + * @param flag [OUT] current overflow switch, 0 : disable others : enable + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetStreamOverflowSwitch(aclrtStream stream, uint32_t *flag); + +/** + * @ingroup AscendCL + * @brief set saturation mode + * @param mode [IN] target saturation mode + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtSetDeviceSatMode(aclrtFloatOverflowMode mode); + +/** + * @ingroup AscendCL + * @brief get saturation mode + * @param mode [OUT] get saturation mode + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtGetDeviceSatMode(aclrtFloatOverflowMode *mode); + #ifdef __cplusplus } #endif diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py index 63ad87065..2c5d9a107 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py @@ -226,3 +226,7 @@ def get_rdma_cache(data_type, shape, name="rdma_w"): with npu_scope.npu_mem_type_scope(): return variable_scope.get_variable(name=name, shape=shape, dtype=data_type, initializer=init_ops.zeros_initializer()) + + +def set_device_sat_mode(mode): + return tf_adapter.SetDeviceSatMode(mode) diff --git a/tf_adapter/swig/ge_plugin.i b/tf_adapter/swig/ge_plugin.i index bb666ab62..61b5ade11 100644 --- a/tf_adapter/swig/ge_plugin.i +++ b/tf_adapter/swig/ge_plugin.i @@ -51,6 +51,10 @@ extern int32_t RdmaInitAndRegister(const std::vector &var_info, 
extern int32_t GetVarAddrAndSize(const std::string &var_name, uint64_t &base_addr, uint64_t &var_size); extern int32_t MallocSharedMem(const ge::TensorInfo &tensor_info, uint64_t &dev_addr, uint64_t &memory_size); + +extern int32_t SetDeviceSatMode(uint32_t mode); + +extern int32_t GetDeviceSatMode(); %} %template(var_info_vec) std::vector; @@ -120,3 +124,7 @@ extern int32_t RdmaInitAndRegister(const std::vector &var_info, extern int32_t GetVarAddrAndSize(const std::string &var_name, uint64_t &base_addr, uint64_t &var_size); extern int32_t MallocSharedMem(const ge::TensorInfo &tensor_info, uint64_t &dev_addr, uint64_t &memory_size); + +extern int32_t SetDeviceSatMode(uint32_t mode); + +extern int32_t GetDeviceSatMode(); diff --git a/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc index 277d68aec..197aa4c33 100644 --- a/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc +++ b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc @@ -24,6 +24,8 @@ #include "tf_adapter/common/adapter_logger.h" namespace { + constexpr uint32_t kDeviceSatModeLimit = 2U; + std::uint32_t deviceSatMode = 2U; std::mutex aclChannleMutex; std::map aclChannleMap; std::map aclDataTypeStrMap = @@ -592,3 +594,20 @@ aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *inputs, aclmd } return ACL_SUCCESS; } + +aclError aclrtSetDeviceSatMode(aclrtFloatOverflowMode mode) { + if (mode != ACL_RT_OVERFLOW_MODE_SATURATION && mode != ACL_RT_OVERFLOW_MODE_INFNAN) { + deviceSatMode = 2U; + return ACL_ERROR_INVALID_PARAM; + } + deviceSatMode = static_cast(mode); + return ACL_SUCCESS; +} + +aclError aclrtGetDeviceSatMode(aclrtFloatOverflowMode *mode) { + if (deviceSatMode >= kDeviceSatModeLimit) { + return ACL_ERROR_FAILURE; + } + *mode = static_cast(deviceSatMode); + return ACL_SUCCESS; +} diff --git a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc index 
754faf743..b53da1800 100644 --- a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc +++ b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc @@ -127,6 +127,16 @@ TEST_F(GePluginTest, MallocSharedMemOKTest) { int32_t ret = MallocSharedMem(tensor_info, dev_addr, memory_size); EXPECT_EQ(ret, 0); } +TEST_F(GePluginTest, SetDeviceSatModeTest) { + uint32_t mode = 1U; + int32_t ret = SetDeviceSatMode(mode); + EXPECT_EQ(ret, 0); + EXPECT_EQ(GetDeviceSatMode(), 1); + mode = 2U; + ret = SetDeviceSatMode(mode); + EXPECT_EQ(ret, -1); + EXPECT_EQ(GetDeviceSatMode(), -1); +} TEST_F(GePluginTest, NpuCloseTest) { std::map init_options; init_options["ge.jobType"] = "1"; @@ -159,4 +169,4 @@ TEST_F(GePluginTest, RdmaInitAndRegisterOKTest) { EXPECT_EQ(ret, 0); } } -} // end tensorflow \ No newline at end of file +} // end tensorflow diff --git a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc index bb0bf3c4e..7b1bbba00 100644 --- a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc +++ b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc @@ -122,6 +122,16 @@ TEST_F(GePluginTest, MallocSharedMemOKTest) { int32_t ret = MallocSharedMem(tensor_info, dev_addr, memory_size); EXPECT_EQ(ret, 0); } +TEST_F(GePluginTest, SetDeviceSatModeTest) { + uint32_t mode = 1U; + int32_t ret = SetDeviceSatMode(mode); + EXPECT_EQ(ret, 0); + EXPECT_EQ(GetDeviceSatMode(), 1); + mode = 2U; + ret = SetDeviceSatMode(mode); + EXPECT_EQ(ret, -1); + EXPECT_EQ(GetDeviceSatMode(), -1); +} TEST_F(GePluginTest, NpuCloseTest) { std::map init_options; init_options["ge.jobType"] = "1"; @@ -155,4 +165,4 @@ TEST_F(GePluginTest, RdmaInitAndRegisterOKTest) { } } -} // end tensorflow \ No newline at end of file +} // end tensorflow diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc index cd8c8e5d7..86532ef35 100644 --- a/tf_adapter/util/ge_plugin.cc +++ b/tf_adapter/util/ge_plugin.cc @@ -27,6 +27,7 @@ #include 
"tf_adapter/common/adapter_logger.h" #include "tf_adapter/common/common.h" #include "tf_adapter/util/npu_attrs.h" +#include "acl/acl_rt.h" #include "tf_adapter/util/npu_plugin.h" #include "aoe_tuning_api.h" using AoeFinalizeFunc = Aoe::AoeStatus (*)(); @@ -478,4 +479,26 @@ int32_t MallocSharedMem(const ge::TensorInfo &tensor_info, uint64_t &dev_addr, u return 0; } -std::atomic_int GePlugin::graph_counter_ = {0}; \ No newline at end of file +int32_t SetDeviceSatMode(uint32_t mode) { + aclError ret = aclrtSetDeviceSatMode(aclrtFloatOverflowMode(mode)); + if (ret != ACL_SUCCESS) { + ADP_LOG(ERROR) << "[GePlugin] set device sat mode failed, ret : " << ToString(ret); + LOG(ERROR) << "[GePlugin] set device sat mode failed, ret : " << ToString(ret); + return -1; + } + ADP_LOG(INFO) << "[GePlugin] set device sat mode success."; + return 0; +} + +int32_t GetDeviceSatMode() { + aclrtFloatOverflowMode floatOverflowMode = ACL_RT_OVERFLOW_MODE_UNDEF; + aclError ret = aclrtGetDeviceSatMode(&floatOverflowMode); + if (ret != ACL_SUCCESS) { + ADP_LOG(ERROR) << "[GePlugin] get device sat mode failed, ret : " << ToString(ret); + LOG(ERROR) << "[GePlugin] get device sat mode failed, ret : " << ToString(ret); + return -1; + } + ADP_LOG(INFO) << "[GePlugin] get device sat mode success."; + return static_cast(floatOverflowMode); +} +std::atomic_int GePlugin::graph_counter_ = {0}; diff --git a/tf_adapter/util/npu_plugin.h b/tf_adapter/util/npu_plugin.h index c2a4f2fbd..a76ea92b9 100644 --- a/tf_adapter/util/npu_plugin.h +++ b/tf_adapter/util/npu_plugin.h @@ -50,4 +50,7 @@ int32_t GetVarAddrAndSize(const std::string &var_name, uint64_t &base_addr, uint int32_t MallocSharedMem(const ge::TensorInfo &tensor_info, uint64_t &dev_addr, uint64_t &memory_size); +int32_t SetDeviceSatMode(uint32_t mode); + +int32_t GetDeviceSatMode(); #endif // TENSORFLOW_NPU_PLUGIN_H_ diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index 
9d50a7687..4eee72f63 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -40,6 +40,7 @@ #include "framework/omg/parser/parser_api.h" #include "ge/ge_api.h" +#include "acl/acl_rt.h" #include "npu_aoe.h" #include "npu_device_register.h" #include "npu_global.h" @@ -119,6 +120,7 @@ const std::map kConfigurableOptions = { namespace { std::unordered_set npu_specify_ops_cache; +constexpr uint32_t kDeviceSatModeLimit = 2U; } namespace npu { void ParseGlobalOptions(int device_index, const std::map &user_options, @@ -284,6 +286,25 @@ PYBIND11_MODULE(_npu_device_backends, m) { << ", it will take effect in the next training loop"; }); + (void)m.def("SetDeviceSatMode", [](uint32_t mode) { + if (mode > kDeviceSatModeLimit) { + LOG(ERROR) << "overflow mode is invalid: " << mode; + return; + } + if (aclrtSetDeviceSatMode(static_cast<aclrtFloatOverflowMode>(mode)) != ACL_SUCCESS) { LOG(ERROR) << "set device sat mode failed"; } + }); + + (void)m.def("GetDeviceSatMode", []() -> std::int32_t { + aclrtFloatOverflowMode mode = ACL_RT_OVERFLOW_MODE_UNDEF; + aclError ret = aclrtGetDeviceSatMode(&mode); + if (ret != ACL_SUCCESS) { + LOG(ERROR) << "get device sat mode failed"; + return -1; + } + LOG(INFO) << "get deviceSatMode success"; + return static_cast<std::int32_t>(mode); + }); + (void)m.def("RunContextOptionsSetMemoryOptimizeOptions", &RunContextOptionsSetMemoryOptimizeOptions); (void)m.def("CleanRunContextOptions", &CleanRunContextOptions); (void)m.def("RunContextOptionsSetGraphParallelOptions", &RunContextOptionsSetGraphParallelOptions); diff --git a/tf_adapter_2.x/python/npu_device/__init__.py b/tf_adapter_2.x/python/npu_device/__init__.py index 99207552b..1089a95ec 100644 --- a/tf_adapter_2.x/python/npu_device/__init__.py +++ b/tf_adapter_2.x/python/npu_device/__init__.py @@ -21,6 +21,8 @@ from npu_device.npu_device import gen_npu_ops from npu_device.npu_device import global_options from npu_device.npu_device import set_npu_loop_size from npu_device.npu_device import npu_run_context +from npu_device.npu_device 
import set_device_sat_mode +from npu_device.npu_device import get_device_sat_mode from npu_device.utils.scope import keep_dtype_scope from npu_device.utils.scope import npu_recompute_scope diff --git a/tf_adapter_2.x/python/npu_device/npu_device.py b/tf_adapter_2.x/python/npu_device/npu_device.py index d3412cd74..472a81020 100644 --- a/tf_adapter_2.x/python/npu_device/npu_device.py +++ b/tf_adapter_2.x/python/npu_device/npu_device.py @@ -64,6 +64,14 @@ def set_npu_loop_size(loop_size): _npu_device_backends.SetNpuLoopSize(loop_size) +def set_device_sat_mode(mode): + _npu_device_backends.SetDeviceSatMode(mode) + + +def get_device_sat_mode(): + return _npu_device_backends.GetDeviceSatMode() + + _global_options = None _global_options_lock = threading.Lock() diff --git a/tf_adapter_2.x/tests/st/adapter2_st.py b/tf_adapter_2.x/tests/st/adapter2_st.py index 55ea54778..f107b51ec 100644 --- a/tf_adapter_2.x/tests/st/adapter2_st.py +++ b/tf_adapter_2.x/tests/st/adapter2_st.py @@ -23,7 +23,8 @@ os.environ['ASCEND_OPP_PATH'] = 'non-existed-path' import npu_device from npu_device.npu_device import stupid_repeat - +from npu_device.npu_device import set_device_sat_mode +from npu_device.npu_device import get_device_sat_mode import unittest import tensorflow as tf from tensorflow.python.eager import context @@ -70,6 +71,12 @@ def foo_cpu_add_(v): class Adapter2St(unittest.TestCase): + def test_set_device_sat_mode(self): + set_device_sat_mode(2) + self.assertEqual(get_device_sat_mode(), -1) + set_device_sat_mode(1) + self.assertEqual(get_device_sat_mode(), 1) + def test_mix_resource(self): with context.device("/job:localhost/replica:0/task:0/device:CPU:0"): x = tf.Variable(1) diff --git a/tf_adapter_2.x/tests/stub/acl_stub.cpp b/tf_adapter_2.x/tests/stub/acl_stub.cpp index 9f6845315..8d1c7393b 100644 --- a/tf_adapter_2.x/tests/stub/acl_stub.cpp +++ b/tf_adapter_2.x/tests/stub/acl_stub.cpp @@ -23,6 +23,11 @@ limitations under the License. 
#include "acl/acl_rt.h" #include "acl/acl_tdt.h" +namespace { +const uint32_t kDeviceSatModeLimit = 2U; +std::uint32_t deviceSatMode = 2U; +} + struct aclopAttr {}; struct aclDataBuffer {}; struct aclTensorDesc {}; @@ -197,6 +202,22 @@ aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *d return ACL_ERROR_NONE; } +aclError aclrtSetDeviceSatMode(aclrtFloatOverflowMode mode) { + if (mode != ACL_RT_OVERFLOW_MODE_SATURATION && mode != ACL_RT_OVERFLOW_MODE_INFNAN) { + deviceSatMode = 2U; + return ACL_ERROR_INVALID_PARAM; + } + deviceSatMode = mode; + return ACL_ERROR_NONE; +} + +aclError aclrtGetDeviceSatMode(aclrtFloatOverflowMode *mode) { + if (deviceSatMode >= kDeviceSatModeLimit) { + return ACL_ERROR_FAILURE; + } + *mode = aclrtFloatOverflowMode(deviceSatMode); + return ACL_ERROR_NONE; +} #ifdef __cplusplus } -#endif \ No newline at end of file +#endif -- Gitee