diff --git a/inc/graphengine/inc/framework/common/string_util.h b/inc/graphengine/inc/framework/common/string_util.h index f03683633b8ecd344c9a46740461678a6e51bb97..c1216d90aae4581e650d14df679afc0dec5c95c3 100644 --- a/inc/graphengine/inc/framework/common/string_util.h +++ b/inc/graphengine/inc/framework/common/string_util.h @@ -45,18 +45,21 @@ class GE_FUNC_VISIBILITY StringUtils { public: static std::string &Ltrim(std::string &s) { #if __cplusplus >= 201103L - (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c) { return !std::isspace(c); })); + (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](const int32_t c) { return std::isspace(c) == 0; })); #else - (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(std::isspace)))); + (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), + std::not1(std::ptr_fun(std::isspace)))); #endif return s; } // lint -esym(551,*) static std::string &Rtrim(std::string &s) { /*lint !e618*/ #if __cplusplus >= 201103L - (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c) { return !std::isspace(c); })); + (void)s.erase(std::find_if(s.rbegin(), s.rend(), [](const int32_t c) { return std::isspace(c) == 0; }).base(), + s.end()); #else - (void)s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), s.end()); + (void)s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), + s.end()); #endif return s; } diff --git a/tf_adapter/tests/st/optimizers/testcase/om_partition_subgraphs_pass_test.cc b/tf_adapter/tests/st/optimizers/testcase/om_partition_subgraphs_pass_test.cc index 38e0704299108191e82e5d966ed9b68a867a4fcd..53c4eb0e4bf5f451db5e0aaed235cbe9bb46d7c4 100644 --- a/tf_adapter/tests/st/optimizers/testcase/om_partition_subgraphs_pass_test.cc +++ b/tf_adapter/tests/st/optimizers/testcase/om_partition_subgraphs_pass_test.cc @@ -212,7 +212,7 @@ TEST_F(OmOptimizationPassTest, StringInputMaxSizeTest) { std::string target_graph = DoRunOmOptimizationPassTest(); EXPECT_EQ(target_graph, "arg_input_0_0->DecodeJpeg;DecodeJpeg->retval_DecodeJpeg_0_0"); } -TEST_F(OmOptimizationPassTest, NpuOpsIdentifierTest01) { +TEST_F(OmOptimizationPassTest, GetOppPluginVendorsTest01) { SetLogLevelForC(0); std::string opp_path = __FILE__; opp_path = opp_path.substr(0, opp_path.rfind("/") + 1) + "opp_path/"; @@ -224,7 +224,78 @@ TEST_F(OmOptimizationPassTest, NpuOpsIdentifierTest01) { system(("mkdir -p " + path_vendors).c_str()); system(("echo 'load_priority=customize,mdc,lhisi' > " + path_config).c_str()); std::vector vendors; - NpuOpsIdentifier::GetOppPluginVendors(path_config, vendors); + EXPECT_TRUE(NpuOpsIdentifier::GetOppPluginVendors(path_config, vendors)); + EXPECT_EQ(vendors.size(), 3); + EXPECT_EQ(vendors[0], "customize"); + EXPECT_EQ(vendors[1], "mdc"); + EXPECT_EQ(vendors[2], "lhisi"); + ClearLogLevelForC(); + system(("rm -rf " + opp_path).c_str()); +} +TEST_F(OmOptimizationPassTest, GetOppPluginVendorsTest02) { + SetLogLevelForC(0); + std::string opp_path = __FILE__; + opp_path = opp_path.substr(0, opp_path.rfind("/") + 1) + "opp_path/"; + setenv("ASCEND_OPP_PATH", opp_path.c_str(), 1); + std::string path_builtin = opp_path + "built-in"; + std::string path_vendors = opp_path + "vendors"; + std::string path_config = path_vendors + "/config.ini"; + system(("mkdir -p " + path_builtin).c_str()); + system(("mkdir -p " + path_vendors).c_str()); + system(("echo '' > " + path_config).c_str()); + std::vector vendors; + EXPECT_FALSE(NpuOpsIdentifier::GetOppPluginVendors(path_config, vendors)); + EXPECT_EQ(vendors.size(), 0); + ClearLogLevelForC(); + system(("rm -rf " + opp_path).c_str()); +} +TEST_F(OmOptimizationPassTest, GetOppPluginVendorsTest03) { + SetLogLevelForC(0); + std::string opp_path = __FILE__; + opp_path = opp_path.substr(0, opp_path.rfind("/") + 1) + "opp_path/"; + setenv("ASCEND_OPP_PATH", opp_path.c_str(), 1); + std::string path_builtin = opp_path + "built-in"; + std::string path_vendors = opp_path + "vendors"; + std::string path_config = path_vendors + "/config.ini"; + system(("mkdir -p " + path_builtin).c_str()); + system(("mkdir -p " + path_vendors).c_str()); + system(("echo 'load_priority' > " + path_config).c_str()); + std::vector vendors; + EXPECT_FALSE(NpuOpsIdentifier::GetOppPluginVendors(path_config, vendors)); + EXPECT_EQ(vendors.size(), 0); + ClearLogLevelForC(); + system(("rm -rf " + opp_path).c_str()); +} +TEST_F(OmOptimizationPassTest, GetOppPluginVendorsTest04) { + SetLogLevelForC(0); + std::string opp_path = __FILE__; + opp_path = opp_path.substr(0, opp_path.rfind("/") + 1) + "opp_path/"; + setenv("ASCEND_OPP_PATH", opp_path.c_str(), 1); + std::string path_builtin = opp_path + "built-in"; + std::string path_vendors = opp_path + "vendors"; + std::string path_config = path_vendors + "/config.ini"; + system(("mkdir -p " + path_builtin).c_str()); + system(("mkdir -p " + path_vendors).c_str()); + system(("rm -rf " + path_config).c_str()); + std::vector vendors; + EXPECT_FALSE(NpuOpsIdentifier::GetOppPluginVendors(path_config, vendors)); + EXPECT_EQ(vendors.size(), 0); + ClearLogLevelForC(); + system(("rm -rf " + opp_path).c_str()); +} +TEST_F(OmOptimizationPassTest, GetOppPluginVendorsTest05) { + SetLogLevelForC(0); + std::string opp_path = __FILE__; + opp_path = opp_path.substr(0, opp_path.rfind("/") + 1) + "opp_path/"; + setenv("ASCEND_OPP_PATH", opp_path.c_str(), 1); + std::string path_builtin = opp_path + "built-in"; + std::string path_vendors = opp_path + "vendors"; + std::string path_config = path_vendors + "/config.ini"; + system(("mkdir -p " + path_builtin).c_str()); + system(("mkdir -p " + path_vendors).c_str()); + system(("echo ' load_priority = customize , mdc , lhisi ' > " + path_config).c_str()); + std::vector vendors; + EXPECT_TRUE(NpuOpsIdentifier::GetOppPluginVendors(path_config, vendors)); EXPECT_EQ(vendors.size(), 3); EXPECT_EQ(vendors[0], "customize"); EXPECT_EQ(vendors[1], "mdc"); diff --git a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc index aff1db6be2906e962335ca2b6db7f871898b51d2..1b9b91e134e14ab831987269ef67915da5515479 100644 --- a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc @@ -84,6 +84,22 @@ TEST_F(NpuAttrTest, CheckAoeMode) { EXPECT_EQ(s.ok(), false); } +TEST_F(NpuAttrTest, CheckPrecisionMode ) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + + AttrValue precision_mode = AttrValue(); + precision_mode.set_s("force_Dp32"); + (*custom_config->mutable_parameter_map())["precision_mode"] = precision_mode; + Status s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); + EXPECT_EQ(s.ok(), false); +} + TEST_F(NpuAttrTest, GetDumpPath) { setenv("DUMP_GRAPH_PATH", "./", 1); string path = GetDumpPath(); diff --git a/tf_adapter/tests/ut/optimizers/testcase/om_partition_subgraphs_pass_test.cc b/tf_adapter/tests/ut/optimizers/testcase/om_partition_subgraphs_pass_test.cc index 38e0704299108191e82e5d966ed9b68a867a4fcd..53c4eb0e4bf5f451db5e0aaed235cbe9bb46d7c4 100644 --- a/tf_adapter/tests/ut/optimizers/testcase/om_partition_subgraphs_pass_test.cc +++ b/tf_adapter/tests/ut/optimizers/testcase/om_partition_subgraphs_pass_test.cc @@ -212,7 +212,7 @@ TEST_F(OmOptimizationPassTest, StringInputMaxSizeTest) { std::string target_graph = DoRunOmOptimizationPassTest(); EXPECT_EQ(target_graph, "arg_input_0_0->DecodeJpeg;DecodeJpeg->retval_DecodeJpeg_0_0"); } -TEST_F(OmOptimizationPassTest, NpuOpsIdentifierTest01) { +TEST_F(OmOptimizationPassTest, GetOppPluginVendorsTest01) { SetLogLevelForC(0); std::string opp_path = __FILE__; opp_path = opp_path.substr(0, opp_path.rfind("/") + 1) + "opp_path/"; @@ -224,7 +224,78 @@ TEST_F(OmOptimizationPassTest, NpuOpsIdentifierTest01) { system(("mkdir -p " + path_vendors).c_str()); system(("echo 'load_priority=customize,mdc,lhisi' > " + path_config).c_str()); std::vector vendors; - NpuOpsIdentifier::GetOppPluginVendors(path_config, vendors); + EXPECT_TRUE(NpuOpsIdentifier::GetOppPluginVendors(path_config, vendors)); + EXPECT_EQ(vendors.size(), 3); + EXPECT_EQ(vendors[0], "customize"); + EXPECT_EQ(vendors[1], "mdc"); + EXPECT_EQ(vendors[2], "lhisi"); + ClearLogLevelForC(); + system(("rm -rf " + opp_path).c_str()); +} +TEST_F(OmOptimizationPassTest, GetOppPluginVendorsTest02) { + SetLogLevelForC(0); + std::string opp_path = __FILE__; + opp_path = opp_path.substr(0, opp_path.rfind("/") + 1) + "opp_path/"; + setenv("ASCEND_OPP_PATH", opp_path.c_str(), 1); + std::string path_builtin = opp_path + "built-in"; + std::string path_vendors = opp_path + "vendors"; + std::string path_config = path_vendors + "/config.ini"; + system(("mkdir -p " + path_builtin).c_str()); + system(("mkdir -p " + path_vendors).c_str()); + system(("echo '' > " + path_config).c_str()); + std::vector vendors; + EXPECT_FALSE(NpuOpsIdentifier::GetOppPluginVendors(path_config, vendors)); + EXPECT_EQ(vendors.size(), 0); + ClearLogLevelForC(); + system(("rm -rf " + opp_path).c_str()); +} +TEST_F(OmOptimizationPassTest, GetOppPluginVendorsTest03) { + SetLogLevelForC(0); + std::string opp_path = __FILE__; + opp_path = opp_path.substr(0, opp_path.rfind("/") + 1) + "opp_path/"; + setenv("ASCEND_OPP_PATH", opp_path.c_str(), 1); + std::string path_builtin = opp_path + "built-in"; + std::string path_vendors = opp_path + "vendors"; + std::string path_config = path_vendors + "/config.ini"; + system(("mkdir -p " + path_builtin).c_str()); + system(("mkdir -p " + path_vendors).c_str()); + system(("echo 'load_priority' > " + path_config).c_str()); + std::vector vendors; + EXPECT_FALSE(NpuOpsIdentifier::GetOppPluginVendors(path_config, vendors)); + EXPECT_EQ(vendors.size(), 0); + ClearLogLevelForC(); + system(("rm -rf " + opp_path).c_str()); +} +TEST_F(OmOptimizationPassTest, GetOppPluginVendorsTest04) { + SetLogLevelForC(0); + std::string opp_path = __FILE__; + opp_path = opp_path.substr(0, opp_path.rfind("/") + 1) + "opp_path/"; + setenv("ASCEND_OPP_PATH", opp_path.c_str(), 1); + std::string path_builtin = opp_path + "built-in"; + std::string path_vendors = opp_path + "vendors"; + std::string path_config = path_vendors + "/config.ini"; + system(("mkdir -p " + path_builtin).c_str()); + system(("mkdir -p " + path_vendors).c_str()); + system(("rm -rf " + path_config).c_str()); + std::vector vendors; + EXPECT_FALSE(NpuOpsIdentifier::GetOppPluginVendors(path_config, vendors)); + EXPECT_EQ(vendors.size(), 0); + ClearLogLevelForC(); + system(("rm -rf " + opp_path).c_str()); +} +TEST_F(OmOptimizationPassTest, GetOppPluginVendorsTest05) { + SetLogLevelForC(0); + std::string opp_path = __FILE__; + opp_path = opp_path.substr(0, opp_path.rfind("/") + 1) + "opp_path/"; + setenv("ASCEND_OPP_PATH", opp_path.c_str(), 1); + std::string path_builtin = opp_path + "built-in"; + std::string path_vendors = opp_path + "vendors"; + std::string path_config = path_vendors + "/config.ini"; + system(("mkdir -p " + path_builtin).c_str()); + system(("mkdir -p " + path_vendors).c_str()); + system(("echo ' load_priority = customize , mdc , lhisi ' > " + path_config).c_str()); + std::vector vendors; + EXPECT_TRUE(NpuOpsIdentifier::GetOppPluginVendors(path_config, vendors)); EXPECT_EQ(vendors.size(), 3); EXPECT_EQ(vendors[0], "customize"); EXPECT_EQ(vendors[1], "mdc"); diff --git a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc index cf6b30c7005601f0c9fbaad7b0aa12863223ab14..dc796242b1b1124ae18f82b45bb9b545786cb5c3 100644 --- a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc @@ -85,6 +85,22 @@ TEST_F(NpuAttrTest, CheckAoeMode) { EXPECT_EQ(s.ok(), false); } +TEST_F(NpuAttrTest, CheckPrecisionMode ) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + + AttrValue precision_mode = AttrValue(); + precision_mode.set_s("force_Dp32"); + (*custom_config->mutable_parameter_map())["precision_mode"] = precision_mode; + Status s = NpuAttrs::SetNpuOptimizerAttr(options, nullptr); + EXPECT_EQ(s.ok(), false); +} + TEST_F(NpuAttrTest, GetDumpPath) { setenv("DUMP_GRAPH_PATH", "./", 1); string path = GetDumpPath(); diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index ad611a61b7faefe52727a2404ae4e105e1cd9863..3c88960c4540ea9a5c0deb8032682336c4cec7ba 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -30,6 +30,7 @@ #include "mmpa/mmpa_api.h" #include "tf_adapter/util/ge_plugin.h" #include "ge/ge_api.h" +#include "tf_adapter_2.x/npu_device/core/npu_micros.h" namespace tensorflow { namespace { bool kIsNewDataTransfer = true; @@ -726,7 +727,7 @@ std::map NpuAttrs::GetPassOptions(const GraphOptimizat if (dynamic_input) { if (params.count("dynamic_graph_execute_mode") > 0) { dynamic_graph_execute_mode = params.at("dynamic_graph_execute_mode").s(); - if (dynamic_graph_execute_mode != "lazy_recompile" && dynamic_graph_execute_mode != "dynamic_execute") { + if ((dynamic_graph_execute_mode != "lazy_recompile") && (dynamic_graph_execute_mode != "dynamic_execute")) { ADP_LOG(ERROR) << "dynamic_graph_execute_mode should be lazy_recompile or dynamic_execute."; LOG(FATAL) << "dynamic_graph_execute_mode should be lazy_recompile or dynamic_execute."; } @@ -1733,6 +1734,10 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options } if (params.count("precision_mode") > 0) { precision_mode = params.at("precision_mode").s(); + const static std::vector kPrecisionModeList = {"force_fp32", "allow_fp32_to_fp16", + "force_fp16", "must_keep_origin_dtype", + "allow_mix_precision", "cube_fp16in_fp32out"}; + NPU_REQUIRES_OK(CheckValueAllowed(precision_mode, kPrecisionModeList)); } else { if (static_cast(graph_run_mode)) { precision_mode = "allow_fp32_to_fp16"; @@ -1864,7 +1869,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options } if (params.count("buffer_optimize") > 0) { buffer_optimize = params.at("buffer_optimize").s(); - if (buffer_optimize != "l2_optimize" && buffer_optimize != "off_optimize") { + if ((buffer_optimize != "l2_optimize") && (buffer_optimize != "off_optimize")) { ADP_LOG(FATAL) << "buffer_optimize is valid, should be one of [l2_optimize, off_optimize]"; LOG(FATAL) << "buffer_optimize is valid, should be one of [l2_optimize, off_optimize]"; } @@ -1881,7 +1886,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options if (params.count("fusion_switch_file") > 0) { fusion_switch_file = params.at("fusion_switch_file").s(); } - if (params.count("enable_compress_weight") > 0 && params.count("compress_weight_conf") > 0) { + if ((params.count("enable_compress_weight") > 0) && (params.count("compress_weight_conf") > 0)) { ADP_LOG(FATAL) << "enable_compress_weight can not use with compress_weight_conf."; LOG(FATAL) << "enable_compress_weight can not use with compress_weight_conf."; } diff --git a/tf_adapter/util/npu_attrs.h b/tf_adapter/util/npu_attrs.h index 048bbeb3f1446df909f172cbe14c3af2cd00af70..7e73c693a2c100c8c9b0b5836d79e9a8ca823301 100644 --- a/tf_adapter/util/npu_attrs.h +++ b/tf_adapter/util/npu_attrs.h @@ -17,6 +17,9 @@ #ifndef TENSORFLOW_NPU_ATTRS_H_ #define TENSORFLOW_NPU_ATTRS_H_ +#include +#include +#include #include #include #include "ge/ge_api_types.h" @@ -58,6 +61,32 @@ class NpuAttrs { static bool GetNewDataTransferFlag(); // only use for ut/st static void SetNewDataTransferFlag(bool flag); + template + static std::string VectorToString(const std::vector &values) { + std::stringstream ss; + ss << '['; + const auto size = values.size(); + for (size_t i = 0U; i < size; ++i) { + ss << values[i]; + if (i != (size - 1U)) { + ss << ", "; + } + } + ss << ']'; + return ss.str(); + } + template + static Status CheckValueAllowed(const T &v, const std::vector &allowed_values) { + if (find(allowed_values.begin(), allowed_values.end(), v) != allowed_values.cend()) { + return Status::OK(); + } else { + std::stringstream ss; + ss << v << " is invalid, it should be one of the list:"; + ss << VectorToString(allowed_values); + return errors::InvalidArgument(ss.str()); + } + } + private: static bool CheckIsNewDataTransfer(); static std::map turn_on_tdt_info_; diff --git a/tf_adapter/util/npu_ops_identifier.cc b/tf_adapter/util/npu_ops_identifier.cc index e2b504552f53c30d031f8b99c8df6ac952482467..ada3082f303578375d1274dae606479f1709343d 100644 --- a/tf_adapter/util/npu_ops_identifier.cc +++ b/tf_adapter/util/npu_ops_identifier.cc @@ -74,6 +74,7 @@ bool NpuOpsIdentifier::GetOppPluginVendors(const std::string &vendors_config, st ADP_LOG(ERROR) << "Format of file content is invalid!"; return false; } + (void) for_each(vendors.begin(), vendors.end(), &ge::StringUtils::Trim); return true; } diff --git a/tf_adapter_2.x/npu_device/core/npu_device.cpp b/tf_adapter_2.x/npu_device/core/npu_device.cpp index f1d6f0f8e7dad5c623bb148175e9078f8eac7540..1849e34ddaaca2606467bf894a82bf41bcc1b547 100644 --- a/tf_adapter_2.x/npu_device/core/npu_device.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_device.cpp @@ -808,7 +808,7 @@ void NpuDevice::RunGeGraphAsync(TFE_Context *context, uint64_t graph_id, int num if (err_msg.empty()) { err_msg = " code:" + std::to_string(s); } - done(tensorflow::errors::Internal("Graph engine process graph failed: ", err_msg)); + done(tensorflow::errors::Internal("Graph engine process graph failed:\n", err_msg)); return; } else if (ge_outputs.size() != static_cast(num_outputs)) { done(tensorflow::errors::Internal("Graph engine process graph succeed but output num ", ge_outputs.size(), diff --git a/tf_adapter_2.x/npu_device/core/npu_micros.h b/tf_adapter_2.x/npu_device/core/npu_micros.h index cfeeb4ee06241189296b84df1d92f48809a685ce..933ef44a8c51a399b35660ede228f36eb2bc4d20 100644 --- a/tf_adapter_2.x/npu_device/core/npu_micros.h +++ b/tf_adapter_2.x/npu_device/core/npu_micros.h @@ -79,7 +79,7 @@ if (err_msg.empty()) { \ err_msg = " code:" + std::to_string(_status); \ } \ - CTX->status = tensorflow::errors::Internal(PREFIX, ":", err_msg); \ + CTX->status = tensorflow::errors::Internal(PREFIX, ":\n", err_msg); \ LOG(ERROR) << CTX->status.ToString(); \ return; \ } \ @@ -93,7 +93,7 @@ if (err_msg.empty()) { \ err_msg = " code:" + std::to_string(_status); \ } \ - (CTX)->status = tensorflow::errors::Internal(PREFIX, ":", err_msg); \ + (CTX)->status = tensorflow::errors::Internal(PREFIX, ":\n", err_msg); \ LOG(ERROR) << (CTX)->status.ToString(); \ return RET; \ } \ diff --git a/tf_adapter_2.x/npu_device/core/op_executors/npu_static_shape_op.cpp b/tf_adapter_2.x/npu_device/core/op_executors/npu_static_shape_op.cpp index f3c58b2330aa0bbaccbd3a582645d223375b0451..84668f73796289c3d431a8ba7b0101f5edd081db 100644 --- a/tf_adapter_2.x/npu_device/core/op_executors/npu_static_shape_op.cpp +++ b/tf_adapter_2.x/npu_device/core/op_executors/npu_static_shape_op.cpp @@ -39,16 +39,10 @@ std::string NpuStaticShapeOp::AttachedDebugString() const { return ss.str(); } -#ifdef TFA2_COMPILED_FOR_EAGER_MODE -#define TFA2_EAGER_ENABLED true -#else -#define TFA2_EAGER_ENABLED false -#endif - void NpuStaticShapeOp::RunWithShape(TFE_Context *context, NpuDevice *device, const OpExecutor *spec, TensorShapes output_shapes, int num_inputs, TFE_TensorHandle **inputs, int num_outputs, TFE_TensorHandle **outputs, TF_Status *status) { - if (kGraphEngineGreedyMemory || (!TFA2_EAGER_ENABLED)) { + if (kGraphEngineGreedyMemory) { DLOG() << "NPU Executing op " << spec->Op() << " fallback cpu in graph engine greedy memory mode"; device->FallbackCPU(context, spec->NodeDef(), num_inputs, inputs, num_outputs, outputs, status); return; diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py index da5492e08447515c8cb7da51ddf5105299f6567e..30791404b0fea48455cc468c937153b4b4c34817 100644 --- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py @@ -37,7 +37,7 @@ class NpuConfig(NpuBaseConfig): self.fusion_switch_file = OptionValue(None, None) self.precision_mode = OptionValue('allow_fp32_to_fp16', ['force_fp32', 'allow_fp32_to_fp16', 'force_fp16', 'must_keep_origin_dtype', - 'allow_mix_precision']) + 'allow_mix_precision', 'cube_fp16in_fp32out']) self.op_select_implmode = DeprecatedValue(['high_performance', 'high_precision'], replacement='op_precision_mode') self.optypelist_for_implmode = DeprecatedValue(None, replacement='op_precision_mode')