From 3f776e03f608e777ed3b5c8badf2130edd3dcf51 Mon Sep 17 00:00:00 2001
From: 熊攀
Date: Thu, 25 Sep 2025 15:06:27 +0800
Subject: [PATCH] Optimize MultiModelRunner device memory usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../include/api/multi_model_runner.h          |  15 +-
 .../cxx_api/model/multi_model_runner.cc       | 212 ++++++++++++------
 .../delegate/ascend_acl/acl_graph_executor.cc |   6 -
 .../src/extendrt/session/delegate_session.cc  |   4 +
 .../test/st/python/test_inference_cloud.py    |   2 +-
 .../st/python/test_inference_cloud_nocofig.py |  36 +--
 6 files changed, 168 insertions(+), 107 deletions(-)

diff --git a/mindspore-lite/include/api/multi_model_runner.h b/mindspore-lite/include/api/multi_model_runner.h
index d95ae976..14bdeaf6 100644
--- a/mindspore-lite/include/api/multi_model_runner.h
+++ b/mindspore-lite/include/api/multi_model_runner.h
@@ -39,14 +39,17 @@ class MS_API ModelExecutor {
   /// \param[in] executor_output_names Which is a vector of string, name of ModelExecutor's outputs.
   /// \param[in] subgraph_input_names Which is a vector of vector of string, name of every model's inputs in
   /// ModelExecutor.
+  /// \param[in] model_output_tensors Which is a vector of vector of MSTensor, output tensors of every model in ModelExecutor.
   ModelExecutor(const std::vector<std::shared_ptr<ModelImpl>> &models,
                 const std::vector<std::string> &executor_input_names,
                 const std::vector<std::string> &executor_output_names,
-                const std::vector<std::vector<std::string>> &subgraph_input_names)
+                const std::vector<std::vector<std::string>> &subgraph_input_names,
+                const std::vector<std::vector<MSTensor>> &model_output_tensors)
       : models_(models),
         executor_input_names_(executor_input_names),
         executor_output_names_(executor_output_names),
-        subgraph_input_names_(subgraph_input_names) {}
+        subgraph_input_names_(subgraph_input_names),
+        model_output_tensors_(model_output_tensors) {}
   /// \brief Destructor of ModelExecutor.
   ~ModelExecutor() = default;
   /// \brief Inference ModelExecutor API.
@@ -66,13 +69,6 @@ class MS_API ModelExecutor {
   /// \return The vector that includes all output tensors.
   std::vector<MSTensor> GetOutputs() const;
 
-  /// \brief Initialize ModelExecutor API.
-  ///
-  /// \param[in] model_context Define the context used to store options during execution.
-  ///
-  /// \return Status.
-  Status Initialize(const std::shared_ptr<Context> &model_context);
-
  private:
   std::vector<std::shared_ptr<ModelImpl>> models_;
   std::vector<std::string> executor_input_names_;
   std::vector<std::string> executor_output_names_;
   std::vector<std::vector<std::string>> subgraph_input_names_;
@@ -124,6 +120,7 @@ class MS_API MultiModelRunner {
   Status UpdateConfig(const std::vector<char> &section, const std::pair<std::vector<char>, std::vector<char>> &config);
   std::vector<ModelExecutor> executors_;
   std::vector<std::shared_ptr<ModelImpl>> models_;
+  std::vector<std::vector<MSTensor>> model_output_tensors_;
   std::string config_file_ = "";
   ConfigInfos config_info_;
 };
diff --git a/mindspore-lite/src/extendrt/cxx_api/model/multi_model_runner.cc b/mindspore-lite/src/extendrt/cxx_api/model/multi_model_runner.cc
index 2e5cbcaa..f8cb4cd6 100644
--- a/mindspore-lite/src/extendrt/cxx_api/model/multi_model_runner.cc
+++ b/mindspore-lite/src/extendrt/cxx_api/model/multi_model_runner.cc
@@ -31,6 +31,9 @@ std::mutex g_load_mindir_lock;
 std::mutex g_config_lock;
 constexpr size_t kMaxSectionNum = 100;
 constexpr size_t kMaxConfigNumPerSection = 1000;
+constexpr size_t kInferPathSize = 2;
+constexpr size_t kInferPathBeginIndex = 0;
+constexpr size_t kInferPathEndIndex = 1;
 
 FuncGraphPtr LoadGraphByBufferImpl(const void *model_buff, const size_t &model_size, const ModelType &model_type,
                                    const std::shared_ptr<Context> &model_context, const std::string &model_path) {
@@ -77,16 +80,85 @@ void SetInputOutputNames(const size_t &cnode_count, const std::vector<std::vector<std::string>> &subgraph_input_names,
   model_impl_ptr->UpdateConfig(lite::kInnerGraphSplit, std::make_pair(lite::kInnerOutputNames, output_name_str));
 }
 
+Status GetDeviceIdFromContext(const std::shared_ptr<Context> &model_context, int32_t *device_id) {
+  MS_CHECK_TRUE_MSG(model_context != nullptr, kLiteNullptr, "model_context is nullptr!");
+  if (model_context->MutableDeviceInfo().empty()) {
+    MS_LOG(ERROR) << "device info of context is empty!";
+    return kLiteError;
+  }
+  auto device_info = model_context->MutableDeviceInfo()[0];
+  if (device_info == nullptr) {
+    MS_LOG(ERROR) << "device info is nullptr!";
+    return kLiteError;
+  }
+  if (device_info->GetDeviceType() != DeviceType::kAscend) {
+    MS_LOG(ERROR) << "ModelExecutor only supports the Ascend backend!";
+    return kLiteError;
+  }
+  auto ascend_device = device_info->Cast<mindspore::AscendDeviceInfo>();
+  if (ascend_device == nullptr) {
+    MS_LOG(ERROR) << "not an Ascend device!";
+    return kLiteError;
+  }
+  *device_id = ascend_device->GetDeviceID();
+  return kSuccess;
+}
+
+Status GetOmInfoFromCnode(const CNodePtr &cnode, void **om_data, size_t *om_size) {
+  MS_CHECK_TRUE_MSG(om_data != nullptr, kLiteNullptr, "om_data is nullptr!");
+  MS_CHECK_TRUE_MSG(om_size != nullptr, kLiteNullptr, "om_size is nullptr!");
+  std::vector<AnfWithOutIndex> inputs;
+  std::vector<AnfWithOutIndex> outputs;
+  auto ret = mindspore::FuncGraphUtils::GetCNodeInputsOutputs(cnode, &inputs, &outputs);
+  if (ret == false) {
+    MS_LOG(ERROR) << "GetCNodeInputsOutputs failed!";
+    return kLiteError;
+  }
+  auto om_inputs = inputs.back();
+  auto tensor_data = mindspore::FuncGraphUtils::GetConstNodeValue(om_inputs.first);
+  if (tensor_data == nullptr) {
+    MS_LOG(ERROR) << "tensor_data is nullptr!";
+    return kLiteError;
+  }
+  *om_data = tensor_data->data_c();
+  *om_size = tensor_data->Size();
+  return kSuccess;
+}
+
+Status UpdateModelConfig(const std::string &config_file, const ConfigInfos &config_info,
+                         const std::shared_ptr<ModelImpl> &model_impl_ptr) {
+  if (!config_file.empty()) {
+    auto ret = model_impl_ptr->LoadConfig(config_file);
+    if (ret != kSuccess) {
+      MS_LOG(ERROR) << "Model LoadConfig failed!";
+      return ret;
+    }
+  }
+  for (auto key_value : config_info) {
+    for (auto pair : key_value.second) {
+      auto ret = model_impl_ptr->UpdateConfig(key_value.first, pair);
+      if (ret != kSuccess) {
+        MS_LOG(ERROR) << "Model UpdateConfig failed!";
+        return ret;
+      }
+    }
+  }
+  return kSuccess;
+}
+
 Status BuildModels(const FuncGraphPtr &func_graph, const std::vector<std::vector<std::string>> &subgraph_input_names,
                    const std::vector<std::vector<std::string>> &subgraph_output_names,
                    const std::shared_ptr<Context> &model_context, const std::string &config_file,
-                   const ConfigInfos &config_info, std::vector<std::shared_ptr<ModelImpl>> *models) {
+                   const ConfigInfos &config_info, std::vector<std::shared_ptr<ModelImpl>> *models,
+                   std::vector<std::vector<MSTensor>> *model_output_tensors) {
   MS_CHECK_TRUE_MSG(func_graph != nullptr, kLiteError, "func_graph is nullptr!");
   MS_CHECK_TRUE_MSG(model_context != nullptr, kLiteError, "model_context is nullptr!");
   MS_CHECK_TRUE_MSG(models != nullptr, kLiteError, "modes is nullptr!");
   auto nodes = func_graph->TopoSort(func_graph->get_return());
   MS_CHECK_TRUE_MSG(!nodes.empty(), kLiteError, "There are no nodes in the func_graph");
   size_t cnode_count = 0;
+  std::vector<void *> om_datas;
+  std::vector<size_t> om_sizes;
   for (const auto &node : nodes) {
     auto cnode = node->cast<CNodePtr>();
     if (!cnode || !mindspore::AnfUtils::IsRealKernel(cnode)) {
@@ -104,37 +176,66 @@ Status BuildModels(const FuncGraphPtr &func_graph, const std::vector<std::vector<std::string>>
       cnode->set_inputs(inputs);
     }
-    std::vector<AnfWithOutIndex> inputs;
-    std::vector<AnfWithOutIndex> outputs;
-    mindspore::FuncGraphUtils::GetCNodeInputsOutputs(cnode, &inputs, &outputs);
-    auto om_inputs = inputs.back();
-    auto tensor_data = mindspore::FuncGraphUtils::GetConstNodeValue(om_inputs.first);
-    auto om_data = tensor_data->data_c();
-    auto om_size = tensor_data->Size();
+    void *om_data = nullptr;
+    size_t om_size = 0;
+    auto ret = GetOmInfoFromCnode(cnode, &om_data, &om_size);
+    if (ret != kSuccess) {
+      MS_LOG(ERROR) << "GetOmInfoFromCnode failed!";
+      return ret;
+    }
+    om_datas.push_back(om_data);
+    om_sizes.push_back(om_size);
     auto model_impl_ptr = std::make_shared<ModelImpl>();
     MS_CHECK_TRUE_MSG(model_impl_ptr != nullptr, kLiteError, "model_impl_ptr is nullptr");
-    if (!config_file.empty()) {
-      auto ret = model_impl_ptr->LoadConfig(config_file);
-      if (ret != kSuccess) {
-        MS_LOG(ERROR) << "Model LoadConfig failed!";
-        return ret;
-      }
+    // first pass: build once with kInnerCalcWorkspaceSize to measure every model's workspace requirement
+    model_impl_ptr->UpdateConfig(lite::kInnerCommon, std::make_pair(lite::kInnerCalcWorkspaceSize, "true"));
+    ret = model_impl_ptr->Build(om_data, om_size, kOM, model_context);
+    if (ret != kSuccess) {
+      MS_LOG(ERROR) << "Model build failed!";
+      return ret;
     }
-    for (auto key_value : config_info) {
-      for (auto pair : key_value.second) {
-        auto ret = model_impl_ptr->UpdateConfig(key_value.first, pair);
-        if (ret != kSuccess) {
-          MS_LOG(ERROR) << "Model updateconfig failed!";
-          return ret;
-        }
-      }
+  }
+  // second pass: rebuild each model with workspace sharing enabled
+  for (size_t i = 0; i < om_datas.size(); i++) {
+    auto om_data = om_datas[i];
+    auto om_size = om_sizes[i];
+    auto model_impl_ptr = std::make_shared<ModelImpl>();
+    MS_CHECK_TRUE_MSG(model_impl_ptr != nullptr, kLiteError, "model_impl_ptr is nullptr");
+    auto ret = model_impl_ptr->UpdateConfig(lite::kInnerCommon, std::make_pair(lite::kInnerSharingWorkspace, "true"));
+    if (ret != kSuccess) {
+      MS_LOG(ERROR) << "UpdateConfig failed!";
+      return ret;
+    }
+    ret = model_impl_ptr->UpdateConfig(lite::kInnerCommon, std::make_pair(lite::kInnerWorkspace, "true"));
+    if (ret != kSuccess) {
+      MS_LOG(ERROR) << "UpdateConfig failed!";
+      return ret;
     }
-    SetInputOutputNames(cnode_count, subgraph_input_names, subgraph_output_names, model_impl_ptr);
-    auto ret = model_impl_ptr->Build(om_data, om_size, kOM, model_context);
+    ret = UpdateModelConfig(config_file, config_info, model_impl_ptr);
+    if (ret != kSuccess) {
+      MS_LOG(ERROR) << "update model config failed!";
+      return ret;
+    }
+    SetInputOutputNames(i + 1, subgraph_input_names, subgraph_output_names, model_impl_ptr);
+    ret = model_impl_ptr->Build(om_data, om_size, kOM, model_context);
     if (ret != kSuccess) {
       MS_LOG(ERROR) << "Model build failed!";
       return ret;
     }
+    int32_t device_id = 0;
+    ret = GetDeviceIdFromContext(model_context, &device_id);
+    if (ret != kSuccess) {
+      MS_LOG(ERROR) << "GetDeviceIdFromContext failed!";
+      return ret;
+    }
+    auto model_outputs = model_impl_ptr->GetOutputs();
+    std::vector<MSTensor> output_tensors = {};
+    for (auto output : model_outputs) {
+      auto tensor = MSTensor::CreateTensor(output.Name(), output, "ascend", device_id);
+      MS_CHECK_TRUE_MSG(tensor != nullptr, kLiteError, "tensor is nullptr!");
+      output_tensors.push_back(*tensor);
+      delete tensor;
+    }
+    model_output_tensors->push_back(output_tensors);
     models->emplace_back(model_impl_ptr);
   }
   return kSuccess;
@@ -168,22 +269,32 @@ Status MultiModelRunner::Build(const std::vector<char> &model_path, const ModelT
   auto extended_subgraph_input_output =
     GetValue<std::vector<std::vector<std::vector<std::string>>>>(extended_subgraph_input_output_val);
   auto ret = BuildModels(func_graph, subgraph_input_names, subgraph_output_names, model_context, config_file_,
-                         config_info_, &models_);
+                         config_info_, &models_, &model_output_tensors_);
   if (ret != kSuccess) {
     MS_LOG(ERROR) << "BuildModels failed!";
     return ret;
   }
   for (size_t executor_id = 0; executor_id < subgraph_infer_path.size(); executor_id++) {
     std::vector<std::shared_ptr<ModelImpl>> curr_executor_models;
+    std::vector<std::vector<MSTensor>> curr_executor_model_output_tensors;
+    MS_CHECK_TRUE_MSG(extended_subgraph_input_output.size() > executor_id, kLiteError,
+                      "size of extended_subgraph_input_output should be larger than executor_id!");
     auto curr_executor_input_output_names = extended_subgraph_input_output[executor_id];
     std::vector<std::vector<std::string>> curr_subgraph_input_names;
-    auto min_index = subgraph_infer_path[executor_id][0];
-    auto max_index = subgraph_infer_path[executor_id][1];
+    MS_CHECK_TRUE_MSG(subgraph_infer_path[executor_id].size() == kInferPathSize, kLiteError,
+                      "size of elements of subgraph_infer_path should be 2!");
+    auto min_index = subgraph_infer_path[executor_id][kInferPathBeginIndex];
+    auto max_index = subgraph_infer_path[executor_id][kInferPathEndIndex];
     for (auto subgraph_id = min_index; subgraph_id <= max_index; subgraph_id++) {
+      MS_CHECK_TRUE_MSG(model_output_tensors_.size() > static_cast<size_t>(subgraph_id), kLiteError,
+                        "subgraph id out of range!");
+      curr_executor_model_output_tensors.emplace_back(model_output_tensors_[subgraph_id]);
       curr_executor_models.emplace_back(models_[subgraph_id]);
       curr_subgraph_input_names.push_back(subgraph_input_names[subgraph_id]);
     }
     std::vector<std::string> curr_executor_input_names;
+    MS_CHECK_TRUE_MSG(!curr_executor_input_output_names.empty(), kLiteError,
+                      "curr_executor_input_output_names should not be empty!");
     if (curr_executor_input_output_names[0].empty()) {
       for (auto single_subgraph_input_names : curr_subgraph_input_names) {
         for (auto input_name : single_subgraph_input_names) {
@@ -198,12 +309,7 @@ Status MultiModelRunner::Build(const std::vector<char> &model_path, const ModelT
                                                      curr_executor_input_output_names[0].end());
     }
     auto executor = ModelExecutor(curr_executor_models, curr_executor_input_names, curr_executor_input_output_names[1],
-                                  curr_subgraph_input_names);
-    ret = executor.Initialize(model_context);
-    if (ret != kSuccess) {
-      MS_LOG(ERROR) << "executor init failed!";
-      return ret;
-    }
+                                  curr_subgraph_input_names, curr_executor_model_output_tensors);
     executors_.push_back(executor);
   }
   return kSuccess;
@@ -238,46 +344,6 @@ Status MultiModelRunner::UpdateConfig(const std::vector<char> &section,
   return kSuccess;
 }
 
-Status ModelExecutor::Initialize(const std::shared_ptr<Context> &model_context) {
-  if (initialized_) {
-    MS_LOG(WARNING) << "ModelExecutor has been initialized!";
-    return kSuccess;
-  }
-  MS_CHECK_TRUE_MSG(model_context != nullptr, kLiteError, "model_context is nullptr!");
-  int32_t device_id = 0;
-  if (!model_context->MutableDeviceInfo().empty()) {
-    auto device_info = model_context->MutableDeviceInfo()[0];
-    if (device_info == nullptr) {
-      MS_LOG(ERROR) << "device info is nullptr!";
-      return kLiteError;
-    }
-    if (device_info->GetDeviceType() != DeviceType::kAscend) {
-      MS_LOG(ERROR) << "ModelExecutor only support ascend backend!";
-      return kLiteError;
-    }
-    auto ascend_device = device_info->Cast<mindspore::AscendDeviceInfo>();
-    if (ascend_device == nullptr) {
-      MS_LOG(ERROR) << "not ascend device!";
-      return kLiteError;
-    }
-    device_id = ascend_device->GetDeviceID();
-  }
-  for (auto model : models_) {
-    MS_CHECK_TRUE_MSG(model != nullptr, kLiteError, "model is nullptr!");
-    auto model_outputs = model->GetOutputs();
-    std::vector<MSTensor> output_tensors = {};
-    for (auto output : model_outputs) {
-      auto tensor = MSTensor::CreateTensor(output.Name(), output, "ascend", device_id);
-      MS_CHECK_TRUE_MSG(tensor != nullptr, kLiteError, "tensor is nullptr!");
-      output_tensors.push_back(*tensor);
-      delete tensor;
-    }
-    model_output_tensors_.push_back(output_tensors);
-  }
-  initialized_ = true;
-  return kSuccess;
-}
-
 Status ModelExecutor::Predict(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs) {
   MS_CHECK_TRUE_MSG(outputs != nullptr, kLiteError, "outputs is nullptr!");
   MS_CHECK_TRUE_MSG(models_.size() == model_output_tensors_.size(), kLiteError,
diff --git a/mindspore-lite/src/extendrt/delegate/ascend_acl/acl_graph_executor.cc b/mindspore-lite/src/extendrt/delegate/ascend_acl/acl_graph_executor.cc
index fc71804d..923dbf59 100644
--- a/mindspore-lite/src/extendrt/delegate/ascend_acl/acl_graph_executor.cc
+++ b/mindspore-lite/src/extendrt/delegate/ascend_acl/acl_graph_executor.cc
@@ -303,12 +303,6 @@ bool AclGraphExecutor::CompileGraph(const void *model_data, size_t data_size,
     MS_LOG(ERROR) << "Load om data failed.";
     return false;
   }
-  std::vector<MSTensor> output_tensors;
-  auto ret = GetOutputTensors(output_names_, &output_tensors);
-  if (ret != kSuccess) {
-    MS_LOG(ERROR) << "GetOutputTensors failed!";
-    return false;
-  }
   AclEnvGuard::AddModel(model_infer_);
   load_model_ = true;
   return true;
diff --git a/mindspore-lite/src/extendrt/session/delegate_session.cc b/mindspore-lite/src/extendrt/session/delegate_session.cc
index 579efa3a..f569c6e0 100644
--- a/mindspore-lite/src/extendrt/session/delegate_session.cc
+++ b/mindspore-lite/src/extendrt/session/delegate_session.cc
@@ -58,6 +58,10 @@ Status GraphSinkSession::CompileGraph(const void *model_data, size_t data_size,
     MS_LOG(ERROR) << "GraphSinkSession::CompileGraph compile graph failed";
     return kCoreFailed;
   }
+  auto prepare_share_mem = GetConfigOption(lite::kInnerCommon, lite::kInnerCalcWorkspaceSize);
+  if (prepare_share_mem == "true") {
+    return kSuccess;
+  }
   sharable_handle_ = graph_executor_->GetShareableHandle();
   DelegateGraphInfo graph_info;
   auto status = InitGraphInfo(&graph_info, *graph_id);
diff --git a/mindspore-lite/test/st/python/test_inference_cloud.py b/mindspore-lite/test/st/python/test_inference_cloud.py
index 0130bfd2..7572887b 100644
--- a/mindspore-lite/test/st/python/test_inference_cloud.py
+++ b/mindspore-lite/test/st/python/test_inference_cloud.py
@@ -193,7 +193,6 @@ def test_model_group_weight_workspace_for_ascend(model_path, in_data_path, input
     # use model one for inference
     test_model_inference_ascend(model_file, in_data_file_list,
                                 shapes)
-
 if __name__ == '__main__':
     model_file = sys.argv[1]
     in_data_file = sys.argv[2]
@@ -225,6 +224,7 @@ if __name__ == '__main__':
         print("run model inference ascend success.")
         test_parallel_inference_ascend(model_file, in_data_file_list, shapes)
         print("run parallel inference ascend success.")
+        print("run graph split success.")
     elif backend == "CPU_PARALLEL":
         test_parallel_inference_cpu(model_file, in_data_file_list, shapes)
         print("run parallel inference cpu success.")
diff --git a/mindspore-lite/test/st/python/test_inference_cloud_nocofig.py b/mindspore-lite/test/st/python/test_inference_cloud_nocofig.py
index 473f3902..9e19bbe9 100644
--- a/mindspore-lite/test/st/python/test_inference_cloud_nocofig.py
+++ b/mindspore-lite/test/st/python/test_inference_cloud_nocofig.py
@@ -76,6 +76,23 @@ def test_model_group_inference_ascend(mindir_dir):
     outputs = model1.predict(inputs)
     assert (outputs[0].get_data_to_numpy() == (np.ones((4, 4), np.float32) * 2)).all()
 
+@lite_test
+def test_model_invalid_dynamic_dims_error_ascend(mindir_dir):
+    context = mslite.Context()
+    context.target = ["ascend"]
+    context.ascend.device_id = 0
+    context.ascend.provider = "ge"
+    model0 = mslite.Model()
+
+    model_path0 = os.path.join(mindir_dir, "model_group_first.mindir")
+    config_dict = {"ascend_context": {"input_format": "NHWC", "input_shape": "input:[1,-1,-1,3]",
+                                      "dynamic_dims": "[19200,960],960"}}
+    try:
+        model0.build_from_file(model_path0, mslite.ModelType.MINDIR, context, "", config_dict)
+        assert False
+    except RuntimeError as ex:
+        assert "build_from_file failed" in str(ex)
+
 @lite_test
 def test_graph_split_ascend(mindir_dir):
     dtype_map = {
@@ -117,29 +134,12 @@ def test_graph_split_ascend(mindir_dir):
     except:
         raise RuntimeError('run graph split model failed!')
 
-@lite_test
-def test_model_invalid_dynamic_dims_error_ascend(mindir_dir):
-    context = mslite.Context()
-    context.target = ["ascend"]
-    context.ascend.device_id = 0
-    context.ascend.provider = "ge"
-    model0 = mslite.Model()
-
-    model_path0 = os.path.join(mindir_dir, "model_group_first.mindir")
-    config_dict = {"ascend_context": {"input_format": "NHWC", "input_shape": "input:[1,-1,-1,3]",
-                                      "dynamic_dims": "[19200,960],960"}}
-    try:
-        model0.build_from_file(model_path0, mslite.ModelType.MINDIR, context, "", config_dict)
-        assert False
-    except RuntimeError as ex:
-        assert "build_from_file failed" in str(ex)
-
-
 if __name__ == '__main__':
     print("test_inference_cloud_nocofig.py: begin run testcases.")
     model_dir = sys.argv[1]
     backend = sys.argv[2]
    if backend == "Ascend":
+        test_graph_split_ascend(model_dir)
         test_model_group_inference_ascend(model_dir)
         test_model_invalid_dynamic_dims_error_ascend(model_dir)
     else:
-- 
Gitee