/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/lite_session.h"
#include <set>
#include "src/runtime/pack_weight_manager.h"
#include "src/runtime/runtime_pass.h"
#if defined(LINUX_RUNTIME)
#include <malloc.h>
#endif
#include <vector>
#include <utility>
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
#include "src/runtime/scheduler.h"
#include "src/runtime/inner_allocator.h"
#include "src/runtime/executor.h"
#include "src/common/context_util.h"
#include "src/common/utils.h"
#include "src/common/graph_util.h"
#include "src/common/tensor_util.h"
#include "src/common/file_utils.h"
#include "src/runtime/lite_model.h"
#include "src/runtime/weight_decoder.h"
#include "src/runtime/runtime_allocator.h"
#include "src/runtime/kernel_exec_util.h"
#ifndef CUSTOM_KERNEL_REGISTRY_CLIP
#include "src/registry/register_kernel_impl.h"
#endif
#ifdef ENABLE_MINDRT
#include "src/runtime/mindrt_executor.h"
#endif
#if SUPPORT_NPU
#include "src/runtime/delegate/npu/npu_delegate.h"
#endif
#if GPU_OPENCL
#include "src/runtime/kernel/opencl/opencl_subgraph.h"
#endif
#if GPU_TENSORRT
#include "src/extendrt/delegate/tensorrt/tensorrt_delegate.h"
#endif
#include "src/runtime/runtime_convert.h"
#include "extendrt/mindir_loader/model_loader.h"
#include "thread/parallel_thread_pool_manager.h"
using AbstractBaseModel = mindspore::infer::AbstractBaseModel;
namespace mindspore {
#ifdef USE_GLOG
extern "C" {
extern void mindspore_log_init();
}
#endif
namespace lite {
namespace {
bool ExistCustomCpuKernel() {
#ifndef CUSTOM_KERNEL_REGISTRY_CLIP
const std::string kArchCPU = "CPU";
auto custom_kernel_creators = registry::RegistryKernelImpl::GetInstance()->GetCustomKernelCreators();
for (const auto &custom_kernel_creator : custom_kernel_creators) { // <provider, <arch, <type, CreateKernel*>>>
if (custom_kernel_creator.second.empty()) {
continue;
}
if (std::any_of(
custom_kernel_creator.second.begin(), custom_kernel_creator.second.end(),
[kArchCPU](const std::pair<std::string, std::unordered_map<std::string, registry::CreateKernel *>> &pair) {
return pair.first == kArchCPU && !pair.second.empty();
})) {
return true;
}
}
#endif
return false;
}
} // namespace
LiteSession::LiteSession() {
#ifdef USE_GLOG
#if defined(_WIN32) || defined(_WIN64) || defined(__APPLE__)
#ifdef _MSC_VER
mindspore::mindspore_log_init();
#endif
#else
mindspore::mindspore_log_init();
#endif
#endif
this->is_running_.store(false);
}
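// Checks a converted tensor: tensor lists and graph input/output tensors are always accepted;
// a non-const tensor that already carries data is rejected.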
int LiteSession::CheckTensorValid(lite::Tensor *dst_tensor) {
MS_ASSERT(dst_tensor != nullptr);
if (dst_tensor->data_type() == kObjectTypeTensorType) {
return RET_OK;
}
if (dst_tensor->IsGraphInput() || dst_tensor->IsGraphOutput()) {
return RET_OK;
}
if (dst_tensor->IsConst() == false && dst_tensor->data() != nullptr) {
return RET_ERROR;
}
return RET_OK;
}
void LiteSession::ConvertTensorsQuantParam(const schema::Tensor *src_tensor, lite::Tensor *dst_tensor) {
MS_ASSERT(src_tensor != nullptr);
MS_ASSERT(dst_tensor != nullptr);
auto quant_params = src_tensor->quantParams();
if (quant_params != nullptr) {
for (size_t j = 0; j < quant_params->size(); j++) {
auto quant_param = quant_params->Get(j);
LiteQuantParam quant_arg{};
if (quant_param == nullptr) {
quant_arg.inited = false;
} else {
quant_arg.inited = true;
quant_arg.bitNum = quant_param->numBits();
quant_arg.scale = quant_param->scale();
quant_arg.zeroPoint = quant_param->zeroPoint();
quant_arg.var_corr = quant_param->varCorr();
quant_arg.mean_corr = quant_param->meanCorr();
quant_arg.roundType = quant_param->roundType();
quant_arg.multiplier = quant_param->multiplier();
quant_arg.dstDtype = quant_param->dstDtype();
quant_arg.min = quant_param->min();
quant_arg.max = quant_param->max();
}
dst_tensor->AddQuantParam(quant_arg);
}
}
auto quant_clusters = src_tensor->quantClusters();
if (quant_clusters != nullptr) {
std::vector<float> clusters;
for (size_t j = 0; j < quant_clusters->size(); j++) {
clusters.push_back(quant_clusters->Get(j));
}
dst_tensor->set_quant_clusters(clusters);
}
}
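// Moves the schema tensor's raw data into the runtime tensor: tensor lists are decoded,
// compressed weights are decompressed via WeightDecoder, otherwise the data is taken over as-is.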
int LiteSession::ConvertTensorsData(const lite::LiteModel *model, size_t tensor_index, lite::Tensor *dst_tensor) {
MS_ASSERT(model != nullptr);
MS_ASSERT(dst_tensor != nullptr);
auto src_tensor = model->GetSchemaTensor(tensor_index);
if (src_tensor == nullptr || src_tensor->handler() == nullptr || src_tensor->data() == nullptr ||
src_tensor->length() == 0) {
MS_LOG(DEBUG) << "No valid data converted.";
return RET_OK;
}
/* tensor list convert */
if (dst_tensor->data_type() == kObjectTypeTensorType) {
const int *src_data = reinterpret_cast<const int *>(src_tensor->data());
return DecodeTensorLsit(dst_tensor, src_data);
}
/* normal tensor check */
auto shape_info = dst_tensor->shape();
if (shape_info.end() !=
std::find_if(shape_info.begin(), shape_info.end(), [](const int shape) { return shape <= 0; })) {
MS_LOG(ERROR) << "Invalid shape size, tensor name: " << src_tensor->handler()->name();
return RET_ERROR;
}
auto ret = WeightDecoder::DecompressTensor(*src_tensor, dst_tensor);
if (ret == RET_NO_CHANGE) {
if (dst_tensor->Size() == 0 || src_tensor->length() < dst_tensor->Size()) {
MS_LOG(ERROR) << "Tensor data shape invalid";
return RET_ERROR;
}
auto data_pair = src_tensor->ReleaseData();
dst_tensor->set_data(data_pair.second);
dst_tensor->set_own_data(data_pair.first);
} else if (ret != RET_OK) {
MS_LOG(ERROR) << "Decompress tensor data failed: " << ret;
return ret;
}
return RET_OK;
}
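// Creates a runtime lite::Tensor (or tensor list) from a schema::Tensor after validating its
// data type and dims; the tensor name is copied when present.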
lite::Tensor *LiteSession::ConvertTensor(const schema::Tensor &src_tensor) {
int32_t data_type = src_tensor.dataType();
if (data_type <= kTypeUnknown || data_type >= kMonadTypeEnd) {
MS_LOG(ERROR) << "invalid data type. " << data_type;
return nullptr;
}
auto src_category = TensorCategory(src_tensor);
std::vector<int> shape;
if (src_tensor.dims() == nullptr) {
MS_LOG(DEBUG) << "Dims of src_tensor is nullptr";
}
if (src_tensor.dims() != nullptr) {
if (src_tensor.dataType() == kObjectTypeString && src_tensor.data() != nullptr) {
shape.push_back(src_tensor.data()->size());
} else {
for (size_t j = 0; j < src_tensor.dims()->size(); j++) {
shape.push_back(src_tensor.dims()->data()[j]);
}
}
if (std::any_of(shape.begin(), shape.end(), [](const int &element) { return element < 0 && element != -1; })) {
MS_LOG(ERROR) << "Dims of src_tensor is unsupported";
return nullptr;
}
}
lite::Tensor *dst_tensor = nullptr;
if (TypeId(data_type) == kObjectTypeTensorType) {
auto src_data = src_tensor.data()->data();
dst_tensor = CreateTensorList(shape, src_category, src_data);
} else {
dst_tensor = new (std::nothrow)
Tensor(TypeId(data_type), shape, static_cast<mindspore::Format>(src_tensor.format()), src_category);
}
if (src_tensor.name() != nullptr) {
dst_tensor->set_tensor_name(src_tensor.name()->str());
}
return dst_tensor;
}
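// Converts all schema tensors of the model into runtime tensors, attaches their data and quant
// params, marks graph inputs/outputs, and stores them in tensors_.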
int LiteSession::ConvertTensors(const lite::Model *model) {
MS_ASSERT(model != nullptr);
auto lite_model = reinterpret_cast<const lite::LiteModel *>(model);
uint32_t tensor_count = model->graph_.all_tensors_.size();
auto model_input_indices = model->graph_.input_indices_;
auto model_output_indices = model->graph_.output_indices_;
for (uint32_t i = 0; i < tensor_count; ++i) {
auto *src_tensor = model->graph_.all_tensors_[i];
if (src_tensor == nullptr) {
MS_LOG(ERROR) << i << "th tensor in model is nullptr";
return RET_NULL_PTR;
}
auto *dst_tensor = ConvertTensor(*src_tensor);
if (dst_tensor == nullptr) {
MS_LOG(ERROR) << "Convert new " << i << "th tensor failed!";
return RET_NULL_PTR;
}
auto ret = ConvertTensorsData(lite_model, i, dst_tensor);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Convert data of " << i << "th tensor failed";
delete dst_tensor;
return ret;
}
ConvertTensorsQuantParam(src_tensor, dst_tensor);
if (IsContain(model_input_indices, i)) {
dst_tensor->set_category(Category::GRAPH_INPUT);
}
if (IsContain(model_output_indices, i)) {
// a tensor that is both a graph input and a graph output is treated as an input.
if (!dst_tensor->IsGraphInput()) {
dst_tensor->set_category(Category::GRAPH_OUTPUT);
}
}
ret = CheckTensorValid(dst_tensor);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Check " << i << "th tensor failed";
delete dst_tensor;
return ret;
}
this->tensors_.emplace_back(dst_tensor);
}
return RET_OK;
}
void LiteSession::InitGraphInputTensors(const lite::Model *model) {
MS_ASSERT(model != nullptr);
auto graph_in_size = model->graph_.input_indices_.size();
for (size_t i = 0; i < graph_in_size; ++i) {
auto in_tensor_idx = model->graph_.input_indices_[i];
MS_ASSERT(in_tensor_idx < this->tensors_.size());
auto *in_tensor = this->tensors_.at(in_tensor_idx);
MS_ASSERT(in_tensor != nullptr);
this->inputs_.emplace_back(in_tensor);
}
}
void LiteSession::InitGraphInputMSTensors() {
MS_ASSERT(this->input_vec_.empty());
for (auto &input_tensor : this->inputs_) {
MS_ASSERT(input_tensor != nullptr);
this->input_vec_.emplace_back(input_tensor);
}
}
void LiteSession::InitGraphOutputTensors(const lite::Model *model) {
MS_ASSERT(model != nullptr);
MS_ASSERT(this->outputs_.empty());
auto graph_out_size = model->graph_.output_indices_.size();
for (size_t i = 0; i < graph_out_size; ++i) {
auto out_tensor_idx = model->graph_.output_indices_[i];
MS_ASSERT(out_tensor_idx < this->tensors_.size());
auto *out_tensor = this->tensors_.at(out_tensor_idx);
MS_ASSERT(out_tensor != nullptr);
this->outputs_.emplace_back(out_tensor);
}
}
void LiteSession::InitGraphInputMap(const lite::Model *model) {
MS_ASSERT(model != nullptr);
MS_ASSERT(this->input_map_.empty());
MS_ASSERT(this->input_shape_map_.empty());
auto graph_input_node_indexes = GetGraphInputNodes(model);
auto graph_in_size = model->graph_.input_indices_.size();
for (auto in_node_index : graph_input_node_indexes) {
auto in_node = model->graph_.all_nodes_[in_node_index];
MS_ASSERT(in_node != nullptr);
auto in_size = in_node->input_indices_.size();
for (size_t i = 0; i < in_size; ++i) {
MS_ASSERT(this->input_map_.find(in_node->name_ + std::to_string(i)) == this->input_map_.end());
auto in_tensor_index = size_t(in_node->input_indices_[i]);
bool is_graph_input = false;
for (size_t j = 0; j < graph_in_size; ++j) {
if (in_tensor_index == model->graph_.input_indices_[j]) {
is_graph_input = true;
break;
}
}
if (!is_graph_input) {
continue;
}
MS_ASSERT(in_tensor_index < this->tensors_.size());
auto *in_tensor = this->tensors_.at(in_tensor_index);
if (in_tensor == nullptr) {
MS_LOG(ERROR) << "in_tensor is null!";
return;
}
auto tensor_name = in_node->name_ + std::to_string(i);
this->input_map_[tensor_name] = in_tensor;
this->input_shape_map_[in_tensor] = in_tensor->shape();
if (!in_tensor->tensor_name().empty()) {
this->input_map_[in_tensor->tensor_name()] = in_tensor;
}
}
}
}
void LiteSession::InitGraphOutputNodeMap(const lite::Model *model) {
MS_ASSERT(model != nullptr);
auto graph_output_node_indexes = GetGraphOutputNodes(model);
auto graph_out_size = model->graph_.output_indices_.size();
for (auto out_node_index : graph_output_node_indexes) {
auto out_node = model->graph_.all_nodes_[out_node_index];
MS_ASSERT(out_node != nullptr);
auto out_size = out_node->output_indices_.size();
for (size_t i = 0; i < out_size; ++i) {
auto out_tensor_index = out_node->output_indices_[i];
bool is_graph_output = false;
for (size_t j = 0; j < graph_out_size; ++j) {
if (out_tensor_index == model->graph_.output_indices_[j]) {
is_graph_output = true;
break;
}
}
if (!is_graph_output) {
continue;
}
MS_ASSERT(out_tensor_index < this->tensors_.size());
auto *out_tensor = this->tensors_.at(out_tensor_index);
if (out_tensor == nullptr) {
MS_LOG(ERROR) << "out_tensor is null!";
return;
}
this->output_node_map_[out_node->name_].emplace_back(out_tensor);
}
}
}
void LiteSession::InitGraphOutputTensorMap(const lite::Model *model) {
MS_ASSERT(model != nullptr);
MS_ASSERT(this->output_tensor_map_.empty());
auto graph_out_size = model->graph_.output_indices_.size();
for (size_t i = 0; i < graph_out_size; ++i) {
size_t graph_out_index = model->graph_.output_indices_[i];
MS_ASSERT(graph_out_index < this->tensors_.size());
auto *out_tensor = this->tensors_.at(graph_out_index);
if (out_tensor == nullptr) {
MS_LOG(ERROR) << "out_tensor is null!";
return;
}
if (!out_tensor->tensor_name().empty()) {
this->output_tensor_map_.insert(std::make_pair(out_tensor->tensor_name(), out_tensor));
this->output_tensor_names_.emplace_back(out_tensor->tensor_name());
} else {
this->output_tensor_map_.insert(std::make_pair(std::to_string(graph_out_index), out_tensor));
this->output_tensor_names_.emplace_back(std::to_string(graph_out_index));
}
}
}
void LiteSession::InitGraphInOutTensorsMap(const lite::Model *model) {
InitGraphInputMSTensors();
InitGraphInputMap(model);
InitGraphOutputNodeMap(model);
InitGraphOutputTensorMap(model);
}
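// Duplicates every graph output tensor and redirects kernels/subgraphs to write into the
// duplicate, so the original output tensor stays reserved for the user.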
int LiteSession::IsolateOutputTensor() {
for (Tensor *src_tensor : outputs_) {
if (src_tensor->IsGraphInput()) {
continue;
}
Tensor *new_tensor = new (std::nothrow)
Tensor(src_tensor->data_type(), src_tensor->shape(), src_tensor->format(), Category::GRAPH_OUTPUT);
if (MS_UNLIKELY(new_tensor == nullptr)) {
MS_LOG(ERROR) << "duplicate new output failed.";
return RET_NULL_PTR;
}
new_tensor->set_allocator(src_tensor->allocator()); /* GPU use opencl allocator */
new_tensor->set_tensor_name(src_tensor->tensor_name() + "_duplicate");
for (LiteQuantParam quant : src_tensor->quant_params()) {
new_tensor->AddQuantParam(quant);
}
new_tensor->set_init_ref_count(src_tensor->init_ref_count());
/* src tensor set for graph calculate */
#ifdef ENABLE_FP16
if (src_tensor->data_type() == kNumberTypeFloat16) {
src_tensor->set_data_type(kNumberTypeFloat32);
}
#endif
src_tensor->set_ref_count(1);
isolate_graph_output_map_.insert(std::make_pair(new_tensor, src_tensor));
/* set new tensor for calculate */
for (auto subgraph : kernels_) {
/* subgraph input and output */
auto in_size = subgraph->in_tensors().size();
for (size_t i = 0; i < in_size; ++i) {
if (subgraph->in_tensors()[i] == src_tensor) {
subgraph->set_in_tensor(new_tensor, i);
}
}
auto out_size = subgraph->out_tensors().size();
for (size_t i = 0; i < out_size; ++i) {
if (subgraph->out_tensors()[i] == src_tensor) {
subgraph->set_out_tensor(new_tensor, i);
}
}
if (subgraph->desc().arch == kernel::kDelegate) {
continue;
}
/* node input and output */
auto nodes = reinterpret_cast<kernel::SubGraphKernel *>(subgraph)->nodes();
auto nodes_size = nodes.size();
for (size_t i = 0; i < nodes_size; ++i) {
auto node = nodes[i];
out_size = node->out_tensors().size();
for (size_t j = 0; j < out_size; ++j) {
if (node->out_tensors()[j] == src_tensor) {
node->set_out_tensor(new_tensor, j);
break;
}
}
in_size = node->in_tensors().size();
for (size_t j = 0; j < in_size; ++j) {
if (node->in_tensors()[j] == src_tensor) {
node->set_in_tensor(new_tensor, j);
}
}
}
}
}
UpdateLinkInfoForIsolateOutput();
return RET_OK;
}
void LiteSession::UpdateLinkInfoForIsolateOutput() {
for (auto &item : isolate_graph_output_map_) {
context_->ReplaceLinkInfoReceiverWithNewOne(item.first, item.second);
}
return;
}
void LiteSession::FreePackOpWeight(const std::vector<kernel::KernelExec *> &kernels) {
// To reduce runtime RAM usage, free the weights of pack ops: once packed, these ops no longer
// access the origin weight at runtime.
for (auto *kernel : kernels) {
MS_ASSERT(kernel != nullptr);
if (kernel->subgraph_type() == kernel::kNotSubGraph) {
if (!IsPackedOp(static_cast<int>(kernel->type()))) {
continue;
}
} else {
auto subgraph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
FreePackOpWeight(subgraph->nodes());
}
auto inputs = kernel->in_tensors();
for (auto *tensor : inputs) {
MS_ASSERT(tensor != nullptr);
if (!tensor->IsConst()) {
continue;
}
tensor->FreeData();
}
}
}
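// Compilation pipeline: verify the model, convert tensors, schedule kernels into subgraphs,
// prepare the kernels, create the executor and optionally set up the runtime allocator.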
int LiteSession::CompileGraph(Model *model) {
auto ret = PreCheck(model);
if (MS_UNLIKELY(ret != RET_OK)) {
MS_LOG(ERROR) << "schedule check failed: " << ret;
is_running_.store(false);
return ret;
}
if (model->model_type_ != ModelType_MSLite) {
ret = reinterpret_cast<AbstractBaseModel *>(model)->ConvertTensors(&this->tensors_);
} else {
// Convert to abstract base model interface
ret = ConvertTensors(model);
}
if (MS_UNLIKELY(ret != RET_OK)) {
MS_LOG(ERROR) << "ConvertTensors failed: " << ret;
is_running_.store(false);
return ret;
}
ret = lite::PackWeightManager::GetInstance()->StoreOriginTensorData(model, &tensors_);
if (MS_UNLIKELY(ret != RET_OK)) {
MS_LOG(ERROR) << "StoreOriginTensorData failed.";
return RET_ERROR;
}
InitGraphInputTensors(model);
InitGraphOutputTensors(model);
// schedule kernels
Scheduler scheduler(context_, ms_context_, model, &tensors_, &inputs_, &outputs_, is_train_session_, &is_infershape_,
&is_control_flow_, &infer_along_running_, execution_plan_, delegate_, delegate_device_type_);
scheduler.SetupSchedulerCb(std::move(sched_cb_));
scheduler.SetConfig(config_info_);
ret = scheduler.Schedule(&kernels_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Schedule kernels failed: " << ret;
is_running_.store(false);
return ret;
}
infer_along_running_ = infer_along_running_ && !is_control_flow_ && !is_train_session_;
InitGraphInOutTensorsMap(model);
non_tail_call_kernels_ = scheduler.NonTailCallNodes();
ret = PrepareKernels(model);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare kernels failed: " << ret;
is_running_.store(false);
return ret;
}
if (is_train_session_) {
is_running_.store(false);
return RET_OK;
}
ret = InitExecutor();
if (ret != RET_OK) {
MS_LOG(ERROR) << "InitExecutor failed: " << ret;
is_running_.store(false);
return ret;
}
FreePackOpWeight(kernels_);
ret = RuntimeAllocatorInit();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Runtime allocator init failed.";
is_running_.store(false);
return ret;
}
infer_along_running_ = infer_along_running_ && (runtime_allocator_ == nullptr);
if (infer_along_running_) {
this->context_->set_infer_checker(InferCheckerAll);
}
is_running_.store(false);
#if defined(LINUX_RUNTIME)
(void)malloc_trim(0);
#endif
return RET_OK;
}
bool LiteSession::IsIsolatedSubGraph(const kernel::KernelExec *kernel) {
auto cur_in_tensors = kernel->in_tensors();
for (auto cur_kernel : this->kernels_) {
if (cur_kernel == kernel) {
continue;
}
auto out_tensors = cur_kernel->out_tensors();
for (auto tensor : cur_in_tensors) {
if (IsContain(out_tensors, tensor)) {
return false;
}
}
}
return true;
}
int LiteSession::SetAllocatorForDelegateKernels(const kernel::KernelExec *kernel) {
if (kernel == nullptr) {
return RET_NULL_PTR;
}
for (auto input : kernel->in_tensors()) {
CHECK_NULL_RETURN(input);
input->set_allocator(this->context_->allocator);
}
for (auto output : kernel->out_tensors()) {
CHECK_NULL_RETURN(output);
output->set_allocator(this->context_->allocator);
}
return RET_OK;
}
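// Links in/out kernels inside and across subgraphs, sets tensor init reference counts, and
// calls Prepare on every kernel (and on each node of non-delegate, non-GPU subgraphs).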
int LiteSession::PrepareKernels(const Model *model) {
// find kernel's in_kernels and out_kernels in every subgraph
kernel::KernelExecUtil::FindAllInoutKernelsInSubgraphKernel(this->kernels_);
// find in_kernels and out_kernels between subgraph kernels
kernel::KernelExecUtil::FindAllInoutKernels(this->kernels_);
// init init_ref_count for subgraphs and kernels
auto ret = SetTensorInitRefCount(model);
if (ret != RET_OK) {
MS_LOG(ERROR) << "SetTensorInitRefCount failed.";
return ret;
}
for (auto kernel : this->kernels_) {
if (kernel->desc().arch == kernel::kDelegate) {
ret = SetAllocatorForDelegateKernels(kernel);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare kernel " << kernel->name() << " failed: " << ret;
return ret;
}
}
if (!is_train_session_ && kernel->desc().arch != kernel::kDelegate && kernel->desc().arch != kernel::kGPU) {
auto subgraph_kernel = static_cast<kernel::SubGraphKernel *>(kernel);
if (subgraph_kernel == nullptr) {
MS_LOG(ERROR) << "kernel: " << kernel->name() << " not is subgraph kernel.";
return RET_ERROR;
}
for (auto &node : subgraph_kernel->nodes()) {
ret = node->Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "node: " << node->name() << " prepare failed.";
return ret;
}
}
}
ret = kernel->Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare kernel " << kernel->name() << " failed: " << ret;
return ret;
}
}
return RET_OK;
}
int LiteSession::SetTensorInitRefCount(const Model *model) {
for (auto *kernel : this->kernels_) {
kernel->InitOutTensorInitRefCount();
if (kernel->desc().arch == kernel::kDelegate) {
continue;
}
if (IsIsolatedSubGraph(kernel)) {
static_cast<kernel::SubGraphKernel *>(kernel)->InitInputTensorInitRefCount();
}
}
if (!non_tail_call_kernels_.empty()) {
return SetNonTaiCallSubgraphOutputInitRefCount(non_tail_call_kernels_);
}
return RET_OK;
}
int LiteSession::SetNonTaiCallSubgraphOutputInitRefCount(
const std::vector<kernel::KernelExec *> &non_tail_call_kernels) {
for (auto call_kernel : non_tail_call_kernels_) {
auto call_output = call_kernel->out_tensors();
auto all_out_subgraphs = kernel::KernelExecUtil::GetCallInputPartialsCorrespondingOutputSubgraph(call_kernel);
for (auto subgraph : all_out_subgraphs) {
MS_CHECK_TRUE_MSG(subgraph->out_tensors().size() == call_output.size(), RET_ERROR,
"non tail call output size is not same as subgraph output.");
std::set<Tensor *> subgraph_outputs_set{};
for (size_t i = 0; i < subgraph->out_tensors().size(); ++i) {
auto output = subgraph->out_tensors()[i];
if (subgraph_outputs_set.find(output) == subgraph_outputs_set.end()) {
output->set_init_ref_count(1);
subgraph_outputs_set.insert(output);
} else {
output->set_init_ref_count(output->init_ref_count() + 1);
}
}
}
}
return RET_OK;
}
std::vector<mindspore::lite::Tensor *> LiteSession::GetInputs() const { return this->input_vec_; }
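// Runs the compiled graph: rejects concurrent calls, validates input tensors and their shapes,
// then executes all kernels through the executor with optional per-kernel callbacks.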
int LiteSession::RunGraph(const KernelCallBack &before, const KernelCallBack &after) {
bool expected = false;
if (!is_running_.compare_exchange_strong(expected, true)) {
MS_LOG(ERROR) << "Not support multi-threading";
return RET_ERROR;
}
STATUS ret = CheckTensorsInvalid(inputs_);
if (MS_UNLIKELY(ret != RET_OK)) {
is_running_.store(false);
MS_LOG(ERROR) << "CheckInputs failed.";
return ret;
}
ret = CheckGraphInputShapes(inputs_, input_shape_map_);
if (MS_UNLIKELY(ret != RET_OK)) {
is_running_.store(false);
MS_LOG(ERROR) << "Check graph input shapes failed.";
return ret;
}
MS_ASSERT(this->context_ != nullptr);
ret = executor_->Run(this->inputs_, this->outputs_, this->kernels_, before, after);
if (MS_UNLIKELY(ret != RET_OK)) {
MS_LOG(ERROR) << "RunGraph failed : " << ret;
}
if (infer_along_running_) {
this->context_->set_infer_checker(InferCheckerInput);
for (auto input : inputs_) {
input->set_shape_changed(false);
}
}
is_running_.store(false);
return ret;
}
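// Stores the given inner context, binds it to an optional runner id taken from config_info_,
// initializes it and derives the matching ms_context_.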
int LiteSession::ContextInit(InnerContext *context) {
if (context == nullptr) {
MS_LOG(ERROR) << "context is nullptr";
return RET_NULL_PTR;
}
this->context_ = context;
std::string runner_id;
if (config_info_ != nullptr) {
auto it_id = config_info_->find(kInnerIDs);
if (it_id != config_info_->end()) {
auto item_runner = it_id->second.find(kInnerRunnerID);
if (item_runner != it_id->second.end()) {
runner_id = it_id->second.at(kInnerRunnerID);
}
}
}
context_->SetBindRunnerId(runner_id);
auto ret = this->context_->Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init Context failed";
return ret;
}
ms_context_ = MSContextFromContext(context);
if (ms_context_ == nullptr) {
MS_LOG(ERROR) << "transfer context to ms context failed.";
return RET_NULL_PTR;
}
#ifdef MS_COMPILE_IOS
context_->thread_pool()->SetMaxSpinCount(kDefaulLiteIosSpinCount);
context_->thread_pool()->SetMinSpinCount(kDefaulLiteIosSpinCount);
#endif
#ifdef PARALLEL_INFERENCE
if (context_->inter_op_parallel_num_ > 1 && !runner_id.empty() &&
ParallelThreadPoolManager::GetInstance()->GetEnableSharedThreadPool(runner_id)) {
MS_LOG(INFO) << "Enable subgraph parallelism and enable thread pool sharing";
ParallelThreadPoolManager::GetInstance()->BindPoolToRunner(context_->thread_pool(), config_info_);
}
#endif
return RET_OK;
}
int LiteSession::CreateTensorRTDelegate() {
#if GPU_TENSORRT
std::string cache_model_path;
std::string serialize_path;
size_t vocab_size = 0;
size_t device_cache_size = 0;
std::map<std::string, std::string> ms_cache;
std::map<std::string, std::string> input_ranges;
if (config_info_ != nullptr) {
auto input_ranges_iter = config_info_->find(kGPUContext);
if (input_ranges_iter != config_info_->end()) {
input_ranges = input_ranges_iter->second;
}
auto ms_cache_iter = config_info_->find(kMSCache);
if (ms_cache_iter != config_info_->end()) {
ms_cache = ms_cache_iter->second;
auto model_path_iter = ms_cache.find(kMSCacheModelPath);
if (model_path_iter != ms_cache.end()) {
cache_model_path = model_path_iter->second;
}
auto vocab_size_iter = ms_cache.find(kMSCacheVocabSize);
if (vocab_size_iter != ms_cache.end()) {
auto vocab_size_opt = GenericParseValue<size_t>(vocab_size_iter->second);
if (!vocab_size_opt.IsNone()) {
vocab_size = vocab_size_opt.Get();
}
}
auto device_cache_size_iter = ms_cache.find(kMSCacheDeviceSize);
if (device_cache_size_iter != ms_cache.end()) {
auto device_cache_size_opt = GenericParseValue<size_t>(device_cache_size_iter->second);
if (!device_cache_size_opt.IsNone()) {
device_cache_size = device_cache_size_opt.Get();
}
}
auto serialize_path_iter = ms_cache.find(kMSCacheSerializePath);
if (serialize_path_iter != ms_cache.end()) {
auto serialize_path_opt = GenericParseValue<std::string>(serialize_path_iter->second);
if (!serialize_path_opt.IsNone()) {
serialize_path = serialize_path_opt.Get();
}
}
}
}
delegate_ = std::make_shared<TensorRTDelegate>(ms_context_, cache_model_path, vocab_size, device_cache_size,
serialize_path, input_ranges);
if (delegate_ == nullptr) {
MS_LOG(ERROR) << "New tensorrt delegate_ failed";
return RET_ERROR;
}
delegate_device_type_ = DT_GPU;
this->context_->delegate = delegate_;
#endif
return RET_OK;
}
int LiteSession::CreateNPUDelegate() {
#if SUPPORT_NPU
delegate_ = std::make_shared<NPUDelegate>(context_->GetDeviceInfo(DT_NPU).npu_device_info_);
if (delegate_ == nullptr) {
MS_LOG(ERROR) << "New delegate_ failed";
return RET_ERROR;
}
delegate_device_type_ = DT_NPU;
this->context_->delegate = delegate_;
#endif
return RET_OK;
}
int LiteSession::DelegateInit() {
#ifndef DELEGATE_CLIP
if (context_->delegate != nullptr) {
delegate_ = context_->delegate;
delegate_device_type_ = -1;
} else {
if (context_->IsDeviceTypeEnabled(DT_NPU)) {
auto ret = CreateNPUDelegate();
if (ret != RET_OK) {
return ret;
}
}
if (context_->IsDeviceTypeEnabled(DT_GPU)) {
auto ret = CreateTensorRTDelegate();
if (ret != RET_OK) {
return ret;
}
}
}
if (delegate_ != nullptr) {
auto delegate_ret = delegate_->Init();
if (delegate_ret == mindspore::kLiteNotSupport) {
MS_LOG(DEBUG) << "Delegate is unsupported";
delegate_.reset();
delegate_ = nullptr;
} else if (delegate_ret == mindspore::kSuccess) {
MS_LOG(INFO) << "Delegate init successfully";
} else {
MS_LOG(ERROR) << "Delegate init failed";
return RET_ERROR;
}
}
#endif
return RET_OK;
}
int LiteSession::Init(InnerContext *context) {
bool expected = false;
if (!is_running_.compare_exchange_strong(expected, true)) {
delete context;
context = nullptr;
MS_LOG(ERROR) << "Not support multi-threading";
return RET_ERROR;
}
auto ret = ContextInit(context);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init Context failed";
is_running_.store(false);
return ret;
}
ret = DelegateInit();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init delegate failed.";
is_running_.store(false);
return ret;
}
ret = InitGPURuntime();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init GPU runtime failed.";
is_running_.store(false);
return ret;
}
is_running_.store(false);
return RET_OK;
}
void LiteSession::BindThread(bool if_bind) {
// Abandoned code
// Bind thread in executor
return;
}
LiteSession::~LiteSession() {
delegate_.reset();
bool expected = false;
if (!is_running_.compare_exchange_strong(expected, true)) {
MS_LOG(ERROR) << "Not support multi-threading";
return;
}
for (auto *kernel : kernels_) {
delete kernel;
kernel = nullptr;
}
for (auto tensor : tensors_) {
if (tensor == nullptr) {
continue;
}
// Data of a const tensor that doesn't own its data is not freed here,
// e.g. const data from the meta_graph, which is freed together with the meta_graph.
if (tensor->IsConst() && !tensor->own_data()) {
tensor->set_data(nullptr);
}
/* situation : user set graph-output-tensor data */
if (tensor->IsGraphOutput() && tensor->allocator() == nullptr) {
tensor->set_data(nullptr);
}
delete tensor;
tensor = nullptr;
}
for (auto item : isolate_graph_output_map_) {
auto isolate_output_tensor = item.first;
isolate_output_tensor->set_data(nullptr);
delete isolate_output_tensor;
isolate_output_tensor = nullptr;
}
for (auto map : isolate_input_map_) {
auto isolate_input_tensor = map.first;
isolate_input_tensor->set_data(nullptr);
delete isolate_input_tensor;
}
// Tensor pointers in input_map_ and output_map_ are freed via tensors_
input_map_.clear();
input_shape_map_.clear();
output_node_map_.clear();
output_tensor_map_.clear();
input_vec_.clear();
isolate_graph_output_map_.clear();
delete this->executor_;
this->executor_ = nullptr;
#if GPU_OPENCL
delete opencl_runtime_wrapper_;
opencl_runtime_wrapper_ = nullptr;
#endif
delete ms_context_;
ms_context_ = nullptr;
delete this->context_;
this->context_ = nullptr;
delete (model_);
model_ = nullptr;
is_running_.store(false);
}
mindspore::lite::Tensor *LiteSession::GetInputsByTensorName(const std::string &name) const {
auto ret = input_map_.find(name);
if (ret == input_map_.end()) {
MS_LOG(WARNING) << "Tensor " << name << " is not exist";
return nullptr;
}
return ret->second;
}
std::vector<mindspore::lite::Tensor *> LiteSession::GetOutputsByNodeName(const std::string &node_name) const {
auto ret = output_node_map_.find(node_name);
if (ret == output_node_map_.end()) {
MS_LOG(WARNING) << "Node " << node_name << " is not an output node";
std::vector<mindspore::lite::Tensor *> empty_ret;
return empty_ret;
}
return ret->second;
}
std::vector<std::string> LiteSession::GetOutputTensorNames() const { return this->output_tensor_names_; }
mindspore::lite::Tensor *LiteSession::GetOutputByTensorName(const std::string &tensor_name) const {
auto ret = output_tensor_map_.find(tensor_name);
if (ret == output_tensor_map_.end()) {
MS_LOG(WARNING) << "Tensor " << tensor_name << " is not an output node";
return nullptr;
}
return ret->second;
}
std::unordered_map<std::string, mindspore::lite::Tensor *> LiteSession::GetOutputs() const {
return this->output_tensor_map_;
}
int LiteSession::UpdateInputShapeMap() {
for (auto input : inputs_) {
MS_CHECK_TRUE_MSG(input != nullptr, RET_ERROR, "graph input tensor is nullptr.");
if (input_shape_map_.find(input) != input_shape_map_.end()) {
input_shape_map_.at(input) = input->shape();
} else {
MS_LOG(ERROR) << "can't find " << input->tensor_name() << " in input_shape_map";
return RET_ERROR;
}
}
return RET_OK;
}
int LiteSession::ResizeInputs(const std::vector<mindspore::lite::Tensor *> &inputs,
const std::vector<std::vector<int>> &dims) {
if (inputs.size() != inputs_.size()) {
MS_LOG(ERROR) << "Inputs size " << inputs.size() << " is not equal to " << inputs_.size();
return RET_PARAM_INVALID;
}
if (dims.size() != inputs.size()) {
MS_LOG(ERROR) << "Input dims size " << dims.size() << " is not equal to the inputs size " << inputs.size();
return RET_PARAM_INVALID;
}
for (size_t i = 0; i < inputs.size(); ++i) {
if (inputs[i] != inputs_[i]) {
MS_LOG(ERROR) << "Input[" << i << "] tensor is not equal to the inputs have been saved!";
return RET_PARAM_INVALID;
}
inputs_[i]->FreeData();
if (infer_along_running_ && !inputs_[i]->get_shape_changed()) {
inputs_[i]->set_shape_changed(dims[i] != inputs_[i]->shape());
}
inputs_[i]->set_shape(dims[i]);
}
if (!is_train_session_) {
executor_->Resize(inputs, dims);
}
return RET_OK;
}
void LiteSession::ResetInputsShape(const std::vector<std::vector<int>> &dims) {
for (size_t i = 0; i < inputs_.size(); ++i) {
inputs_[i]->FreeData();
inputs_[i]->set_shape(dims[i]);
inputs_[i]->set_shape_changed(false);
}
}
int LiteSession::ReSizeKernels(const std::vector<kernel::KernelExec *> &kernels,
const std::unordered_map<Tensor *, Tensor *> &isolate_input_map) {
for (auto kernel : kernels) {
if (kernel == nullptr) {
MS_LOG(ERROR) << "input kernel is nullptr!";
return RET_ERROR;
}
auto ret = RET_OK;
if (kernel->desc().arch == kernel::kDelegate) {
ret = kernel->ReSize();
} else {
// resize subgraph inputs
auto sub_graph_kernel = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
for (auto input : sub_graph_kernel->in_tensors()) {
if (isolate_input_map.find(input) != isolate_input_map.end()) {
input->set_shape(isolate_input_map.at(input)->shape());
}
}
if (kernel->subgraph_type() == kernel::kGpuFp16SubGraph || kernel->subgraph_type() == kernel::kGpuFp32SubGraph) {
#if GPU_OPENCL
auto sub_graph = reinterpret_cast<kernel::OpenCLSubGraph *>(kernel);
ret = sub_graph->ReSize(false);
#endif
} else {
auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
ret = sub_graph->ReSize();
}
}
if (ret == RET_INFER_INVALID) {
MS_LOG(DEBUG) << "InferShape is interrupted";
continue;
}
if (ret != RET_OK) {
MS_LOG(ERROR) << "ReSize node " << kernel->name() << " failed";
return RET_ERROR;
}
}
return RET_OK;
}
void LiteSession::SynIsolateInOutputDataType() {
for (auto &tensor_map : isolate_input_map_) {
auto dst_tensor = tensor_map.second;
auto src_tensor = tensor_map.first;
src_tensor->set_data_type(dst_tensor->data_type());
}
for (auto &tensor_map : isolate_graph_output_map_) {
auto dst_tensor = tensor_map.second;
auto src_tensor = tensor_map.first;
src_tensor->set_data_type(dst_tensor->data_type());
}
}
int LiteSession::BindGLTexture2DMemory(const std::map<std::string, unsigned int> &inputGLTexture,
std::map<std::string, unsigned int> *outputGLTexture) {
#if GPU_OPENCL
if (!this->context_->GetDeviceInfo(DT_GPU).gpu_device_info_.enable_gl_texture_) {
MS_LOG(ERROR) << "the context isn't set to support OpenGL texture";
return RET_ERROR;
}
for (const auto &[name, GLTexture_id] : inputGLTexture) {
auto iter = input_map_.find(name);
if (iter == input_map_.end()) {
MS_LOG(ERROR) << "the in tensor name " << name << "is not match any model input name";
return RET_ERROR;
}
auto in_data = iter->second->MutableData();
if (in_data == nullptr) {
std::cout << "MallocData for input Tensor failed" << std::endl;
return RET_ERROR;
}
memcpy(in_data, &GLTexture_id, sizeof(cl_GLuint));
iter->second->set_data_type(kNumberTypeGLUInt);
}
for (auto [name, GLTexture_id] : *outputGLTexture) {
auto iter = output_tensor_map_.find(name);
if (iter == output_tensor_map_.end()) {
MS_LOG(ERROR) << "the out tensor name " << name << "is not match any model output name";
return RET_ERROR;
}
auto out_data = iter->second->MutableData();
if (out_data == nullptr) {
std::cout << "MallocData for input Tensor failed" << std::endl;
return RET_ERROR;
}
memcpy(out_data, &GLTexture_id, sizeof(cl_GLuint));
iter->second->set_data_type(kNumberTypeGLUInt);
}
#ifdef ENABLE_MINDRT
SynIsolateInOutputDataType();  // synchronize data types with the isolated input/output tensors
#endif
if (this->kernels_.size() != 1) {
MS_LOG(ERROR) << "Now only support one opencl subgraph if you want to input opengl texture";
return RET_ERROR;
}
auto opencl_subgraph = reinterpret_cast<kernel::OpenCLSubGraph *>(kernels_.front());
for (size_t i = 0; i < outputs_.size(); i++) {
(opencl_subgraph)->set_out_tensor(outputs_[i], i);
}
for (auto node : opencl_subgraph->out_nodes()) {
node->set_out_tensors(opencl_subgraph->out_tensors());
}
#endif
return RET_OK;
}
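// Resizes the graph inputs to the given dims and re-infers kernel shapes; on failure the old
// shapes are restored. The runtime allocator and graph-optimize pass are re-applied afterwards.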
int LiteSession::Resize(const std::vector<mindspore::lite::Tensor *> &inputs,
const std::vector<std::vector<int>> &dims) {
bool expected = false;
if (!is_running_.compare_exchange_strong(expected, true)) {
MS_LOG(ERROR) << "Not support multi-threading";
return RET_ERROR;
}
std::vector<std::vector<int>> old_dims;
for (size_t i = 0; i < inputs_.size(); ++i) {
old_dims.push_back(inputs_[i]->shape());
}
auto ret = ResizeInputs(inputs, dims);
if (ret != RET_OK) {
ResetInputsShape(old_dims);
is_running_.store(false);
return ret;
}
ret = UpdateInputShapeMap();
if (ret != RET_OK) {
MS_LOG(ERROR) << "update input shape map failed.";
return RET_ERROR;
}
if (infer_along_running_) {
is_running_.store(false);
return ret;
}
ret = ReSizeKernels(kernels_, isolate_input_map_);
if (ret != RET_OK) {
ResetInputsShape(old_dims);
auto resize_ret = ReSizeKernels(kernels_);
if (resize_ret != RET_OK) {
MS_LOG(ERROR) << "restore kernel size fail!ret: " << resize_ret;
}
is_running_.store(false);
return ret;
}
if (RuntimeAllocatorInit() != RET_OK) {
MS_LOG(ERROR) << "Runtime allocator in resize failed.";
is_running_.store(false);
return RET_ERROR;
}
auto status = GraphOptimizePass(&kernels_);
if (status != RET_OK) {
MS_LOG(ERROR) << "GraphOptimizePass failed.";
return RET_ERROR;
}
is_running_.store(false);
return RET_OK;
}
int LiteSession::PreCheck(Model *model) {
bool expected = false;
if (!is_running_.compare_exchange_strong(expected, true)) {
MS_LOG(ERROR) << "Not support multi-threading";
return RET_ERROR;
}
if (model == nullptr) {
MS_LOG(ERROR) << "The input model is nullptr.";
return RET_PARAM_INVALID;
}
if (model->buf == nullptr) {
MS_LOG(ERROR) << "The input model buf is nullptr.";
return RET_PARAM_INVALID;
}
if (model->model_type_ != ModelType_MSLite) {
// abstract base model
if (!reinterpret_cast<AbstractBaseModel *>(model)->ModelVerify()) {
MS_LOG(ERROR) << "wrong model input, please check";
return RET_ERROR;
}
} else {
// old routine, convert to abstract base model
if (!reinterpret_cast<LiteModel *>(model)->ModelVerify()) {
MS_LOG(ERROR) << "wrong model input, please check";
return RET_ERROR;
}
}
#ifndef ENABLE_FP16
if (context_->GetDeviceInfo(DT_CPU).cpu_device_info_.enable_float16_) {
MS_LOG(WARNING) << unsupport_fp16_log;
}
#endif
return RET_OK;
}
int LiteSession::InitExecutor() {
int ret;
#ifdef ENABLE_MINDRT
if (ms_context_->GetThreadNum() == 1 && !context_->IsCpuFloat16Enabled()) {
executor_ = new (std::nothrow) Executor();
} else {
ret = IsolateOutputTensor();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Isolate output tensor failed.";
return ret;
}
executor_ = new (std::nothrow) MindrtExecutor(&isolate_graph_output_map_, &isolate_input_map_);
}
#else
executor_ = new (std::nothrow) Executor();
#endif
if (executor_ == nullptr) {
MS_LOG(ERROR) << "New Executor failed";
return RET_ERROR;
}
ret = executor_->Prepare(kernels_, inputs_, outputs_, context_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare executor failed: " << ret;
return ret;
}
return RET_OK;
}
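// Returns RET_OK only when the optimized runtime allocator can be used: ARM64 with MindRT,
// a single scheduled subgraph, static infershape, and no train session or subgraph parallelism.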
int LiteSession::RuntimeAllocatorValid() {
#ifdef ENABLE_ARM32
MS_LOG(DEBUG) << "Not support runtime allocator in arm32.";
return RET_ERROR;
#endif
#ifndef ENABLE_MINDRT
MS_LOG(DEBUG) << "Not support runtime allocator in converter.";
return RET_ERROR;
#endif
#ifdef BFC_MEMORY
MS_LOG(DEBUG) << "Not support runtime allocator when BFC_MEMORY on.";
return RET_ERROR;
#endif
if ((context_->enable_parallel_ == true) || (context_->inter_op_parallel_num_ > 1)) {
MS_LOG(DEBUG) << "Not support runtime allocator in subgraph parallel.";
return RET_ERROR;
}
if (is_train_session_ == true) {
MS_LOG(DEBUG) << "Not support runtime allocator in train session.";
return RET_ERROR;
}
if (is_infershape_ != RET_OK) {
MS_LOG(DEBUG) << "Not support runtime allocator in runtime-infershape.";
return RET_ERROR;
}
if (kernels_.size() != 1) {
MS_LOG(DEBUG) << "Not support runtime allocator in random subgraph sort";
return RET_ERROR;
}
#ifdef ENABLE_ARM64
MS_LOG(DEBUG) << "support runtime allocator.";
return RET_OK;
#endif
return RET_ERROR;
}
void LiteSession::RuntimeAllocatorInitGraphOutput() {
AllocatorPtr default_allocator = context_->allocator;
for (auto graph_out : isolate_graph_output_map_) {
auto cal_t = graph_out.first;
auto out_t = graph_out.second;
if (cal_t->allocator() != runtime_allocator_ || out_t->allocator() != default_allocator) {
continue;
}
out_t->set_allocator(runtime_allocator_);
if (cal_t->data_type() != out_t->data_type()) {
runtime_allocator_->MallocTensorData(out_t);
}
}
return;
}
void RuntimeAllocatorInitSubgraphInputs(const kernel::KernelExec *subgraph, const AllocatorPtr &default_allocator,
const RuntimeAllocatorPtr &runtime_allocator,
const std::unordered_map<Tensor *, Tensor *> &isolate_input_map,
std::unordered_map<Tensor *, int> *tensor_ref_count,
std::unordered_map<size_t, int> *data_ref_count) {
MS_ASSERT(subgraph != nullptr && tensor_ref_count != nullptr && data_ref_count != nullptr);
for (auto in_tensor : subgraph->in_tensors()) {
auto iter = isolate_input_map.find(in_tensor);
if (isolate_input_map.end() == iter) break;
auto src_t = iter->second;
if (src_t->data_type() == in_tensor->data_type()) {
in_tensor->set_allocator(src_t->allocator());
if (src_t->allocator() == runtime_allocator) {
(*tensor_ref_count)[in_tensor] = in_tensor->init_ref_count();
(*data_ref_count)[runtime_allocator->GetOffsetMap().at(src_t)] += in_tensor->init_ref_count();
runtime_allocator->SetDataOffset(in_tensor, runtime_allocator->GetOffsetMap().at(src_t));
}
} else {
if (in_tensor->allocator() == default_allocator) {
in_tensor->set_allocator(runtime_allocator);
runtime_allocator->MallocTensorData(in_tensor);
(*tensor_ref_count)[in_tensor] = in_tensor->init_ref_count();
(*data_ref_count)[runtime_allocator->GetOffsetMap().at(in_tensor)] = in_tensor->init_ref_count();
}
}
if (src_t->allocator() != runtime_allocator) {
continue;
}
(*tensor_ref_count)[src_t]--;
(*data_ref_count)[runtime_allocator->GetOffsetMap().at(src_t)]--;
if ((*tensor_ref_count)[src_t] <= 0) {
if ((*data_ref_count)[runtime_allocator->GetOffsetMap().at(src_t)] <= 0) {
runtime_allocator->FreeTensorData(src_t);
}
}
}
}
void LiteSession::RuntimeAllocatorInitSubgraph() {
AllocatorPtr default_allocator = context_->allocator;
std::unordered_map<lite::Tensor *, int> tensor_ref_count;
std::unordered_map<size_t, int> data_ref_count;
for (auto subgraph : kernels_) {
if (subgraph->desc().arch != kernel::KERNEL_ARCH::kCPU) {
continue;
}
RuntimeAllocatorInitSubgraphInputs(subgraph, default_allocator, runtime_allocator_, isolate_input_map_,
&tensor_ref_count, &data_ref_count);
auto kernel_list = reinterpret_cast<kernel::SubGraphKernel *>(subgraph)->nodes();
for (auto kernel : kernel_list) {
/* malloc for output */
for (auto tensor : kernel->out_tensors()) {
if (tensor->allocator() != default_allocator || tensor->IsConst()) {
continue;
}
tensor->set_allocator(runtime_allocator_);
runtime_allocator_->MallocTensorData(tensor);
tensor_ref_count[tensor] = tensor->init_ref_count();
data_ref_count[runtime_allocator_->GetOffsetMap().at(tensor)] = tensor->init_ref_count();
}
/* free input after run */
for (auto tensor : kernel->in_tensors()) {
if (tensor->allocator() != runtime_allocator_) {
continue;
}
tensor_ref_count[tensor]--;
data_ref_count[runtime_allocator_->GetOffsetMap().at(tensor)]--;
if (tensor_ref_count[tensor] <= 0 && tensor->allocator() == runtime_allocator_) {
if (data_ref_count[runtime_allocator_->GetOffsetMap().at(tensor)] <= 0) {
runtime_allocator_->FreeTensorData(tensor);
}
}
}
}
}
return;
}
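// Builds the runtime allocator when supported: plans tensor offsets for CPU subgraphs, remaps
// isolated graph outputs, then backs all planned tensors with a single contiguous allocation.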
int LiteSession::RuntimeAllocatorInit() {
if (RuntimeAllocatorValid() != RET_OK) {
return RET_OK;
}
if (ExistCustomCpuKernel()) {
return RET_OK;
}
if (runtime_allocator_ == nullptr) {
runtime_allocator_ = std::shared_ptr<RuntimeAllocator>(new (std::nothrow) RuntimeAllocator());
} else {
runtime_allocator_->Clear(context_->allocator);
}
if (runtime_allocator_ == nullptr) {
MS_LOG(ERROR) << "RuntimeAllocator is null.";
return RET_ERROR;
}
RuntimeAllocatorInitSubgraph();
RuntimeAllocatorInitGraphOutput();
auto ret = RuntimeAllocatorSetData();
if (ret != RET_OK) {
MS_LOG(ERROR) << "using optimize allocator failed.";
return ret;
}
return RET_OK;
}
int LiteSession::RuntimeAllocatorSetData() {
void *data = runtime_allocator_->MallocOptData();
if (data == nullptr) {
MS_LOG(ERROR) << "malloc optimize data failed.";
return RET_ERROR;
}
int8_t *int8_data = reinterpret_cast<int8_t *>(data);
auto offset_map = runtime_allocator_->GetOffsetMap();
for (auto &iter : offset_map) {
auto tensor = iter.first;
if (tensor->allocator() != runtime_allocator_) {
return RET_ERROR;
}
tensor->set_data(int8_data + iter.second);
}
return RET_OK;
}
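// Applies the configured CPU core binding, then initializes the OpenCL runtime (GL texture,
// GL context/display, FP16) when a GPU device is enabled, falling back to CPU when OpenCL
// initialization fails and GL texture is not required.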
int LiteSession::InitGPURuntime() {
if (context_->IsDeviceTypeEnabled(DT_CPU)) {
CpuBindMode cpu_bind_mode = context_->GetDeviceInfo(DT_CPU).cpu_device_info_.cpu_bind_mode_;
ThreadPool *thread_pool = this->context_->thread_pool();
if (thread_pool == nullptr) {
MS_LOG(ERROR) << "thread pool is nullptr";
is_running_.store(false);
return RET_NULL_PTR;
}
thread_pool->SetProcessAffinity(static_cast<BindMode>(cpu_bind_mode));
}
#if GPU_OPENCL
if (this->context_->IsDeviceTypeEnabled(DT_GPU)) {
opencl_runtime_wrapper_ = new (std::nothrow) opencl::OpenCLRuntimeInnerWrapper();
if (opencl_runtime_wrapper_ == nullptr) {
MS_LOG(ERROR) << "create OpenCLRuntimeInnerWrapper failed";
return RET_ERROR;
}
const auto &gpu_device_info = this->context_->GetDeviceInfo(DT_GPU).gpu_device_info_;
auto opencl_runtime = opencl_runtime_wrapper_->GetInstance();
opencl_runtime->SetGLTextureEnable(gpu_device_info.enable_gl_texture_);
opencl_runtime->SetGLContext(gpu_device_info.gl_context_);
opencl_runtime->SetGLDisplay(gpu_device_info.gl_display_);
if (opencl_runtime->Init() != RET_OK) {
if (gpu_device_info.enable_gl_texture_) {
MS_LOG(ERROR) << "Init OpenCL runtime failed, enable_gl_texture set true, only support GPU mode.";
return RET_ERROR;
}
this->context_->device_list_ = {{DT_CPU, {gpu_device_info.enable_float16_, MID_CPU}}};
MS_LOG(WARNING) << "Init OpenCL runtime failed, change to CPU mode.";
} else {
MS_LOG(INFO) << "Init OpenCL runtime success.";
}
opencl_runtime->SetFp16Enable(gpu_device_info.enable_float16_);
/* check whether the chip supports shared memory */
auto enable_arm_import_memory = opencl_runtime->isExtensionEnable(EXT_ARM_IMPORT_MEMORY_HOST);
if (!enable_arm_import_memory) {
MS_LOG(WARNING) << "GPU do not support shared memory!";
}
}
#endif
// Binding CPU cores affects OpenCL driver scheduling.
if (context_->IsDeviceTypeEnabled(DT_CPU)) {
ThreadPool *thread_pool = this->context_->thread_pool();
thread_pool->SetProcessAffinity(static_cast<BindMode>(NO_BIND));
}
return RET_OK;
}
} // namespace lite
lite::LiteSession *lite::LiteSession::CreateSession(const lite::Context *context) {
if (context == nullptr) {
return nullptr;
}
auto session = new (std::nothrow) lite::LiteSession();
if (session == nullptr) {
MS_LOG(ERROR) << "create session failed";
return nullptr;
}
mindspore::lite::InnerContext *inner_context = new (std::nothrow) mindspore::lite::InnerContext(context);
if (inner_context == nullptr) {
MS_LOG(ERROR) << "new inner context failed";
delete session;
return nullptr;
}
auto ret = session->Init(inner_context);
if (ret != mindspore::lite::RET_OK) {
MS_LOG(ERROR) << "init session failed";
delete session;
return nullptr;
}
return session;
}
lite::LiteSession *lite::LiteSession::CreateSession(const char *model_buf, size_t size, const lite::Context *context) {
auto *session = lite::LiteSession::CreateSession(context);
if (session == nullptr) {
MS_LOG(ERROR) << "Create session failed";
return nullptr;
}
auto ret = reinterpret_cast<lite::LiteSession *>(session)->LoadModelAndCompileByBuf(
model_buf, mindspore::ModelType::kMindIR_Lite, size);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init session failed";
delete session;
return nullptr;
}
return session;
}
lite::LiteSession *lite::LiteSession::CreateSession(const std::string &model_path, const lite::Context *context) {
auto *session = lite::LiteSession::CreateSession(context);
if (session == nullptr) {
MS_LOG(ERROR) << "Create session failed";
return nullptr;
}
auto ret = reinterpret_cast<lite::LiteSession *>(session)->LoadModelAndCompileByPath(
model_path, mindspore::ModelType::kMindIR_Lite);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init session failed";
delete session;
return nullptr;
}
return session;
}
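// Determines the real type of a model buffer: kMindIR_Lite buffers are passed through as-is,
// while kMindIR buffers are accepted only if they verify against the mslite flatbuffer schema.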
mindspore::ModelType lite::LiteSession::LoadModelByBuff(const char *model_buf, const size_t &buf_size, char **lite_buf,
size_t *size, mindspore::ModelType model_type) {
if (model_type == mindspore::ModelType::kMindIR_Lite) {
*size = buf_size;
*lite_buf = const_cast<char *>(model_buf);
return mindspore::ModelType::kMindIR_Lite;
}
if (model_type != mindspore::ModelType::kMindIR) {
return mindspore::ModelType::kUnknownType;
}
flatbuffers::Verifier verify((const uint8_t *)model_buf, buf_size);
auto version_verify = lite::LiteModel::VersionVerify(&verify);
if (version_verify != SCHEMA_INVALID) {
MS_LOG(DEBUG) << "The kMindIR type model buffer is valid mslite model buffer";
*size = buf_size;
*lite_buf = const_cast<char *>(model_buf);
return mindspore::ModelType::kMindIR_Lite;
}
MS_LOG(WARNING) << "Invalid mslite model.";
return mindspore::ModelType::kMindIR;
}
mindspore::ModelType lite::LiteSession::LoadModelByBuff(const char *model_buf, const size_t &buf_size, char **lite_buf,
size_t *size, mindspore::ModelType model_type,
const std::shared_ptr<mindspore::Context> &ms_context) {
if (model_type == mindspore::ModelType::kMindIR_Lite) {
*size = buf_size;
*lite_buf = const_cast<char *>(model_buf);
return mindspore::ModelType::kMindIR_Lite;
}
if (model_type != mindspore::ModelType::kMindIR) {
return mindspore::ModelType::kUnknownType;
}
flatbuffers::Verifier verify((const uint8_t *)model_buf, buf_size);
auto version_verify = lite::LiteModel::VersionVerify(&verify);
if (version_verify != SCHEMA_INVALID) {
MS_LOG(DEBUG) << "The kMindIR type model buffer is valid mslite model buffer";
*size = buf_size;
*lite_buf = const_cast<char *>(model_buf);
return mindspore::ModelType::kMindIR_Lite;
}
MS_LOG(WARNING) << "Invalid mslite model.";
#ifdef RUNTIME_CONVERT
*lite_buf = RuntimeConvert(model_buf, buf_size, size, ms_context);
#else
MS_LOG(WARNING) << "Please enable runtime convert.";
#endif
#ifdef ENABLE_CLOUD_FUSION_INFERENCE
*size = buf_size;
*lite_buf = const_cast<char *>(model_buf);
#endif
return mindspore::ModelType::kMindIR;
}
const char *lite::LiteSession::LoadModelByPath(const std::string &file, mindspore::ModelType model_type, size_t *size) {
size_t buf_size;
auto model_buf = lite::ReadFile(file.c_str(), &buf_size);
if (model_buf == nullptr) {
MS_LOG(ERROR) << "The model path is invalid";
return model_buf;
}
char *lite_buf = nullptr;
auto buf_model_type = LoadModelByBuff(model_buf, buf_size, &lite_buf, size, model_type);
if (buf_model_type == mindspore::ModelType::kUnknownType || lite_buf == nullptr) {
return nullptr;
}
if (buf_model_type == mindspore::ModelType::kMindIR) {
delete[] model_buf;
model_buf = nullptr;
}
return lite_buf;
}
const char *lite::LiteSession::LoadModelByPath(const std::string &file, mindspore::ModelType model_type, size_t *size,
const std::shared_ptr<mindspore::Context> &ms_context) {
size_t buf_size;
auto model_buf = lite::ReadFile(file.c_str(), &buf_size);
if (model_buf == nullptr) {
MS_LOG(ERROR) << "The model path is invalid";
return model_buf;
}
char *lite_buf = nullptr;
auto buf_model_type = LoadModelByBuff(model_buf, buf_size, &lite_buf, size, model_type, ms_context);
if (buf_model_type == mindspore::ModelType::kUnknownType || lite_buf == nullptr) {
return nullptr;
}
return lite_buf;
}
int lite::LiteSession::LoadModelAndCompileByBuf(const char *model_buf, mindspore::ModelType model_type,
const size_t &buf_size) {
size_t lite_buf_size = 0;
char *lite_buf = nullptr;
auto buf_model_type = LoadModelByBuff(model_buf, buf_size, &lite_buf, &lite_buf_size, model_type);
if (buf_model_type == mindspore::ModelType::kUnknownType || lite_buf == nullptr) {
MS_LOG(ERROR) << "Invalid model_buf";
return RET_ERROR;
}
mindspore::lite::Model *model = nullptr;
model = lite::ImportFromBuffer(lite_buf, lite_buf_size, true, model_type);
if (model == nullptr) {
MS_LOG(ERROR) << "Import model failed";
return RET_ERROR;
}
auto ret = CompileGraph(model);
model->buf = nullptr;
if (buf_model_type == mindspore::ModelType::kMindIR) {
delete[] lite_buf;
lite_buf = nullptr;
}
if (ret != lite::RET_OK) {
MS_LOG(ERROR) << "Compile model failed";
delete model;
return RET_ERROR;
}
set_model(model);
return RET_OK;
}
std::string lite::LiteSession::ParseWeightPath() {
std::string weight_path = "";
if (config_info_ != nullptr) {
auto ms_weight = config_info_->find(kWeight);
if (ms_weight != config_info_->end()) {
auto ms_weight_iter = ms_weight->second;
if (ms_weight_iter.find(kWeightPath) != ms_weight_iter.end()) {
weight_path = ms_weight_iter[kWeightPath];
}
}
}
return weight_path;
}
int lite::LiteSession::LoadModelAndCompileByBuf(const char *model_buf, mindspore::ModelType model_type,
const size_t &buf_size,
const std::shared_ptr<mindspore::Context> &ms_context) {
size_t lite_buf_size = 0;
char *lite_buf = nullptr;
auto buf_model_type = LoadModelByBuff(model_buf, buf_size, &lite_buf, &lite_buf_size, model_type, ms_context);
if (buf_model_type == mindspore::ModelType::kUnknownType || lite_buf == nullptr) {
MS_LOG(ERROR) << "Invalid model_buf";
return RET_ERROR;
}
auto weight_path = ParseWeightPath();
auto *model = lite::ImportFromBuffer(lite_buf, lite_buf_size, true, model_type, weight_path);
if (model == nullptr) {
MS_LOG(ERROR) << "Import model failed";
return RET_ERROR;
}
auto status = lite::PackWeightManager::GetInstance()->InitPackWeightByBuf(model_buf, buf_size);
MS_CHECK_FALSE_MSG(status != RET_OK, RET_ERROR, "InitPackWeightByBuf failed.");
auto ret = CompileGraph(model);
model->buf = nullptr;
if (buf_model_type == mindspore::ModelType::kMindIR) {
delete[] lite_buf;
lite_buf = nullptr;
}
if (ret != lite::RET_OK) {
MS_LOG(ERROR) << "Compile model failed";
delete model;
return RET_ERROR;
}
set_model(model);
return RET_OK;
}
int lite::LiteSession::LoadModelAndCompileByPath(const std::string &model_path, mindspore::ModelType model_type) {
size_t model_size;
auto model_buf = LoadModelByPath(model_path, model_type, &model_size);
if (model_buf == nullptr) {
MS_LOG(ERROR) << "Read model file failed";
return RET_ERROR;
}
auto *model = lite::ImportFromBuffer(model_buf, model_size, true, model_type, model_path);
if (model == nullptr) {
MS_LOG(ERROR) << "Import model failed";
return RET_ERROR;
}
(reinterpret_cast<lite::LiteModel *>(model))->set_keep_model_buf(true);
auto ret = CompileGraph(model);
if (ret != lite::RET_OK) {
delete model;
MS_LOG(ERROR) << "Compile model failed";
return RET_ERROR;
}
set_model(model);
return RET_OK;
}
int lite::LiteSession::LoadModelAndCompileByPath(const std::string &model_path, mindspore::ModelType model_type,
const std::shared_ptr<mindspore::Context> &ms_context) {
size_t model_size;
auto model_buf = LoadModelByPath(model_path, model_type, &model_size, ms_context);
if (model_buf == nullptr) {
MS_LOG(ERROR) << "Read model file failed";
return RET_ERROR;
}
auto *model = lite::ImportFromBuffer(model_buf, model_size, true, model_type, model_path);
if (model == nullptr) {
MS_LOG(ERROR) << "Import model failed";
delete[] model_buf;
return RET_ERROR;
}
auto status = lite::PackWeightManager::GetInstance()->InitPackWeightByBuf(model_buf, model_size);
MS_CHECK_FALSE_MSG(status != RET_OK, RET_ERROR, "InitPackWeightByBuf failed.");
(reinterpret_cast<lite::LiteModel *>(model))->set_keep_model_buf(true);
auto ret = CompileGraph(model);
if (ret != lite::RET_OK) {
MS_LOG(ERROR) << "Compile model failed";
delete[] model_buf;
model->buf = nullptr;
delete model;
return RET_ERROR;
}
set_model(model);
return RET_OK;
}
} // namespace mindspore