diff --git a/mindspore-src/source/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc b/mindspore-src/source/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc
index 28d46104e2b47d8fd97a40bc4156ec68448b0c0d..d80c4b10bc15c0438edef6bdbc91a41a9a4da974 100644
--- a/mindspore-src/source/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc
+++ b/mindspore-src/source/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc
@@ -59,6 +59,7 @@ Status NNRTDelegate::Build(DelegateModel<schema::Primitive> *model) {
   }
   if (IsKirinNPUWithOfflineInference()) {
     MS_LOG(DEBUG) << "Choose to build offline inference model";
+    build_offline_ = true;
     return BuildOfflineModel(model);
   }
 #endif
diff --git a/mindspore-src/source/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h b/mindspore-src/source/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h
index 764bf407d035274ec590336084ed29e943c50e4a..71cfc881970c144e47ace2ec161afe2f60e1e941 100644
--- a/mindspore-src/source/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h
+++ b/mindspore-src/source/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h
@@ -57,7 +57,7 @@ class NNRTDelegate : public Delegate {
   }
   static std::vector<NNRTOpRange> GetNNRTSubgraphRanges(DelegateModel<schema::Primitive> *model,
                                                         const std::vector<bool> &op_supports);
-
+  bool IsBuildOffline() const { return build_offline_; }
  private:
   void InitExtensionOptions();
   Status BuildNormalModel(DelegateModel<schema::Primitive> *model);
@@ -102,6 +102,7 @@ class NNRTDelegate : public Delegate {
   std::map<schema::Tensor *, void *> dequant_schema_tensors_buffer_map_;
   std::vector<schema::Tensor *> replaced_schema_tensors_;
   void *hiai_handle_{nullptr};
+  bool build_offline_ = false;
 };
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore-src/source/mindspore/lite/src/litert/lite_model.cc b/mindspore-src/source/mindspore/lite/src/litert/lite_model.cc
index 5ffd5939eaf43278c25bdbf833cd88da648430df..97fd5c2452a567375ef642b498f368ba1a0893be 100644
--- a/mindspore-src/source/mindspore/lite/src/litert/lite_model.cc
+++ b/mindspore-src/source/mindspore/lite/src/litert/lite_model.cc
@@ -40,11 +40,20 @@ namespace {
 constexpr size_t kMaxModelBufferSize = static_cast<size_t>(1024) * 1024 * 1024 * 2;
 }
 
-void LiteModel::Free() {
+void LiteModel::UnmapModelBuf() {
+  if (this->buf == nullptr) {
+    return;
+  }
+
   if (this->model_buf_by_mmap_) {
     UnmapMmapBuffer(static_cast<void *>(this->buf), this->buf_size_);
     this->buf = nullptr;
   }
+}
+
+void LiteModel::Free() {
+  UnmapModelBuf();
+
   if (this->buf != nullptr && !this->model_buf_by_mmap_) {
     delete[](this->buf);
     this->buf = nullptr;
diff --git a/mindspore-src/source/mindspore/lite/src/litert/lite_model.h b/mindspore-src/source/mindspore/lite/src/litert/lite_model.h
index c0847c1ea7bdc32f22e465c167b8be7685ae5432..3445b073e870376ea6cfae56f9f8acce7cc7845a 100644
--- a/mindspore-src/source/mindspore/lite/src/litert/lite_model.h
+++ b/mindspore-src/source/mindspore/lite/src/litert/lite_model.h
@@ -71,6 +71,7 @@ class MS_API LiteModel : public Model {
 #else
   bool PrepareInnerTensors();
 #endif
+  void UnmapModelBuf();
 
  private:
   bool CheckQuantAllInit(const flatbuffers::Vector<flatbuffers::Offset<schema::QuantParam>> *quant_params);
diff --git a/mindspore-src/source/mindspore/lite/src/litert/lite_session.cc b/mindspore-src/source/mindspore/lite/src/litert/lite_session.cc
index 53fb37e4044a95f3751b0566cbc90544591b744b..f185d6b899f2ed073240b0158e9bc40893900df1 100644
--- a/mindspore-src/source/mindspore/lite/src/litert/lite_session.cc
+++ b/mindspore-src/source/mindspore/lite/src/litert/lite_session.cc
@@ -2141,6 +2141,13 @@ int lite::LiteSession::LoadModelAndCompileByPath(const std::string &model_path,
     return RET_ERROR;
   }
   set_model(model);
+#ifdef SUPPORT_NNRT
+  if (context_->IsDeviceTypeEnabled(DT_NNRT)) {
+    if (delegate_ != nullptr && reinterpret_cast<NNRTDelegate *>(delegate_.get())->IsBuildOffline()) {
+      (reinterpret_cast<LiteModel *>(model))->UnmapModelBuf();
+    }
+  }
+#endif
   return RET_OK;
 }
 