diff --git a/ops/framework/ms_kernels_internal/pyboost/internal_pyboost_runner.cc b/ops/framework/ms_kernels_internal/pyboost/internal_pyboost_runner.cc index f30de74da83a7e81ee2c9839eb4345bfdf6bac6d..fff0ee5941eee611b54e50dedad60977dd08fe70 100644 --- a/ops/framework/ms_kernels_internal/pyboost/internal_pyboost_runner.cc +++ b/ops/framework/ms_kernels_internal/pyboost/internal_pyboost_runner.cc @@ -26,27 +26,47 @@ size_t GetAlignedSize(size_t size) { namespace ms_custom_ops { void InternalPyboostRunner::GetOrCreateKernel(const TensorList &inputs, const TensorList &outputs) { + std::lock_guard op_guard(internal_op_lock_); + auto key = GetOrGenerateOpKey(op_key_); - auto it = hash_map_.find(key); - if (it != hash_map_.end()) { - internal_op_ = it->second; - MS_LOG(DEBUG) << "Internal Op [" << this->op_name() << "] hit cache"; - } else { + internal_v2::InternalOpPtr cached_op = nullptr; + + { + std::lock_guard guard(hash_map_lock_); + auto it = hash_map_.find(key); + if (it != hash_map_.end()) { + cached_op = it->second; + MS_LOG(DEBUG) << "Internal Op [" << this->op_name() << "] hit cache"; + } + } + + if (cached_op == nullptr) { MS_LOG(DEBUG) << "Internal Op [" << this->op_name() << "] miss cache"; TransDataType(inputs, outputs); UpdateArgImmutableInfo(&inputs_ii_, inputs, true); UpdateArgImmutableInfo(&outputs_ii_, outputs); - internal_op_ = CreateKernel(inputs_ii_, outputs_ii_); - MS_EXCEPTION_IF_NULL(internal_op_); - auto status = internal_op_->Init(); + + auto new_internal_op = CreateKernel(inputs_ii_, outputs_ii_); + MS_EXCEPTION_IF_NULL(new_internal_op); + auto status = new_internal_op->Init(); if (status != mindspore::internal_v2::kInternalOk) { - internal_op_ = nullptr; MS_LOG(EXCEPTION) << "Init internal kernel failed, kernel_name: " << this->op_name(); - return; } - hash_map_[key] = internal_op_; + + { + std::lock_guard guard(hash_map_lock_); + auto it = hash_map_.find(key); + if (it == hash_map_.end()) { + hash_map_[key] = new_internal_op; + cached_op = new_internal_op; + } else { + cached_op = it->second; + } + } } + internal_op_ = cached_op; + internal_inputs_shape_.clear(); internal_outputs_shape_.clear(); internal_inputs_shape_.resize(inputs.size()); @@ -66,6 +86,7 @@ void InternalPyboostRunner::GetOrCreateKernel(const TensorList &inputs, const Te } size_t InternalPyboostRunner::CalcWorkspace() { + std::lock_guard op_guard(internal_op_lock_); MS_EXCEPTION_IF_NULL(internal_op_); auto workspace_size_list = internal_op_->GetWorkspaceSize(); // all workspace will be aligned, like graph_mode @@ -190,6 +211,7 @@ void InternalPyboostRunner::UpdateArgImmutableInfo(std::vector op_guard(internal_op_lock_); auto workspace_ptr = this->workspace_ptr(); if (workspace_ptr == nullptr) { return; @@ -207,6 +229,7 @@ void InternalPyboostRunner::GetWorkspace(const internal_v2::InternalOpPtr &inter } void InternalPyboostRunner::LaunchKernel() { + std::lock_guard op_guard(internal_op_lock_); MS_EXCEPTION_IF_NULL(tiling_cache_item_); MS_EXCEPTION_IF_NULL(internal_op_); internal_v2::InputsAddrList inputs_addr; diff --git a/ops/framework/ms_kernels_internal/pyboost/internal_pyboost_runner.h b/ops/framework/ms_kernels_internal/pyboost/internal_pyboost_runner.h index 0ed36aa843c6f34e970e822f86a23b2e61eb614b..1d8fab185eb66467a0a2bcc09912a0e347cad308 100644 --- a/ops/framework/ms_kernels_internal/pyboost/internal_pyboost_runner.h +++ b/ops/framework/ms_kernels_internal/pyboost/internal_pyboost_runner.h @@ -85,6 +85,8 @@ class InternalPyboostRunner : public ms::pynative::PyboostRunner { uint64_t tiling_key_{0}; internal_v2::InternalOpPtr internal_op_{nullptr}; inline static std::unordered_map hash_map_; + inline static SimpleSpinLock hash_map_lock_; + inline static SimpleSpinLock internal_op_lock_; internal_v2::DtypeInfoList internal_inputs_dtype_; internal_v2::DtypeInfoList internal_outputs_dtype_; internal_v2::ShapeInfoList internal_inputs_shape_;