From bfa401be4830823167e3792f90ad8c40fdce641a Mon Sep 17 00:00:00 2001 From: YzLi Date: Thu, 6 Nov 2025 19:36:54 +0800 Subject: [PATCH] remove useless code for GE mode in SuperKernelActor --- .../core/actors/base/super_kernel_actor.cc | 441 +----------------- .../core/actors/base/super_kernel_actor.h | 13 - 2 files changed, 16 insertions(+), 438 deletions(-) diff --git a/mindspore/ccsrc/runtime/core/actors/base/super_kernel_actor.cc b/mindspore/ccsrc/runtime/core/actors/base/super_kernel_actor.cc index 85a19a079d4..20646bf4a91 100644 --- a/mindspore/ccsrc/runtime/core/actors/base/super_kernel_actor.cc +++ b/mindspore/ccsrc/runtime/core/actors/base/super_kernel_actor.cc @@ -16,6 +16,11 @@ #include "runtime/core/actors/base/super_kernel_actor.h" #include +#include +#include +#include +#include +#include #include #include "include/runtime/memory/mem_pool/mem_tracker.h" #include "runtime/core/graph_scheduler/base/scheduler_helper.h" @@ -55,46 +60,6 @@ static std::mutex mtx; bool SuperKernelActor::already_allocate_trace_memory_ = false; namespace { -inline void UpdateShape(const AnfNodePtr &input_node, const KernelTensorPtr &node_device_kernel_tensor, - const KernelTensorPtr &input_kernel_tensor, const KernelTransformType &type) { - MS_EXCEPTION_IF_NULL(input_node); - MS_EXCEPTION_IF_NULL(node_device_kernel_tensor); - MS_EXCEPTION_IF_NULL(input_kernel_tensor); - if (type != KernelTransformType::kSuperKernelActor || input_node->cast()->has_dynamic_shape()) { - // For dynamic shape in sub graph sink and any type parameter, the input size should be updated. - node_device_kernel_tensor->device_address()->SetSize(input_kernel_tensor->device_address()->GetSize()); - // Update Shape. - node_device_kernel_tensor->SetShape(input_kernel_tensor->GetShape()->Clone()); - } -} - -inline bool InputDataNoNeedCopy(const AnfNodePtr &input_node, const KernelTensorPtr &input_kernel_tensor, - const KernelTensorPtr &node_kernel_tensor, const KernelTransformType &type) { - if (input_kernel_tensor == nullptr) { - return true; - } - auto node_device_tensor = node_kernel_tensor->device_address().get(); - auto input_device_tensor = input_kernel_tensor->device_address().get(); - MS_EXCEPTION_IF_NULL(node_device_tensor); - if (input_device_tensor == nullptr) { - return true; - } - - if (input_device_tensor == node_device_tensor) { - (void)input_kernel_tensor->TouchSyncHandler(); - return true; - } - - UpdateShape(input_node, node_kernel_tensor, input_kernel_tensor, type); - - if (TEST_FLAG(node_kernel_tensor->flag(), device::kDeviceAddressFlagNotUsed) || - input_device_tensor->GetPtr() == node_device_tensor->GetPtr()) { - return true; - } - - return false; -} - bool IsOnlyDependShape(const CNodePtr &kernel, size_t input_index) { auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); @@ -268,57 +233,6 @@ void SuperKernelActor::Init() { auto kernel_tensor = AnfAlgo::GetOutputKernelTensor(output_node, IntToSize(data_arrow->from_output_index_), false); data->data_ = kernel_tensor; } - - if (enable_kbk_sub_graph_execute_) { - // 1. Don't cache DeviceAddress of Parameter node into node_device_tensors_ on PyNative mode. - // 2. Ignore the operator of SuperKernelActor for O2(GE) mode. - return; - } - - const auto &output_with_indexs = common::AnfAlgo::GetAllOutputWithIndex(graph_->output()); - for (const auto &origin_output_with_index : output_with_indexs) { - const auto &output_with_index = common::AnfAlgo::FetchRealNodeSkipMonadControl(origin_output_with_index); - const auto &output_node = output_with_index.first; - MS_EXCEPTION_IF_NULL(output_node); - if (output_node->isa() && (!HasAbstractMonad(output_node))) { - auto kernel_tensor = AnfAlgo::GetOutputKernelTensor(output_node, output_with_index.second, false); - MS_EXCEPTION_IF_NULL(kernel_tensor); - auto device_address = kernel_tensor->device_address(); - MS_EXCEPTION_IF_NULL(device_address); - if (kernel_tensor->is_ptr_persisted() || graph_->is_dynamic_shape()) { - MS_VLOG(VL_RUNTIME_FRAMEWORK_DEVICE_ADDRESS) - << "Actor:" << GetAID() << " skip alloc memory for device address:" << device_address - << " is persist:" << kernel_tensor->is_ptr_persisted() << " is dynamic shape:" << graph_->is_dynamic_shape() - << " output node:" << output_node->DebugString(); - continue; - } - // Free the ptr in device address of output node. - if (device_address->GetPtr() != nullptr) { - MS_LOG(INFO) << "Output node:" << output_node->DebugString() << " has a default ptr, maybe a mem leak."; - device_address->set_ptr(nullptr); - } - if (IsSkippedLaunch()) { - device_address_to_node_[device_address.get()] = {device_address->GetSize(), output_node->fullname_with_scope()}; - } - memory_alloc_list_.emplace_back(kernel_tensor); - } - } - - // Check whether the parameter needs to be copied out. - node_kernel_tensors_.resize(graph_->input_nodes().size()); - is_parameters_need_copy_.resize(graph_->input_nodes().size()); - copy_input_kernel_tensors_.resize(graph_->input_nodes().size()); - for (size_t i = 0; i < graph_->input_nodes().size(); ++i) { - const auto &input_node = graph_->input_nodes()[i]; - MS_EXCEPTION_IF_NULL(input_node); - node_kernel_tensors_[i] = AnfAlgo::GetOutputKernelTensor(input_node, 0, false); - if (!common::AnfAlgo::HasAbstractRef(input_node)) { - is_parameters_need_copy_[i] = false; - continue; - } - // If the parameter has ref attribute and is directly used by the kernel in the graph, it needs to be copied. - is_parameters_need_copy_[i] = true; - } } void SuperKernelActor::InitParallelDispatchResource() { @@ -412,7 +326,7 @@ void SuperKernelActor::FetchInputDeviceTensor(OpContext *const con << ", index: " << index; } - if (!enable_kbk_sub_graph_execute_ || ActorDispatcher::enable_use_trace_memory()) { + if (ActorDispatcher::enable_use_trace_memory()) { if (input_data->data_->new_ref_count() != SIZE_MAX) { (void)memory_free_list.emplace_back(input_data->data_); } @@ -420,7 +334,7 @@ void SuperKernelActor::FetchInputDeviceTensor(OpContext *const con continue; } } - if (!enable_kbk_sub_graph_execute_ || ActorDispatcher::enable_use_trace_memory()) { + if (ActorDispatcher::enable_use_trace_memory()) { memory_free_lists_.push(memory_free_list); return; } @@ -431,89 +345,15 @@ void SuperKernelActor::Run(OpContext *const context) { MS_EXCEPTION_IF_NULL(context); MS_EXCEPTION_IF_NULL(graph_); MS_VLOG(VL_RUNTIME_FRAMEWORK_ACTOR) << "Super Kernel actor:" << GetAID() << " start run."; - if (enable_kbk_sub_graph_execute_) { - try { - return RunGraphKernelByKernel(context); - } catch (const std::exception &e) { - if (context->error_info_.empty()) { - MsException::Instance().SetException(); - std::string error_info = - "Run graph[" + std::to_string(graph_->graph_id()) + "] by kernek by kernel failed, exception: " + e.what(); - SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info); - } - } - return; - } - if (NeedRunMemTracker()) { - device::tracker::CALL_MEMORY_TRACKER_WITH_FILE(AddTask, GetAID().Name(), "SuperKernelActor", graph_->ToString(), - true); - } - if (device_contexts_.empty() || device_contexts_[0] == nullptr) { - SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), "Invalid device context for super kernel actor:" + GetAID().Name()); - } - MS_LOG(INFO) << "Super kernel actor(" << GetAID().Name() - << ") launches graph: " << std::to_string(graph_->graph_id()); - if (memory::mem_pool::IsNeedProfilieMemoryLog()) { - MS_LOG(WARNING) << "Need Profile Memory, launch actor name: " << GetAID().Name() - << ", kernel graph: " << graph_->ToString(); - } - if (!WaitRuntimePipelineFinish(context, GetAID().Name())) { - MS_LOG(INFO) << "Run failed and early stop."; - return; - } - FetchInputDeviceTensor(context); - if (!already_fetch_persistent_device_tensor_) { - FetchPersistentDeviceTensor(); - already_fetch_persistent_device_tensor_ = is_infer_phase_; - } - - TrackInputMemory(); - - if (memory_alloc_list_.size() > 0) { - for (auto &kernel_tensor : memory_alloc_list_) { - MS_EXCEPTION_IF_NULL(kernel_tensor); - auto device_tensor = kernel_tensor->device_address().get(); - MS_EXCEPTION_IF_NULL(device_tensor); - if (kernel_tensor->IsNotNeedAlloc()) { - continue; - } - if (memory::mem_pool::IsNeedProfilieMemoryLog()) { - auto &info = device_address_to_node_[device_tensor]; - auto output_address = reinterpret_cast(device_tensor); - MS_LOG(WARNING) << "Need Profile Memory, Memory need allocated, actor name: " << GetAID().Name() - << ", kernel graph: " << graph_->ToString() << ", node: " << info.node_full_name - << ", device address class ptr: " << output_address << ", device address size: " << info.size; - } - if (NeedRunMemTracker()) { - device::tracker::CALL_MEMORY_TRACKER_WITH_FILE(AddMemInfo, GetAID().Name(), - memory::mem_pool::MemType::kGraphOutput, - device_tensor->GetSize(), device_tensor); - } - } - SendMemoryAllocReq(context); - } else { - OnMemoryAllocFinish(context); - } - if (memory::mem_pool::IsNeedProfilieMemoryLog()) { - MS_LOG(WARNING) << "Need Profile Memory, end launch, actor name: " << GetAID().Name() - << ", kernel graph: " << graph_->ToString(); - } - MS_VLOG(VL_RUNTIME_FRAMEWORK_ACTOR) << "Super Kernel actor:" << GetAID() << " end run."; -} - -void SuperKernelActor::FetchPersistentDeviceTensor() { - for (auto &device_tensor_store_key : device_tensor_store_keys_) { - auto input_kernel_tensor = DeviceTensorStore::GetInstance().Fetch(device_tensor_store_key.second.get(), - device_contexts_[0]->GetDeviceType()); - // Ge backend maybe nullptr. - if (input_kernel_tensor == nullptr) { - MS_LOG(DEBUG) << "Failed get device tensor for node:" << device_tensor_store_key.second->DebugString() - << " index:" << device_tensor_store_key.first; - continue; + try { + return RunGraphKernelByKernel(context); + } catch (const std::exception &e) { + if (context->error_info_.empty()) { + MsException::Instance().SetException(); + std::string error_info = + "Run graph[" + std::to_string(graph_->graph_id()) + "] by kernek by kernel failed, exception: " + e.what(); + SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info); } - - size_t index = device_tensor_store_key.first; - input_kernel_tensors_[index] = input_kernel_tensor; } } @@ -1391,233 +1231,6 @@ void SuperKernelActor::RunGraphKernelByKernel(OpContext *const con PostRun(context); } -void SuperKernelActor::SendMemoryAllocReq(OpContext *const context) { - MS_EXCEPTION_IF_NULL(context); - if (device_contexts_.empty() || device_contexts_[0] == nullptr) { - SET_OPCONTEXT_FAIL_RET_WITH_ERROR_BY_STRATEGY(GraphExecutionStrategy::kPipeline, (*context), - "Invalid device context for super kernel actor:" + GetAID().Name()); - } - sort(memory_alloc_list_.begin(), memory_alloc_list_.end(), [](const KernelTensorPtr a, const KernelTensorPtr b) { - MS_EXCEPTION_IF_NULL(a); - MS_EXCEPTION_IF_NULL(b); - MS_EXCEPTION_IF_NULL(a->device_address()); - MS_EXCEPTION_IF_NULL(b->device_address()); - return a->device_address()->GetSize() > b->device_address()->GetSize(); - }); - if (ActorDispatcher::is_memory_allocation_sync()) { - ActorDispatcher::SendSync(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, &memory_alloc_list_, - device_contexts_[0], context, GetAID()); - OnMemoryAllocFinish(context); - } else { - ActorDispatcher::Send(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, &memory_alloc_list_, - device_contexts_[0], context, GetAID()); - } -} - -void SuperKernelActor::OnMemoryAllocFinish(OpContext *const context) { - MS_EXCEPTION_IF_NULL(context); - MS_EXCEPTION_IF_NULL(graph_); - if (IsRunningFailed(context)) { - MS_LOG(INFO) << "Running failed in actor:" << GetAID().Name(); - return; - } - { - ProfilerRecorder profiler(ProfilerModule::kRuntime, ProfilerEvent::kPreLaunch, GetAID().Name()); - if (!CopyInputData(context, graph_)) { - std::string error_info = "Copy the input data failed, graph id: " + std::to_string(graph_->graph_id()); - SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info); - } - } - - try { - if (device_contexts_.empty() || device_contexts_[0] == nullptr) { - SET_OPCONTEXT_FAIL_RET_WITH_ERROR_BY_STRATEGY(GraphExecutionStrategy::kPipeline, (*context), - "Invalid device context for super kernel actor:" + GetAID().Name()); - } - MS_LOG(EXCEPTION) << "Launch graph error."; - } catch (const std::exception &e) { - MsException::Instance().SetException(); - std::string error_info = "Launch graph exception, graph id: " + std::to_string(graph_->graph_id()); - SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info); - } - - { - ProfilerRecorder profiler(ProfilerModule::kRuntime, ProfilerEvent::kPostLaunch, GetAID().Name()); - for (auto item : ref_node_addr_map_) { - MS_EXCEPTION_IF_NULL(item.first); - MS_EXCEPTION_IF_NULL(item.second); - MS_LOG(INFO) << "The input ref node copy back from address: " << item.first->GetPtr() - << " to address: " << item.second->GetPtr() << "."; - if (!SyncCopy(item.second, item.first, kDefaultStreamIndex)) { - SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), "Copy data failed."); - } - } - ref_node_addr_map_.clear(); - } - - // Debug actor is blocked, must wait debug actor callback message to process continue. - if (debug_aid_ != nullptr) { - SendDebugReq(context); - return; - } - PostRun(context); -} - -void SuperKernelActor::SendDebugReq(OpContext *const context) { - running_dependent_msg_num_ = 1; - if (device_contexts_.empty() || device_contexts_[0] == nullptr) { - SET_OPCONTEXT_FAIL_RET_WITH_ERROR_BY_STRATEGY(GraphExecutionStrategy::kPipeline, (*context), - "Invalid device context for super kernel actor:" + GetAID().Name()); - } - OnDebugFinish(context); -} - -bool SuperKernelActor::CopyInputDataPersistedHandle(const DeviceContext *device_context, - const KernelTensorPtr &input_kernel_tensor, - const KernelTensorPtr &node_kernel_tensor, size_t i) { - auto &input_device_tensor = input_kernel_tensor->device_address(); - auto &node_device_tensor = node_kernel_tensor->device_address(); - MS_EXCEPTION_IF_NULL(node_device_tensor); - if ((input_device_tensor->GetDeviceType() == node_device_tensor->GetDeviceType()) && - AnfAlgo::IsEquivalentFormat(input_kernel_tensor->format(), node_kernel_tensor->format())) { - MS_VLOG(VL_RUNTIME_FRAMEWORK_DEVICE_ADDRESS) - << "Not need copy for device tensor:" << node_device_tensor << " ptr:" << node_device_tensor->GetPtr() - << " index:" << i << " for actor:" << GetAID(); - // Set the ptr from input_device_tensor and set mem pool false to avoid memory double management for - // supporting zero copy. - if (type_ != KernelTransformType::kSuperKernelActor) { - node_device_tensor->set_ptr(input_device_tensor->GetMutablePtr()); - } else { - node_device_tensor->set_ptr(input_kernel_tensor->GetValidPtr(input_device_tensor->stream_id())); - } - MS_VLOG(VL_RUNTIME_FRAMEWORK_DEVICE_ADDRESS) - << "Actor:" << GetAID() << "set need sync flag from:" << input_device_tensor << " to:" << node_device_tensor - << " sync user data handler:" << node_kernel_tensor->need_sync_user_data(); - node_device_tensor->set_from_mem_pool(false); - // continue - return true; - } - if (device_context->GetDeviceType() != node_device_tensor->GetDeviceType()) { - device_context = device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext( - {node_device_tensor->GetDeviceType(), node_device_tensor->device_id()}); - MS_EXCEPTION_IF_NULL(device_context); - MS_EXCEPTION_IF_NULL(device_context->device_res_manager_); - } - - if (copy_input_kernel_tensors_[i] == nullptr) { - MS_EXCEPTION_IF_NULL(node_kernel_tensor); - const auto new_kernel_tensor = SchedulerHelper::CloneKernelTensorWithDeviceInfo(node_kernel_tensor, device_context); - MS_EXCEPTION_IF_NULL(new_kernel_tensor); - new_kernel_tensor->set_device_ptr(nullptr); - - copy_input_kernel_tensors_[i] = new_kernel_tensor; - MS_LOG(DEBUG) << "Create new kernel tensor:" << copy_input_kernel_tensors_[i] << " index:" << i - << " for actor:" << GetAID(); - } - auto copy_kernel_tensor = copy_input_kernel_tensors_[i]; - MS_EXCEPTION_IF_NULL(copy_kernel_tensor); - auto ©_device_tensor = copy_kernel_tensor->device_address(); - MS_EXCEPTION_IF_NULL(copy_device_tensor); - copy_kernel_tensor->set_user_data(node_kernel_tensor->user_data()); - copy_kernel_tensor->set_need_sync_user_data(node_kernel_tensor->need_sync_user_data()); - if (copy_device_tensor->GetPtr() == nullptr) { - if (!device_context->device_res_manager_->AllocateMemory(copy_device_tensor.get())) { - MS_LOG(ERROR) << "Device(id:" << std::to_string(device_context->device_context_key().device_id_) - << ") memory isn't enough and alloc failed, kernel name: " << GetAID() - << ", alloc size: " + std::to_string(copy_device_tensor->GetSize()) << "B."; - return true; - } - static std::string name = "Alloc memory"; - copy_kernel_tensor->IncreaseNewRefCount(name); - } - MS_VLOG(VL_RUNTIME_FRAMEWORK_DEVICE_ADDRESS) - << "Alloc memory for device tensor:" << copy_device_tensor << " ptr:" << copy_device_tensor->GetPtr() - << " size:" << copy_device_tensor->GetSize() << " index:" << i << " for actor:" << GetAID(); - if (type_ != KernelTransformType::kSuperKernelActor) { - node_device_tensor->set_ptr(copy_device_tensor->GetMutablePtr()); - } else { - node_device_tensor->set_ptr(copy_kernel_tensor->GetValidPtr(copy_device_tensor->stream_id())); - } - node_device_tensor->set_from_mem_pool(false); - return false; -} - -bool SuperKernelActor::CopyInputData(const OpContext *context, const KernelGraphPtr &graph) { - MS_EXCEPTION_IF_NULL(context); - MS_EXCEPTION_IF_NULL(graph); - if (device_contexts_.empty() || device_contexts_[0] == nullptr || - device_contexts_[0]->device_res_manager_ == nullptr) { - MS_LOG(ERROR) << "Invalid device context for actor:" << GetAID(); - return false; - } - auto device_context = device_contexts_[0]; - auto &input_nodes = graph->input_nodes(); - if (input_kernel_tensors_.size() != node_kernel_tensors_.size()) { - MS_LOG(ERROR) << "The size of input_kernel_tensors_[" << input_kernel_tensors_.size() - << "] is not equal to the size of node_kernel_tensors_[" << node_kernel_tensors_.size() << "]."; - return false; - } - - for (size_t i = 0; i < input_kernel_tensors_.size(); ++i) { - auto &node_device_kernel_tensor = node_kernel_tensors_[i]; - MS_EXCEPTION_IF_NULL(node_device_kernel_tensor); - auto &node_device_tensor = node_device_kernel_tensor->device_address(); - MS_EXCEPTION_IF_NULL(node_device_tensor); - MS_EXCEPTION_IF_NULL(input_nodes[i]); - auto &input_kernel_tensor = input_kernel_tensors_[i]; - if (InputDataNoNeedCopy(input_nodes[i], input_kernel_tensor, node_device_kernel_tensor, type_)) { - MS_LOG(DEBUG) << "Actor:" << GetAID() << " input kernel tensor " << i << ":" << input_kernel_tensor - << " no need copy."; - continue; - } - MS_EXCEPTION_IF_NULL(input_kernel_tensor); - auto input_device_tensor = input_kernel_tensors_[i]->device_address(); - MS_EXCEPTION_IF_NULL(input_device_tensor); - UpdateShape(input_nodes[i], node_device_kernel_tensor, input_kernel_tensor, type_); - node_device_kernel_tensor->set_user_data(input_kernel_tensors_[i]->user_data()); - node_device_kernel_tensor->set_need_sync_user_data(input_kernel_tensors_[i]->need_sync_user_data()); - if (type_ != KernelTransformType::kSuperKernelActor) { - node_device_kernel_tensor->SetValue(input_kernel_tensor->GetValueTrack()); - } - - // Copy. - DeviceTensorPtr copy_device_tensor = nullptr; - // If the input is not a persist device address, in a heterogeneous scenario, a new device address needs to - // be created. And set ptr to node device address to support the zero copy of graph input nodes. - if (!node_device_kernel_tensor->is_ptr_persisted()) { - if (CopyInputDataPersistedHandle(device_context, input_kernel_tensors_[i], node_device_kernel_tensor, i)) { - continue; - } - copy_device_tensor = copy_input_kernel_tensors_[i]->device_address(); - } else { - if (node_device_tensor->GetPtr() == nullptr) { - MS_LOG(INFO) << "The node device tensor:" << node_device_tensor - << ", which shared with another graph, has no device memory and will skip " - "copy for actor:" - << GetAID(); - continue; - } - copy_device_tensor = node_device_tensor; - } - MS_EXCEPTION_IF_NULL(copy_device_tensor); - MS_LOG(INFO) << "The input data of node:" << input_nodes[i]->DebugString() - << " need copy from device address:" << input_device_tensor << " ptr:" << input_device_tensor->GetPtr() - << " size:" << input_device_tensor->GetSize() << ", type:" << input_device_tensor->GetDeviceType() - << " to device address:" << copy_device_tensor << " ptr:" << copy_device_tensor->GetPtr() - << " size:" << copy_device_tensor->GetSize() << ", type:" << copy_device_tensor->GetDeviceType() - << ", is ref node need copy back:" << is_parameters_need_copy_[i] << " for actor:" << GetAID(); - if (!SyncCopy(copy_device_tensor, input_device_tensor, kDefaultStreamIndex)) { - MS_LOG(ERROR) << "Copy data failed for actor:" << GetAID() << " input index:" << i; - continue; - } - - if (is_parameters_need_copy_[i]) { - ref_node_addr_map_[copy_device_tensor] = input_device_tensor; - } - } - return true; -} - void SuperKernelActor::SendMemoryFreeReq(OpContext *const context) { MS_EXCEPTION_IF_NULL(context); MS_EXCEPTION_IF_NULL(graph_); @@ -2849,28 +2462,6 @@ void SuperKernelActor::LinkKernelActorByDeviceType(const CNodePtr &kernel, size_ kernel_actor->memory_free_list_[input_index] = input_copy_kernel_tensor; } -void SuperKernelActor::TrackInputMemory() { - if (!device::tracker::MemTrackerManager::GetInstance().IsEnabled()) { - return; - } - - for (auto &kernel_tensor : input_kernel_tensors_) { - if (kernel_tensor == nullptr || kernel_tensor->device_address() == nullptr || - !kernel_tensor->device_address()->IsPtrValid()) { - continue; - } - device::tracker::CALL_MEMORY_TRACKER_WITH_FILE(UseMemBlock, GetAID().Name(), - kernel_tensor->device_address()->GetPtr()); - } -} - -void SuperKernelActor::IncreaseNewRefCounts(OpContext *const context) { - if (enable_kbk_sub_graph_execute_) { - MS_LOG(DEBUG) << "Skip increaase new ref count for actor:" << GetAID(); - return; - } - std::for_each(output_data_.begin(), output_data_.end(), - [this](const auto &pair) { IncreaseNewRefCount(pair.first.get()); }); -} +void SuperKernelActor::IncreaseNewRefCounts(OpContext *const context) {} } // namespace runtime } // namespace mindspore diff --git a/mindspore/ccsrc/runtime/core/actors/base/super_kernel_actor.h b/mindspore/ccsrc/runtime/core/actors/base/super_kernel_actor.h index 071f693b22c..240ab082b54 100644 --- a/mindspore/ccsrc/runtime/core/actors/base/super_kernel_actor.h +++ b/mindspore/ccsrc/runtime/core/actors/base/super_kernel_actor.h @@ -78,16 +78,9 @@ class SuperKernelActor : public DebugAwareActor { size_t FetchInputNodePosition(const AnfNodePtr &intput_node); virtual void FetchInputDeviceTensor(OpContext *const context); - // The debug related operation interface. - void SendDebugReq(OpContext *const context) override; - // The memory related operation interface. - void SendMemoryAllocReq(OpContext *const context) override; - // The callback after memory alloc finished. - void OnMemoryAllocFinish(OpContext *const context) override; // The input may come from the control actor, so need free the input memory by the dynamic ref count. void SendMemoryFreeReq(OpContext *const context) override; - bool CopyInputData(const OpContext *context, const KernelGraphPtr &graph); const KernelGraphPtr &graph() const { return graph_; } @@ -161,9 +154,6 @@ class SuperKernelActor : public DebugAwareActor { std::queue> memory_free_lists_; protected: - bool CopyInputDataPersistedHandle(const DeviceContext *device_context, const KernelTensorPtr &input_kernel_tensor, - const KernelTensorPtr &node_kernel_tensor, size_t i); - // Generate and initialize all kernel actors by execution order of graph_ for kerkel by kernl execute a sub garph // mode. void BuildKernelActors(); @@ -189,7 +179,6 @@ class SuperKernelActor : public DebugAwareActor { size_t output_index); void RunGraphKernelByKernel(OpContext *const context); - void FetchPersistentDeviceTensor(); void UpdateMemoryTraceMangerStatus(OpContext *const context); void SetTraceMemoryForKernel(const KernelRunnerPtr &kernel_actor, bool safe_update = false); @@ -214,8 +203,6 @@ class SuperKernelActor : public DebugAwareActor { // Sync dispatch a kernel, including infer/resize/launch. void SyncDispatchKernel(OpContext *const context, KernelRunner *kernel_actor, bool hp_mode); - void TrackInputMemory(); - void FetchParameterInput(const KernelRunnerPtr &kernel_actor, OpContext *const context, size_t stream_id = SIZE_MAX); void FreeInputParamWithoutUser(OpContext *const context); -- Gitee