diff --git a/.gitmodules b/.gitmodules index d7f1a58bf9acd0c48d5a174b9a2eb55d068adfb2..16b5a0ee833fff2f656fd63fea2d9bbed15433cd 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,4 +2,4 @@ path = mindspore url = https://gitee.com/mindspore/mindspore.git # shallow = true - branch = r2.7.rc1 \ No newline at end of file + branch = tensor-storage-refactor diff --git a/mindspore-lite/cmake/ccsrc_converter.cmake b/mindspore-lite/cmake/ccsrc_converter.cmake index 51815b7c9afa54420904a7b2618e4be9d07ac048..22480151213347f4c78430027cb5d2abb4b62db3 100644 --- a/mindspore-lite/cmake/ccsrc_converter.cmake +++ b/mindspore-lite/cmake/ccsrc_converter.cmake @@ -24,6 +24,7 @@ if(MSLITE_ENABLE_CONVERTER) ${OPS_DIR}/kernel/common/kernel_factory.cc ${OPS_DIR}/kernel/common/format_utils.cc ${CCSRC_DIR}/utils/convert_utils.cc + ${CCSRC_DIR}/runtime/device/res_manager/utils/convert_tensor_utils.cc ) if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE OR MSLITE_ENABLE_CLOUD_INFERENCE) @@ -42,7 +43,6 @@ if(MSLITE_ENABLE_CONVERTER) ${OPS_DIR}/kernel/common/kernel_build_info.cc ${OPS_DIR}/kernel/common/oplib/oplib.cc ${CCSRC_DIR}/kernel/kernel_info.cc - ${CCSRC_DIR}/runtime/device/res_manager/utils/convert_tensor_utils.cc ${CCSRC_DIR}/utils/ms_device_shape_transfer.cc ${CCSRC_DIR}/runtime/device/kernel_runtime_manager.cc ${CCSRC_DIR}/runtime/hardware/device_context_manager.cc diff --git a/mindspore-lite/src/extendrt/CMakeLists.txt b/mindspore-lite/src/extendrt/CMakeLists.txt index 72c54233b1d2d810f99850b8b64bc4daabc43aec..b0df1a2c6d95a51f09308ba2145b232752decf53 100644 --- a/mindspore-lite/src/extendrt/CMakeLists.txt +++ b/mindspore-lite/src/extendrt/CMakeLists.txt @@ -54,6 +54,7 @@ if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE OR MSLITE_ENABLE_CLOUD_INFERENCE) ${CMAKE_CURRENT_SOURCE_DIR}/session/factory.cc ${CMAKE_CURRENT_SOURCE_DIR}/memory_offload/infer_strategy_builder.cc ${CMAKE_CURRENT_SOURCE_DIR}/infer_device_address.cc + ${CMAKE_CURRENT_SOURCE_DIR}/lite_device_address.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/kernel_build_utils.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/tensor_utils.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/runtime_utils.cc diff --git a/mindspore-lite/src/extendrt/cxx_api/llm_engine/llm_engine_plugin.cc b/mindspore-lite/src/extendrt/cxx_api/llm_engine/llm_engine_plugin.cc index 9876fcbaedafb0ebbea25488393b8a207e037c9c..83a01e6e2b2ed3e9b47f164888bbb50393336f5a 100644 --- a/mindspore-lite/src/extendrt/cxx_api/llm_engine/llm_engine_plugin.cc +++ b/mindspore-lite/src/extendrt/cxx_api/llm_engine/llm_engine_plugin.cc @@ -24,6 +24,7 @@ #include "ge/llm_engine.h" #include "external/ge_common/ge_api_error_codes.h" #include "ge/llm_error_codes.h" +#include "ir/device_address_maker.h" namespace mindspore { struct LLMModelInfo { @@ -967,7 +968,7 @@ MSTensor LLMEnginePlugin::ConvertGeTensorNoCopy(::ge::Tensor *ge_tensor_ptr) { } auto tensor_data = std::make_shared<TensorRefData>(ge_data, elem_num, ge_tensor.GetSize(), me_shape.size(), deleter); auto type_id = device::ascend::TransformUtil::ConvertGeDataType(ge_tensor_desc.GetDataType()); - auto tensor = std::make_shared<tensor::Tensor>(type_id, me_shape, tensor_data); + auto tensor = std::make_shared<tensor::Tensor>(type_id, me_shape, MakeDeviceAddress(type_id, me_shape, tensor_data)); auto tensor_impl = std::make_shared<TensorTensorImpl>(tensor); return MSTensor(tensor_impl); } diff --git a/mindspore-lite/src/extendrt/cxx_api/model/model_impl.cc b/mindspore-lite/src/extendrt/cxx_api/model/model_impl.cc index 1fbfcc27101fc3f4702d0e7908800f5b62d4d070..fff2c9eb61e30ea2ab7387a4337117f2b2eef3dd 100644 ---
a/mindspore-lite/src/extendrt/cxx_api/model/model_impl.cc +++ b/mindspore-lite/src/extendrt/cxx_api/model/model_impl.cc @@ -48,6 +48,7 @@ #include "include/api/model_group.h" #include "src/common/common.h" +#include "ir/tensor_api.h" namespace mindspore { namespace { const char *const kExecutionPlan = "execution_plan"; @@ -73,7 +74,7 @@ FuncGraphPtr CreateFuncGraphFromDataFlow(const void *model_data, size_t data_siz auto type_ptr = TypeIdToType(kNumberTypeUInt8); MS_CHECK_TRUE_RET(type_ptr != nullptr, nullptr); ShapeVector shape = {static_cast<int64_t>(data_size)}; - auto param_tensor = std::make_shared<tensor::Tensor>(kNumberTypeUInt8, shape); + auto param_tensor = tensor::empty(kNumberTypeUInt8, shape, device::DeviceType::kCPU); MS_CHECK_TRUE_RET(param_tensor != nullptr, nullptr); if (param_tensor->Size() != data_size) { MS_LOG(ERROR) << "The data size of param value is not equal to the data size: " << data_size; @@ -880,8 +881,9 @@ Status ModelImpl::Predict(const std::vector<MSTensor> &inputs, std::vectorGetMutablePtr() != + graph_outputs[i].device_address()->GetMutablePtr()) { output_remain = false; break; } diff --git a/mindspore-lite/src/extendrt/delegate/ascend_ge/ge_graph_executor.cc b/mindspore-lite/src/extendrt/delegate/ascend_ge/ge_graph_executor.cc index fd51b902edcf8969f34dc11cd13cadbb1bd5ba70..e557f41e2d98e36501a45d8ab4766d15b2aad530 100644 --- a/mindspore-lite/src/extendrt/delegate/ascend_ge/ge_graph_executor.cc +++ b/mindspore-lite/src/extendrt/delegate/ascend_ge/ge_graph_executor.cc @@ -26,6 +26,7 @@ #include "backend/ge_backend/graph_ir/utils.h" #include "common/device_type.h" #include "include/common/utils/ms_device_shape_transfer.h" +#include "ir/device_address_maker.h" #include "src/common/common.h" #include "src/common/file_utils.h" #include "cxx_api/acl_utils.h" @@ -1411,7 +1412,7 @@ bool GeGraphExecutor::GetOneRealInputs(const FuncGraphPtr &anf_graph, std::vecto MS_LOG(ERROR) << "Cannot find input " << input_name << " in input_shape " << input_shape_str; return false; } - input = std::make_shared<tensor::Tensor>(input->data_type(), it->second); + input = tensor::empty(input->data_type(), it->second, device::DeviceType::kCPU); } else if (GeDynamicUtils::IsDynamicInputShapes({input->shape_c()})) { MS_LOG(ERROR) << "Input " << i << " is dynamic shape " << input->shape_c() << ", but there is no input shape specified in AscendDeviceInfo or config file"; @@ -1876,7 +1877,7 @@ tensor::TensorPtr GeGraphExecutor::ConvertGeTensorNoCopy(::ge::Tensor *ge_tensor return nullptr; } auto tensor_data = std::make_shared<TensorRefData>(ge_data, elem_num, ge_tensor.GetSize(), me_shape.size(), deleter); - return std::make_shared<tensor::Tensor>(type_id, me_shape, tensor_data); + return std::make_shared<tensor::Tensor>(type_id, me_shape, MakeDeviceAddress(type_id, me_shape, tensor_data)); } std::vector<tensor::Tensor> GeGraphExecutor::GetOutputInfos(uint32_t graph_id) { diff --git a/mindspore-lite/src/extendrt/delegate/graph_executor/litert/graph_executor.cc b/mindspore-lite/src/extendrt/delegate/graph_executor/litert/graph_executor.cc index d8f4f210d7975a7c8f4c14f87433c3882bb2e8c4..f5a40a5633b475416a5682a4c713c257301f783f 100644 --- a/mindspore-lite/src/extendrt/delegate/graph_executor/litert/graph_executor.cc +++ b/mindspore-lite/src/extendrt/delegate/graph_executor/litert/graph_executor.cc @@ -25,6 +25,7 @@ #include "src/litert/lite_model.h" #include "src/litert/cpu_info.h" #include "include/errorcode.h" +#include "ir/device_address_maker.h" #include "flatbuffers/flatbuffers.h" #include "extendrt/mock/lite_runtime/converters.h" #include "extendrt/delegate/factory.h" @@ -292,7 +293,8 @@
std::vector<tensor::Tensor> LiteRTGraphExecutor::GetInputInfos(uint32_t) { std::vector<int64_t> lite_shape; std::transform(shape.begin(), shape.end(), std::back_inserter(lite_shape), [](int c) { return static_cast<int64_t>(c); }); - auto tmp = tensor::Tensor(type_id, lite_shape); + auto tmp = + tensor::Tensor(type_id, lite_shape, MakeDeviceAddress(type_id, lite_shape, true, device::DeviceType::kCPU)); tmp.set_name(inputs[i]->tensor_name()); input_tensors.push_back(tmp); } @@ -304,7 +306,8 @@ std::vector<tensor::Tensor> LiteRTGraphExecutor::GetOutputInfos(uint32_t) { std::vector<tensor::Tensor> output_tensors; for (size_t i = 0; i < outputs.size(); ++i) { auto type_id = static_cast<TypeId>(outputs[i].DataType()); - auto tmp = tensor::Tensor(type_id, outputs[i].Shape()); + auto tmp = tensor::Tensor(type_id, outputs[i].Shape(), + MakeDeviceAddress(type_id, outputs[i].Shape(), true, device::DeviceType::kCPU)); tmp.set_name(outputs[i].Name()); output_tensors.push_back(tmp); } diff --git a/mindspore-lite/src/extendrt/delegate/tensorrt/tensorrt_graph_executor.cc b/mindspore-lite/src/extendrt/delegate/tensorrt/tensorrt_graph_executor.cc index 83c8414a63c7018df991968f940fe4d59c65de3e..c1b507b480a5d5d65840f1fe136e3962503b8687 100644 --- a/mindspore-lite/src/extendrt/delegate/tensorrt/tensorrt_graph_executor.cc +++ b/mindspore-lite/src/extendrt/delegate/tensorrt/tensorrt_graph_executor.cc @@ -34,6 +34,7 @@ #include "src/extendrt/utils/func_graph_utils.h" #include "src/extendrt/delegate/tensorrt/optimizer/tensorrt_optimizer.h" #include "infer/custom.h" +#include "ir/device_address_maker.h" #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_d.h" #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_c.h" @@ -79,7 +80,7 @@ tensor::TensorPtr GetConstNodeValue(AnfNodePtr input_node) { } if (value->isa<tensor::Tensor>()) { auto tensor = value->cast<tensor::TensorPtr>(); - if (tensor == nullptr || tensor->data().const_data() == nullptr) { + if (tensor == nullptr || tensor->unsafe_data() == nullptr) { return nullptr; } return tensor; } @@ -643,7 +644,8 @@ std::vector<tensor::Tensor> TensorRTExecutor::GetInputInfos(uint32_t) { for (auto &tensor_info : inputs_) { auto type_id = static_cast<TypeId>(tensor_info.DataType()); auto shape = tensor_info.Shape(); - tensors.push_back(tensor::Tensor(type_id, shape)); + tensors.push_back( + tensor::Tensor(type_id, shape, MakeDeviceAddress(type_id, shape, true, device::DeviceType::kCPU))); } return tensors; } @@ -653,7 +655,8 @@ std::vector<tensor::Tensor> TensorRTExecutor::GetOutputInfos(uint32_t) { for (auto &tensor_info : outputs_) { auto type_id = static_cast<TypeId>(tensor_info.DataType()); auto shape = tensor_info.Shape(); - tensors.push_back(tensor::Tensor(type_id, shape)); + tensors.push_back( + tensor::Tensor(type_id, shape, MakeDeviceAddress(type_id, shape, true, device::DeviceType::kCPU))); } return tensors; } diff --git a/mindspore-lite/src/extendrt/delegate/tensorrt/tensorrt_subgraph.cc b/mindspore-lite/src/extendrt/delegate/tensorrt/tensorrt_subgraph.cc index 82f649a0fdd02525d40d723ea8cf10412adb89c9..95e15ff0ce552de8f61a7020a32bdca7b0dedb2d 100644 --- a/mindspore-lite/src/extendrt/delegate/tensorrt/tensorrt_subgraph.cc +++ b/mindspore-lite/src/extendrt/delegate/tensorrt/tensorrt_subgraph.cc @@ -735,19 +735,18 @@ int TensorRTSubGraph::VSLPreExectute(const std::vector<tensor::Tensor> &inputs, const int pos_ids_idx = Num2 + is_expert_ids; const int current_idx_idx = Num3 + is_expert_ids; if (i == input_ids_idx || i == expert_ids_idx || i == pos_ids_idx) { - int *in_ptr = static_cast<int *>(inputs[i].data_ptr()->data()); + int *in_ptr = static_cast<int *>(inputs[i].data_c()); int batch = inputs[trt_in_tensor_name_.size() -
Num1].ElementsNum(); int seq = inputs[0].ElementsNum() / batch; int export_num = (i != expert_ids_idx) ? Num1 : inputs[i].ElementsNum() / (batch * seq); - bool incremental_mode = - (static_cast<const int *>(inputs[pos_ids_idx].data().const_data())[0] != 0) ? true : false; + bool incremental_mode = (static_cast<const int *>(inputs[pos_ids_idx].unsafe_data())[0] != 0) ? true : false; size_t h_token = 0; for (int k = 0; k < batch; k++) { int actual_seq_len = (incremental_mode) ? Num1 - : (static_cast<const int *>(inputs[trt_in_tensor_name_.size() - Num1].data().const_data())[k] + Num1); - int batch_valid = static_cast<const int *>(inputs[trt_in_tensor_name_.size() - Num1].data().const_data())[k]; + : (static_cast<const int *>(inputs[trt_in_tensor_name_.size() - Num1].unsafe_data())[k] + Num1); + int batch_valid = static_cast<const int *>(inputs[trt_in_tensor_name_.size() - Num1].unsafe_data())[k]; h_token += (batch_valid == -1) ? 0 : actual_seq_len; } for (int j = 0; j < export_num; j++) { @@ -756,10 +755,9 @@ int TensorRTSubGraph::VSLPreExectute(const std::vector<tensor::Tensor> &inputs, int actual_seq_len = (incremental_mode) ? Num1 - : (static_cast<const int *>(inputs[trt_in_tensor_name_.size() - Num1].data().const_data())[k] + Num1); + : (static_cast<const int *>(inputs[trt_in_tensor_name_.size() - Num1].unsafe_data())[k] + Num1); for (int l = 0; l < actual_seq_len; l++) { - in_ptr[j * h_token + h_token_idx + l] = - static_cast<int *>(inputs[i].data_ptr()->data())[j * batch * seq + k * seq + l]; + in_ptr[j * h_token + h_token_idx + l] = static_cast<int *>(inputs[i].data_c())[j * batch * seq + k * seq + l]; } h_token_idx += actual_seq_len; } @@ -788,12 +786,17 @@ int TensorRTSubGraph::PreExecute(const std::vector<tensor::Tensor> &inputs, cons if (ret != RET_OK) { return ret; } + auto hasDeviceData = [&](const tensor::Tensor &t) -> bool { + auto device_address = t.device_address(); + return device_address != nullptr && device_address->GetMutablePtr() != nullptr && + device_address->GetDeviceType() != device::DeviceType::kCPU; + }; + for (size_t i = 0; i < trt_in_tensor_name_.size(); i++) { auto trt_tensor_name = trt_in_tensor_name_[i]; void *device_ptr = nullptr; - auto input_device_address = inputs[i].device_address(); - if (input_device_address != nullptr && input_device_address->GetMutablePtr() != nullptr) { - device_ptr = input_device_address->GetMutablePtr(); + if (hasDeviceData(inputs[i])) { + device_ptr = inputs[i].device_address()->GetMutablePtr(); } else { device_ptr = runtime_->GetAllocator()->MallocDeviceMem(trt_tensor_name, inputs_[i].DataSize(), ConvertDataType(inputs_[i].DataType())); @@ -822,7 +825,7 @@ int TensorRTSubGraph::PreExecute(const std::vector<tensor::Tensor> &inputs, cons void *device_ptr = nullptr; if (outputs.size() > i) { auto &output = outputs[i]; - if (output.device_address() && output.device_address()->GetMutablePtr()) { + if (hasDeviceData(output)) { if (output.Size() < outputs_[i].DataSize()) { MS_LOG(ERROR) << "Specified output device data size " << output.Size() << " cannot less than execute output data size " << outputs_[i].DataSize() @@ -832,7 +835,7 @@ int TensorRTSubGraph::PreExecute(const std::vector<tensor::Tensor> &inputs, cons device_ptr = output.device_address()->GetMutablePtr(); } } - if (!device_ptr) { + if (device_ptr == nullptr) { device_ptr = runtime_->GetAllocator()->MallocDeviceMem(trt_out_tensor_name, outputs_[i].DataSize(), ConvertDataType(outputs_[i].DataType())); if (device_ptr == nullptr) { @@ -864,7 +867,8 @@ int TensorRTSubGraph::PostExecute(std::vector<tensor::Tensor> *outputs, bool syn if (has_outputs) { auto &tensor = outputs->at(i); auto dst_device = tensor.device_address(); - if (dst_device == nullptr ||
dst_device->GetMutablePtr() == nullptr) { + if (dst_device == nullptr || dst_device->GetMutablePtr() == nullptr || + dst_device->GetDeviceType() == device::DeviceType::kCPU) { if (tensor.Size() < outputs_[i].DataSize()) { MS_LOG(ERROR) << "Specified output host data size " << tensor.Size() << " cannot less than execute output data size " << outputs_[i].DataSize() diff --git a/mindspore-lite/src/extendrt/lite_device_address.cc b/mindspore-lite/src/extendrt/lite_device_address.cc new file mode 100644 index 0000000000000000000000000000000000000000..11db38205b163498a09bbfe7d1e2b7040ee09462 --- /dev/null +++ b/mindspore-lite/src/extendrt/lite_device_address.cc @@ -0,0 +1,230 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/extendrt/lite_device_address.h" + +#include <functional> +#include <memory> +#include <string> +#include <unordered_map> +#include <utility> + +#include "ir/device_address_maker.h" +#include "runtime/device/res_manager/utils/convert_tensor_utils.h" +#include "utils/ms_context.h" + +namespace mindspore { +namespace runtime { +namespace test { +namespace { +DeviceAddressPtr CreateDeviceAddress(void *ptr, size_t size, const ShapeVector &shape_vector, const Format &format, + TypeId type_id, const std::string &device_name, uint32_t device_id, + uint32_t stream_id, const UserDataPtr &user_data = nullptr) { + return std::make_shared<TestDeviceAddress>(ptr, size, "DefaultFormat", type_id, device_name, device_id); +} +DeviceSyncPtr MakeTestDeviceAddress(TypeId data_type, const ShapeVector &shape, void *data_ptr, + DeviceAddressDeleter &&deleter) { + auto context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context); + auto device_id = context->get_param<uint32_t>(MS_CTX_DEVICE_ID); + auto data_size = SizeOf(shape) * abstract::TypeIdSize(data_type); + auto device_address = + CreateDeviceAddress(data_ptr, data_size, shape, Format::DEFAULT_FORMAT, data_type, "CPU", device_id, 0); + device_address->SetPointerRefCountDeleter(std::move(deleter)); + return device_address; +} + +const char device_name[] = "CPU"; +REGISTER_DEVICE_ADDRESS_MAKER(device::DeviceType::kCPU, [](TypeId data_type, const ShapeVector &shape, void *data_ptr, + DeviceAddressDeleter &&deleter) { + return MakeTestDeviceAddress(data_type, shape, data_ptr, std::move(deleter)); +}); + +// clang-format off +#define FOR_EACH_TYPE_BASE(M) \ + M(kNumberTypeBool, bool) \ + M(kNumberTypeUInt8, uint8_t) \ + M(kNumberTypeInt4, int8_t) \ + M(kNumberTypeInt8, int8_t) \ + M(kNumberTypeInt16, int16_t) \ + M(kNumberTypeInt32, int32_t) \ + M(kNumberTypeInt64, int64_t) \ + M(kNumberTypeUInt16, uint16_t) \ + M(kNumberTypeUInt32, uint32_t) \ + M(kNumberTypeUInt64, uint64_t) \ + M(kNumberTypeFloat16, float16) \ + M(kNumberTypeFloat32, float) \ + M(kNumberTypeFloat64, double) \ + M(kNumberTypeFloat8E4M3FN, float8_e4m3fn) \ + M(kNumberTypeFloat8E5M2, float8_e5m2) \ + M(kNumberTypeHiFloat8, hifloat8) \ + M(kNumberTypeComplex64, ComplexStorage<float>) \ + M(kNumberTypeComplex128, ComplexStorage<double>) + +#ifndef KERNEL_EXECUTOR_ANDROID +#define FOR_EACH_TYPE_EXTRA(M) M(kNumberTypeBFloat16,
bfloat16) +#else +#define FOR_EACH_TYPE_EXTRA(M) +#endif + +#define FOR_EACH_TYPE(M) \ + FOR_EACH_TYPE_BASE(M) \ + FOR_EACH_TYPE_EXTRA(M) + +#define REGISTER_SIZE(address_type_id, address_type) { address_type_id, sizeof(address_type) }, + +static const std::unordered_map<TypeId, size_t> kTypeSizeMap = { + FOR_EACH_TYPE(REGISTER_SIZE) +}; + +size_t GetTypeSize(TypeId tid) { + return kTypeSizeMap.at(tid); +} + +template <typename T> +using DstCopyFunc = void (*)(T *src_ptr, void *dst_ptr, size_t size); + +template <typename T> +static const std::unordered_map<TypeId, DstCopyFunc<T>> g_dst_copy_map = { +#define REGISTER_DST(dst_type_id, dst_type) \ + {dst_type_id, +[](T *src_ptr, void *dst_ptr, size_t size) { \ + auto buf = static_cast<dst_type *>(dst_ptr); \ + return tensor::TransDataType(src_ptr, buf, size); \ + }}, + FOR_EACH_TYPE(REGISTER_DST) +#undef REGISTER_DST +}; + +template <typename T> +void CopyData(T *src_ptr, size_t size, void *dst_ptr, TypeId dst_type_id) { + auto &m = g_dst_copy_map<T>; + auto it = m.find(dst_type_id); + if (it == m.end()) { + MS_LOG(EXCEPTION) << "Cannot construct Tensor because of unsupported dst data type: " << dst_type_id << "."; + } + it->second(src_ptr, dst_ptr, size); +} + +using SrcCopyFunc = std::function<void (void *src_ptr, void *dst_ptr, size_t size, TypeId dst_type_id)>; + +static const std::unordered_map<TypeId, SrcCopyFunc> g_src_copy_map = { +#define REGISTER_SRC(src_type_id, src_type) \ + {src_type_id, +[](void *src_ptr, void *dst_ptr, size_t size, TypeId dst_type_id) { \ + auto buf = static_cast<src_type *>(src_ptr); \ + return CopyData(buf, size, dst_ptr, dst_type_id); \ + }}, + FOR_EACH_TYPE(REGISTER_SRC) +#undef REGISTER_SRC +}; + +#undef FOR_EACH_TYPE +#undef FOR_EACH_TYPE_BASE +#undef FOR_EACH_TYPE_EXTRA +#undef REGISTER_SIZE +// clang-format on + +void CopyData(const DeviceAddress *src_device_address, const DeviceAddress *dst_device_address) { + MS_EXCEPTION_IF_NULL(src_device_address); + MS_EXCEPTION_IF_NULL(dst_device_address); + + TypeId src_type_id = src_device_address->type_id(); + TypeId dst_type_id = dst_device_address->type_id(); + auto src_size = src_device_address->GetSize() / GetTypeSize(src_type_id); + auto dst_size = dst_device_address->GetSize() / GetTypeSize(dst_type_id); + if (src_size != dst_size) { + MS_LOG(EXCEPTION) << "Not same shape in device address:" << src_device_address->ToString() + << " and:" << dst_device_address->ToString(); + } + + void *src_ptr = src_device_address->GetMutablePtr(); + void *dst_ptr = dst_device_address->GetMutablePtr(); + MS_EXCEPTION_IF_NULL(src_ptr); + MS_EXCEPTION_IF_NULL(dst_ptr); + + auto it = g_src_copy_map.find(src_type_id); + if (it == g_src_copy_map.end()) { + MS_LOG(EXCEPTION) << "Unsupported conversion from " << src_type_id << " to " << dst_type_id; + } + it->second(src_ptr, dst_ptr, src_size, dst_type_id); +} +} // namespace + +bool LiteAsyncCopy(const DeviceSyncPtr &dst_device_sync, const DeviceSyncPtr &src_device_sync, size_t stream_id, bool) { + const auto &dst_device_address = dynamic_cast<DeviceAddress *>(dst_device_sync.get()); + const auto &src_device_address = dynamic_cast<DeviceAddress *>(src_device_sync.get()); + MS_EXCEPTION_IF_NULL(dst_device_address); + MS_EXCEPTION_IF_NULL(src_device_address); + if (dst_device_address->GetSize() == 0 || src_device_address->GetSize() == 0) { + MS_LOG(INFO) << "No need sync for dst device address: " << dst_device_address->ToString() + << " and src device address: " << src_device_address->ToString(); + return true; + } + + if (dst_device_address->format() != src_device_address->format()) { + MS_LOG(ERROR) << "Format is different, src(format:" << src_device_address->format() + << "), dst(format:" << dst_device_address->format() << ") for device address:" <<
dst_device_address; + return false; + } + auto dst_ptr = dst_device_address->GetMutablePtr(); + auto src_ptr = src_device_address->GetMutablePtr(); + MS_EXCEPTION_IF_NULL(src_device_address->GetMutablePtr()); + MS_EXCEPTION_IF_NULL(dst_device_address->GetMutablePtr()); + if (dst_ptr == src_ptr) { + MS_LOG(DEBUG) << "host_ptr is equal to device ptr, request ignored."; + return true; + } + auto dst_type_id = dst_device_address->type_id(); + auto src_type_id = src_device_address->type_id(); + + if (src_type_id == dst_type_id) { + if (src_device_address->GetSize() > dst_device_address->GetSize()) { + MS_LOG(WARNING) << "Please check whether need sync data, src size: " << src_device_address->GetSize() + << ", dst size: " << dst_device_address->GetSize(); + return true; + } + auto ret_code = memcpy_s(dst_ptr, src_device_address->GetSize(), src_ptr, src_device_address->GetSize()); + // Return ERANGE when the copy size is larger than SECUREC_MEM_MAX_LEN. + if (ret_code == ERANGE) { + device::ConvertSameType(dst_device_address->GetMutablePtr(), src_device_address->GetMutablePtr(), + dst_device_address->GetSize(), src_type_id); + return true; + } else if (ret_code != EOK) { + MS_LOG(ERROR) << "Failed to copy tensor from device address:" << src_device_address->ToString() + << " to :" << dst_device_address->ToString(); + return false; + } else { + return true; + } + } + + MS_LOG(INFO) << "Types do not match. src type: " << TypeIdLabel(src_type_id) + << ", dst type: " << TypeIdLabel(dst_type_id) << " device_address:" << dst_device_address << " !"; + CopyData(src_device_address, dst_device_address); + return true; +} + +bool LiteSyncCopy(const DeviceSyncPtr &dst_device_sync, const DeviceSyncPtr &src_device_sync, size_t stream_id) { + return LiteAsyncCopy(dst_device_sync, src_device_sync, stream_id, false); +} + +MS_REGISTER_HAL_COPY_FUNC(DeviceType::kCPU, + ([](const DeviceSyncPtr &dst_device_sync, const DeviceSyncPtr &src_device_sync, + size_t stream_id) { return LiteSyncCopy(dst_device_sync, src_device_sync, stream_id); }), + ([](const DeviceSyncPtr &dst_device_sync, const DeviceSyncPtr &src_device_sync, + size_t stream_id, + bool) { return LiteSyncCopy(dst_device_sync, src_device_sync, stream_id); }), + ([](void *dst, const void *src, uint64_t size, size_t stream_id) { return true; })); + +} // namespace test +} // namespace runtime +} // namespace mindspore diff --git a/mindspore-lite/src/extendrt/lite_device_address.h b/mindspore-lite/src/extendrt/lite_device_address.h new file mode 100644 index 0000000000000000000000000000000000000000..4fffd2e4095f77fcf0deed968c79a32cf6ac650f --- /dev/null +++ b/mindspore-lite/src/extendrt/lite_device_address.h @@ -0,0 +1,65 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef MINDSPORE_LITE_SRC_EXTENDRT_LITE_DEVICE_ADDRESS_H_ +#define MINDSPORE_LITE_SRC_EXTENDRT_LITE_DEVICE_ADDRESS_H_ + +#include <memory> +#include <string> +#include <utility> + +#include "common/device_address.h" + +namespace mindspore { +namespace runtime { +namespace test { +using device::DeviceAddress; +using device::DeviceAddressPtr; +using device::DeviceType; + +class TestDeviceAddress : public DeviceAddress { + public: + TestDeviceAddress() : DeviceAddress() {} + TestDeviceAddress(void *ptr, size_t size) : DeviceAddress(ptr, size) {} + TestDeviceAddress(void *ptr, size_t size, const std::string &format, TypeId type_id, const std::string &device_name, + uint32_t device_id) + : DeviceAddress(ptr, size, format, type_id, device_name, device_id) {} + ~TestDeviceAddress() {} + virtual bool SyncDeviceToHost(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr, + bool sync_on_demand) const { + return true; + } + virtual bool SyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr, + const std::string &format) const { + return true; + } + virtual void ClearDeviceMemory() {} + DeviceType GetDeviceType() const override { return DeviceType::kCPU; } + + void set_data(tensor::TensorDataPtr &&data) override { data_ = std::move(data); } + + const tensor::TensorDataPtr &data() const override { return data_; } + + bool has_data() const override { return data_ != nullptr; } + + private: + // the data for numpy object. + tensor::TensorDataPtr data_; +}; +} // namespace test +} // namespace runtime +} // namespace mindspore +#endif // MINDSPORE_LITE_SRC_EXTENDRT_LITE_DEVICE_ADDRESS_H_ diff --git a/mindspore-lite/src/extendrt/mindir_loader/mindir_model/mindir_model_util.cc b/mindspore-lite/src/extendrt/mindir_loader/mindir_model/mindir_model_util.cc index 6e5e52d688f9caa215d8c0384568752689c99d1f..c5037435f468f855102ad5c9b5f8067fd4833721 100644 --- a/mindspore-lite/src/extendrt/mindir_loader/mindir_model/mindir_model_util.cc +++ b/mindspore-lite/src/extendrt/mindir_loader/mindir_model/mindir_model_util.cc @@ -25,6 +25,7 @@ #include "nnacl/op_base.h" #include "src/common/common.h" #include "src/common/log_util.h" +#include "ir/tensor_api.h" namespace mindspore::infer::mindir { static mindspore::HashMap<int, TypeId> kDefaultValueSwitchMap{ @@ -85,13 +86,13 @@ mindspore::ValuePtr MindirModelUtil::MakeValueFromTensorAttribute(const mind_ir: for (int i = 0; i < tensor_proto.dims_size(); i++) { shape.push_back(tensor_proto.dims(i)); } - tensor::TensorPtr tensor = std::make_shared<tensor::Tensor>(kDefaultValueSwitchMap[attr_tensor_type], shape); + tensor::TensorPtr tensor = tensor::empty(kDefaultValueSwitchMap[attr_tensor_type], shape, device::DeviceType::kCPU); MS_EXCEPTION_IF_NULL(tensor); const std::string &tensor_buf = tensor_proto.raw_data(); if (tensor_proto.has_raw_data()) { auto *tensor_data_buf = reinterpret_cast<uint8_t *>(tensor->data_c()); - auto ret = memcpy_s(tensor_data_buf, tensor->data().nbytes(), tensor_buf.data(), tensor_buf.size()); + auto ret = memcpy_s(tensor_data_buf, tensor->DataNBytes(), tensor_buf.data(), tensor_buf.size()); MS_CHECK_TRUE_MSG( ret != mindspore::lite::RET_OK, nullptr, "MindirModelUtil: Generate tensor ptr from tensor proto failed, failed to get tensor from tensor proto."); diff --git a/mindspore-lite/src/extendrt/session/ascend_native_session.cc b/mindspore-lite/src/extendrt/session/ascend_native_session.cc index de21153491a16e7a2f2982e86ba03203eb5f83f4..6dc450d03b0b0998fb1f5be757f7df5c4a36af78 100644 --- a/mindspore-lite/src/extendrt/session/ascend_native_session.cc +++
b/mindspore-lite/src/extendrt/session/ascend_native_session.cc @@ -27,6 +27,7 @@ #include "extendrt/delegate/ascend_native/delegate.h" #include "src/common/log_adapter.h" #include "src/litert/cxx_api/converters.h" +#include "ir/device_address_maker.h" #include "ir/graph_utils.h" #include "tools/optimizer/common/gllo_utils.h" #include "extendrt/delegate/ascend_native/ascend_native_impl/utils.h" @@ -403,7 +404,7 @@ std::vector AscendNativeSession::LiteTensorToTensor() std::vector shape64; std::transform(shape.begin(), shape.end(), std::back_inserter(shape64), [](int dim) { return static_cast(dim); }); - mindspore::tensor::Tensor tensor(type_id, shape64, ref_tensor_data); + mindspore::tensor::Tensor tensor(type_id, shape64, MakeDeviceAddress(type_id, shape64, ref_tensor_data)); tensors.emplace_back(std::move(tensor)); } return tensors; diff --git a/mindspore-lite/src/extendrt/session/default_session.cc b/mindspore-lite/src/extendrt/session/default_session.cc index faee07e1254b77906a7d6c3f21e5ed04aea13866..72c305f553effc5022e380ba6ae482d5dcc54970 100644 --- a/mindspore-lite/src/extendrt/session/default_session.cc +++ b/mindspore-lite/src/extendrt/session/default_session.cc @@ -29,6 +29,7 @@ #include "backend/graph_compiler/graph_partition.h" #include "common/tensor_util.h" #include "litert/cxx_api/tensor/tensor_impl.h" +#include "ir/device_address_maker.h" namespace mindspore { Status DefaultInferSession::Init(const std::shared_ptr &context, const ConfigInfos &config_info) { @@ -338,7 +339,7 @@ std::vector DefaultInferSession::LiteTensorToTensor( std::transform(shape.begin(), shape.end(), std::back_inserter(shape64), [](int dim) { return static_cast(dim); }); - mindspore::tensor::Tensor tensor(type_id, shape64, ref_tensor_data); + mindspore::tensor::Tensor tensor(type_id, shape64, MakeDeviceAddress(type_id, shape64, ref_tensor_data)); auto device_address = abstract_tensor->device_data(); if (device_address != nullptr) { auto lite_device_address = std::make_shared(device_address, abstract_tensor->Size()); diff --git a/mindspore-lite/src/extendrt/session/single_op_session.cc b/mindspore-lite/src/extendrt/session/single_op_session.cc index 4cded112d7bc4668edcad70d45ac376bda4ded02..ca9babc38a491cea8656f38a4a2cb111d2d48f69 100644 --- a/mindspore-lite/src/extendrt/session/single_op_session.cc +++ b/mindspore-lite/src/extendrt/session/single_op_session.cc @@ -32,6 +32,7 @@ #include "src/extendrt/utils/kernel_build_utils.h" #include "src/extendrt/kernel/ascend/plugin/ascend_kernel_plugin.h" #include "src/common/common.h" +#include "ir/device_address_maker.h" #include "mindspore/ops/infer/custom.h" #include "extendrt/session/factory.h" #include "extendrt/utils/runtime_utils.h" @@ -409,7 +410,7 @@ void SingleOpInferSession::SetBackOutputIfDynamic(std::vector *o }; auto ref_tensor_data = std::make_shared(host_addr->addr, elem_num, host_addr->size, shape.size(), acl_mem_deleter); - (*outputs)[i] = tensor::Tensor(out_type, shape, ref_tensor_data); + (*outputs)[i] = tensor::Tensor(out_type, shape, MakeDeviceAddress(out_type, shape, ref_tensor_data)); MS_LOG(DEBUG) << "resetting kernel tensor shape to 0 for the next prediction"; kernel_args_.outputs[i]->SetShapeVector({0}); } @@ -434,7 +435,8 @@ Status SingleOpInferSession::InitInputOutputData(const std::vectorGetMutablePtr() != nullptr) { + if (input_device_address != nullptr && input_device_address->GetMutablePtr() != nullptr && + input_device_address->GetDeviceType() != device::DeviceType::kCPU) { auto device_ptr = input_device_address->GetMutablePtr(); 
kernel_args_.inputs[i]->SetData(std::make_shared(device_ptr, input.Size())); kernel_args_.inputs[i]->SetHostData(nullptr); @@ -446,7 +448,11 @@ Status SingleOpInferSession::InitInputOutputData(const std::vectorempty()) { std::transform(kernel_args_.outputs.begin(), kernel_args_.outputs.end(), std::back_inserter(*outputs), - [](auto &item) { return tensor::Tensor(item->dtype_id(), item->GetShapeVector()); }); + [](auto &item) { + return tensor::Tensor( + item->dtype_id(), item->GetShapeVector(), + MakeDeviceAddress(item->dtype_id(), item->GetShapeVector(), true, device::DeviceType::kCPU)); + }); } if (outputs->size() != kernel_args_.outputs.size()) { MS_LOG(ERROR) << "Given outputs size " << outputs->size() << " != graph inputs size " @@ -463,7 +469,8 @@ Status SingleOpInferSession::InitInputOutputData(const std::vectorGetMutablePtr() != nullptr) { + if (output_device_address != nullptr && output_device_address->GetMutablePtr() != nullptr && + output_device_address->GetDeviceType() != device::DeviceType::kCPU) { auto device_ptr = output_device_address->GetMutablePtr(); kernel_args_.outputs[i]->SetData(std::make_shared(device_ptr, output.Size())); kernel_args_.outputs[i]->SetHostData(nullptr); @@ -507,7 +514,8 @@ Status SingleOpInferSession::InitVariableWeights(const std::vector(data_type), shape); kernel_tensor->SetData(std::make_shared(input->data_c(), input->Size())); auto input_device_address = input->device_address(); - if (input_device_address != nullptr && input_device_address->GetMutablePtr() != nullptr) { + if (input_device_address != nullptr && input_device_address->GetMutablePtr() != nullptr && + input_device_address->GetDeviceType() != device::DeviceType::kCPU) { auto device_ptr = input_device_address->GetMutablePtr(); kernel_tensor->SetData(std::make_shared(device_ptr, input->Size())); kernel_tensor->SetHostData(nullptr); diff --git a/mindspore-lite/src/extendrt/utils/func_graph_utils.cc b/mindspore-lite/src/extendrt/utils/func_graph_utils.cc index 1bc434c891e556362c64aa3c82e7e9da2b7fc0d7..f54fe973d95bb95e9be16201ff4297d186b7c3b0 100644 --- a/mindspore-lite/src/extendrt/utils/func_graph_utils.cc +++ b/mindspore-lite/src/extendrt/utils/func_graph_utils.cc @@ -35,6 +35,7 @@ #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_m.h" #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_s.h" +#include "ir/tensor_api.h" namespace mindspore { const PrimitivePtr kPrimMakeTupleV2 = std::make_shared("make_tuple"); ValuePtr FuncGraphUtils::GetNodeValuePtr(AnfNodePtr input_node) { @@ -67,7 +68,7 @@ tensor::TensorPtr FuncGraphUtils::GetConstNodeValue(AnfNodePtr input_node) { } if (value->isa()) { auto tensor = value->cast(); - if (tensor == nullptr || tensor->data().const_data() == nullptr) { + if (tensor == nullptr || tensor->unsafe_data() == nullptr) { return nullptr; } return tensor; @@ -326,7 +327,7 @@ void FuncGraphUtils::GetFuncGraphInputsInfo(const FuncGraphPtr &func_graph, std: auto name = FuncGraphUtils::GetTensorName(tensor); auto data_type = FuncGraphUtils::GetTensorDataType(tensor); auto shape = FuncGraphUtils::GetTensorShape(tensor); - auto ms_tensor = std::make_shared(static_cast(data_type), shape); + auto ms_tensor = tensor::empty(static_cast(data_type), shape, device::DeviceType::kCPU); ms_tensor->set_name(name); inputs->push_back(ms_tensor); inputs_name->push_back(name); @@ -349,7 +350,7 @@ void FuncGraphUtils::GetFuncGraphOutputsInfo(const FuncGraphPtr &func_graph, std auto name = FuncGraphUtils::GetTensorName(tensor); auto data_type = 
FuncGraphUtils::GetTensorDataType(tensor); auto shape = FuncGraphUtils::GetTensorShape(tensor); - auto ms_tensor = std::make_shared(static_cast(data_type), shape); + auto ms_tensor = tensor::empty(static_cast(data_type), shape, device::DeviceType::kCPU); ms_tensor->set_name(name); outputs->push_back(ms_tensor); output_names->push_back(name); diff --git a/mindspore-lite/src/extendrt/utils/tensor_utils.cc b/mindspore-lite/src/extendrt/utils/tensor_utils.cc index 3d1ce7d1c58e5f944ee5316af3981cbab6aef715..95b66e853bfed830f36d7447d0353db66cda208b 100644 --- a/mindspore-lite/src/extendrt/utils/tensor_utils.cc +++ b/mindspore-lite/src/extendrt/utils/tensor_utils.cc @@ -24,6 +24,7 @@ #include "common/common_utils.h" #include "mindspore/ccsrc/kernel/framework_utils.h" #include "common/format_utils.h" +#include "ir/device_address_maker.h" namespace mindspore { TensorRefData::TensorRefData(void *data, size_t bytes_size, size_t data_size, size_t ndim, @@ -51,7 +52,7 @@ ssize_t TensorRefData::ndim() const { return static_cast(ndim_); } void *TensorRefData::data() { return data_; } -const void *TensorRefData::const_data() const { return data_; } +void *TensorRefData::const_data() const { return data_; } std::string TensorRefData::ToString(TypeId type, const ShapeVector &shape, bool use_comma) const { std::stringstream stream; @@ -88,7 +89,8 @@ std::vector TensorUtils::MSTensorToTensorPtr(const auto data = ms_tensor.MutableData(); auto data_size = ms_tensor.DataSize(); auto ref_tensor_data = std::make_shared(data, ms_tensor.ElementNum(), data_size, shape.size()); - auto tensor_ptr = std::make_shared(type_id, shape, ref_tensor_data); + auto tensor_ptr = + std::make_shared(type_id, shape, MakeDeviceAddress(type_id, shape, ref_tensor_data)); tensor_ptr->set_name(ms_tensor.Name()); tensor_ptr->set_data_type(type_id); tensor_ptrs.push_back(tensor_ptr); @@ -118,7 +120,7 @@ std::vector TensorUtils::MSTensorToTensor(const std:: auto data = const_cast(ms_tensor.Data().get()); auto data_size = ms_tensor.DataSize(); auto ref_tensor_data = std::make_shared(data, ms_tensor.ElementNum(), data_size, shape.size()); - mindspore::tensor::Tensor tensor(type_id, shape, ref_tensor_data); + mindspore::tensor::Tensor tensor(type_id, shape, MakeDeviceAddress(type_id, shape, ref_tensor_data)); auto device_address = ms_tensor.GetDeviceData(); if (device_address != nullptr) { auto lite_device_address = std::make_shared(device_address, ms_tensor.DataSize()); diff --git a/mindspore-lite/src/extendrt/utils/tensor_utils.h b/mindspore-lite/src/extendrt/utils/tensor_utils.h index 79ef5c2bb4d04b4ee29cb75caea51cafbf4def2f..f6b201735dc9adb526e353f1a627c18ec6506444 100644 --- a/mindspore-lite/src/extendrt/utils/tensor_utils.h +++ b/mindspore-lite/src/extendrt/utils/tensor_utils.h @@ -47,9 +47,7 @@ class TensorRefData : public tensor::TensorData { ssize_t nbytes() const override; ssize_t ndim() const override; void *data() override; - const void *const_data() const override; - bool is_sub_data() const override { return false; } - bool has_sub_data() const override { return false; } + void *const_data() const override; std::string ToString(TypeId type, const ShapeVector &shape, bool use_comma) const override; private: @@ -135,7 +133,7 @@ class TensorTensorImpl : public MutableTensorImpl { void *GetDeviceData() override { MS_EXCEPTION_IF_NULL(tensor_); auto device_address = tensor_->device_address(); - if (device_address == nullptr) { + if (device_address == nullptr || tensor_->device_address()->GetDeviceType() == device::DeviceType::kCPU) { 
return nullptr; } return device_address->GetMutablePtr(); @@ -143,7 +141,8 @@ class TensorTensorImpl : public MutableTensorImpl { bool IsDevice() const override { MS_EXCEPTION_IF_NULL(tensor_); - return tensor_->device_address() != nullptr; + return tensor_->device_address() != nullptr && + tensor_->device_address()->GetDeviceType() != device::DeviceType::kCPU; } bool IsConst() const override { return false; } diff --git a/mindspore-lite/test/common/import_from_meta_graphT.cc b/mindspore-lite/test/common/import_from_meta_graphT.cc index 72387b5599f454baf82bfbed358c751fa20bc7f9..b3d19e6a82a6be45246747ef9b99d7d7d8c5c4de 100644 --- a/mindspore-lite/test/common/import_from_meta_graphT.cc +++ b/mindspore-lite/test/common/import_from_meta_graphT.cc @@ -24,6 +24,7 @@ #include "include/errorcode.h" #include "src/common/utils.h" #include "tools/common/tensor_util.h" +#include "ir/tensor_api.h" namespace mindspore::lite { AnfNodePtr AnfImporterFromMetaGraphT::GetNode(int tensor_id) { @@ -56,7 +57,7 @@ int AnfImporterFromMetaGraphT::ConverterConstTensor() { } else { parameter->set_name("const-" + std::to_string(i)); } - tensor::TensorPtr tensor_info = std::make_shared(type_id, shape_vector); + tensor::TensorPtr tensor_info = tensor::empty(type_id, shape_vector, device::DeviceType::kCPU); if (tensor_info == nullptr) { MS_LOG(ERROR) << "create tensor info failed."; return RET_ERROR; diff --git a/mindspore-lite/tools/common/custom_ascend_utils.cc b/mindspore-lite/tools/common/custom_ascend_utils.cc index f99ff767efbbb29d388ffba61a6db349b15781aa..d67e1ecf7718c7d978afcbff7f734a83e872ec12 100644 --- a/mindspore-lite/tools/common/custom_ascend_utils.cc +++ b/mindspore-lite/tools/common/custom_ascend_utils.cc @@ -19,6 +19,7 @@ #include "tools/common/func_graph_utils.h" #include "mindspore/ops/infer/tuple_get_item.h" #include "src/common/common.h" +#include "ir/tensor_api.h" #include "tools/optimizer/common/gllo_utils.h" #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_m.h" @@ -90,7 +91,7 @@ ParameterPtr CustomAscendUtils::CreateOmParameter(const FuncGraphPtr &func_graph om_parameter->set_abstract(abstract_tensor); auto param_value = - std::make_shared(kNumberTypeUInt8, ShapeVector({static_cast(om_data.DataSize())})); + tensor::empty(kNumberTypeUInt8, ShapeVector({static_cast(om_data.DataSize())}), device::DeviceType::kCPU); MS_CHECK_TRUE_MSG(param_value != nullptr, nullptr, "param_value is nullptr."); auto tensor_data = param_value->data_c(); MS_CHECK_TRUE_MSG(tensor_data != nullptr, nullptr, "New Tensor failed."); @@ -173,7 +174,7 @@ bool CustomAscendUtils::GetZeroValueRefDatas(const ops::PrimitiveCPtr &primc, auto param_name = GetValue(value_ptr_list[i]); auto data_type = static_cast(GetValue(value_ptr_list[i + 1])); auto param_shape = GetValue(value_ptr_list[i + 2]); - auto tensor = std::make_shared(data_type, param_shape); + auto tensor = tensor::empty(data_type, param_shape, device::DeviceType::kCPU); ref_infos->push_back(std::make_pair(param_name, tensor)); } return true; diff --git a/mindspore-lite/tools/common/tensor_util.cc b/mindspore-lite/tools/common/tensor_util.cc index 319a682dc6b9dc06506ee57ab4a8027f8c55aa5b..41e21f2323d82e7d489c8bd5c2b6e47c4585b558 100644 --- a/mindspore-lite/tools/common/tensor_util.cc +++ b/mindspore-lite/tools/common/tensor_util.cc @@ -20,6 +20,7 @@ #include "tools/common/graph_util.h" #include "abstract/utils.h" #include "nnacl/op_base.h" +#include "ir/tensor_api.h" namespace mindspore::lite { namespace { @@ -76,14 +77,14 @@ tensor::TensorPtr 
CreateTensorInfo(const void *data, size_t data_size, const std tensor::TensorPtr tensor_info = nullptr; if (shape.empty() && data_size == mindspore::abstract::TypeIdSize(data_type)) { ShapeVector scalar_shape = {1}; - tensor_info = std::make_shared(data_type, scalar_shape); + tensor_info = tensor::empty(data_type, scalar_shape, device::DeviceType::kCPU); if (tensor_info == nullptr) { MS_LOG(ERROR) << "new tensor init failed"; return nullptr; } tensor_info->set_shape({}); } else { - tensor_info = std::make_shared(data_type, shape); + tensor_info = tensor::empty(data_type, shape, device::DeviceType::kCPU); } if (tensor_info == nullptr) { MS_LOG(ERROR) << "new tensor init failed"; @@ -97,7 +98,7 @@ tensor::TensorPtr CreateTensorInfo(const void *data, size_t data_size, const std return nullptr; } MS_CHECK_TRUE_MSG(tensor_info->Size() == data_size, nullptr, "invalid const tensor"); - auto ret = memcpy_s(tensor_info->data_c(), tensor_info->data().nbytes(), data, data_size); + auto ret = memcpy_s(tensor_info->data_c(), tensor_info->DataNBytes(), data, data_size); if (ret != EOK) { MS_LOG(ERROR) << "memcpy_s error : " << ret; return nullptr; @@ -149,7 +150,7 @@ int SetTensorData(const tensor::TensorPtr &tensor_info, const void *data, size_t return RET_ERROR; } MS_CHECK_TRUE_MSG(tensor_info->Size() == data_size, RET_ERROR, "invalid const tensor"); - auto ret = memcpy_s(tensor_info->data_c(), tensor_info->data().nbytes(), data, data_size); + auto ret = memcpy_s(tensor_info->data_c(), tensor_info->DataNBytes(), data, data_size); if (ret != EOK) { MS_LOG(ERROR) << "memcpy_s error : " << ret; return RET_ERROR; @@ -191,10 +192,12 @@ int UpdateTensorTFromTensorInfo(const tensor::TensorPtr &src_tensor, std::unique (void)std::transform(shape_vector.begin(), shape_vector.end(), std::back_inserter(dims), [](const int64_t &value) { return static_cast(value); }); schema_tensor->dims = dims; - if (src_tensor->data().data() != nullptr) { - schema_tensor->data.resize(src_tensor->data().nbytes()); - if (EOK != memcpy_s(schema_tensor->data.data(), schema_tensor->data.size(), src_tensor->data().data(), - src_tensor->data().nbytes())) { + auto src_device = src_tensor->device_address(); + if (src_device != nullptr && src_device->GetMutablePtr() != nullptr && + src_device->GetDeviceType() != device::DeviceType::kCPU) { + auto data_ptr = src_device->GetMutablePtr(); + schema_tensor->data.resize(src_tensor->DataNBytes()); + if (EOK != memcpy_s(schema_tensor->data.data(), schema_tensor->data.size(), data_ptr, src_tensor->DataNBytes())) { MS_LOG(ERROR) << "memcpy_s failed."; return RET_ERROR; } diff --git a/mindspore-lite/tools/converter/CMakeLists.txt b/mindspore-lite/tools/converter/CMakeLists.txt index bbdf5a362927bdf2d89896d9fcd96a5ae3981524..d243a751086ec4d3b1e2d25cbb86c75cb5381837 100644 --- a/mindspore-lite/tools/converter/CMakeLists.txt +++ b/mindspore-lite/tools/converter/CMakeLists.txt @@ -188,7 +188,14 @@ endif() set(MODEL_LOADER_FRAMEWORK_SRC ${MODEL_LOADER_FRAMEWORK_SRC} ${SRC_DIR}/extendrt/mindir_loader/model_loader.cc + ${SRC_DIR}/extendrt/lite_device_address.cc ) +if(NOT MSLITE_ENABLE_CONVERTER) + set(MODEL_LOADER_FRAMEWORK_SRC + ${MODEL_LOADER_FRAMEWORK_SRC} + ${CCSRC_DIR}/runtime/device/res_manager/utils/convert_tensor_utils.cc + ) +endif() if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE OR MSLITE_ENABLE_CLOUD_INFERENCE) add_compile_definitions(ENABLE_CLOUD_FUSION_INFERENCE) diff --git a/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/graph/ascend/ascend_graph_impl.cc 
b/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/graph/ascend/ascend_graph_impl.cc index 70009505ae9fb7e5be6e238ad679cf725c42c801..45615a3ca4a8b1b7b7052b6e2b8c9cf66767c7cd 100644 --- a/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/graph/ascend/ascend_graph_impl.cc +++ b/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/graph/ascend/ascend_graph_impl.cc @@ -153,11 +153,13 @@ Status AscendGraphImpl::ExecuteModel(const std::vector &request, std:: MS_LOG(ERROR) << "Execute Model Failed"; return kMCFailed; } + + std::vector outputs_cpu; for (const auto &out : outputs) { MS_EXCEPTION_IF_NULL(out); - out->data_sync(); + outputs_cpu.push_back(out->cpu()); } - last_outputs_ = outputs; + last_outputs_ = outputs_cpu; reply->clear(); *reply = GetOutputs(); return kSuccess; diff --git a/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/graph/gpu/gpu_graph_impl.cc b/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/graph/gpu/gpu_graph_impl.cc index 52682a572562290bd2d34a4a32bcdf0bf9921720..768e0e2170c0c4cd51c3822dbbbef04ce1a2d8ec 100644 --- a/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/graph/gpu/gpu_graph_impl.cc +++ b/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/graph/gpu/gpu_graph_impl.cc @@ -193,11 +193,13 @@ Status GPUGraphImpl::ExecuteModel(const std::vector &request, std::vec MS_LOG(ERROR) << "Execute Model Failed"; return kMCFailed; } + + std::vector outputs_cpu; for (const auto &out : outputs) { MS_EXCEPTION_IF_NULL(out); - out->data_sync(); + outputs_cpu.push_back(out->cpu()); } - last_outputs_ = outputs; + last_outputs_ = outputs_cpu; reply->clear(); *reply = GetOutputs(); return kSuccess; @@ -293,11 +295,9 @@ std::vector GPUGraphImpl::GetOutputs() { size_t data_size = tensor->Size(); if (i < last_outputs_.size()) { MS_EXCEPTION_IF_NULL(last_outputs_[i]); - if (last_outputs_[i]->NeedSyncDeviceToHost()) { - last_outputs_[i]->data_sync(false); - } - data = last_outputs_[i]->data_c(); - data_size = last_outputs_[i]->Size(); + auto cpu_tensor = last_outputs_[i]->cpu(); + data = cpu_tensor->data_c(); + data_size = cpu_tensor->Size(); } result[i] = MSTensor(output_names_[i], static_cast(tensor->data_type()), tensor->shape(), data, data_size); diff --git a/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/graph/graph_impl.h b/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/graph/graph_impl.h index 27703ed96bf35f18caa9fff4b3497349dc1a7c1c..b278f196c317ee9fb1c26e710f51784802154c24 100644 --- a/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/graph/graph_impl.h +++ b/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/graph/graph_impl.h @@ -30,6 +30,7 @@ #include "backend/ms_backend/ms_backend.h" #include "backend/backend_manager/backend_jit_config.h" +#include "ir/tensor_api.h" namespace mindspore { class GraphCell::GraphImpl { public: @@ -115,7 +116,7 @@ class GraphCell::GraphImpl { auto kernel_build_info = AnfAlgo::GetSelectKernelBuildInfo(parameter); MS_EXCEPTION_IF_NULL(kernel_build_info); auto data_type = kernel_build_info->GetOutputDeviceType(0); - auto ms_tensor = std::make_shared(data_type, input_shape); + auto ms_tensor = tensor::empty(data_type, input_shape, device::DeviceType::kCPU); inputs->push_back(ms_tensor); inputs_name->push_back(parameter->name()); } diff --git a/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/model/acl/acl_model_multi.cc 
b/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/model/acl/acl_model_multi.cc index fade358054e2a1e863f2a02c720df5ca4812f4b3..78475737fee061df98ea81e225f376ecbabea2ca 100644 --- a/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/model/acl/acl_model_multi.cc +++ b/mindspore-lite/tools/converter/adapter/acl/cxx_api_lite/cxx_api/model/acl/acl_model_multi.cc @@ -28,6 +28,7 @@ #include "cxx_api/model/acl/acl_vm/ms_tensor_ref.h" #include "cxx_api/model/acl/acl_vm/acl_vm.h" +#include "ir/tensor_api.h" namespace mindspore { API_MODEL_REG(Ascend310, AclModelMulti); @@ -184,7 +185,7 @@ void AclModelMulti::SetInputs() { auto elem = tensor_abs->element(); MS_EXCEPTION_IF_NULL(elem); auto type_id = elem->BuildType()->type_id(); - auto tensor = std::make_shared(type_id, tensor_shape->shape()); + auto tensor = tensor::empty(type_id, tensor_shape->shape(), device::DeviceType::kCPU); std::vector shape = tensor->shape_c(); auto input_tensor = MSTensor::CreateTensor(input_param->name(), static_cast(tensor->data_type_c()), diff --git a/mindspore-lite/tools/converter/adapter/acl/mapper/squeeze_mapper.cc b/mindspore-lite/tools/converter/adapter/acl/mapper/squeeze_mapper.cc index 3e418ad429164fd7de61e07fb9680af45d344103..13c5de32329b3e67bb41fd804a431bac4bc4cf07 100644 --- a/mindspore-lite/tools/converter/adapter/acl/mapper/squeeze_mapper.cc +++ b/mindspore-lite/tools/converter/adapter/acl/mapper/squeeze_mapper.cc @@ -51,7 +51,7 @@ bool SqueezeMapper::GetAxisValue(AnfNodePtr input_node, std::vector *ax } if (value->isa()) { auto tensor = value->cast(); - if (tensor == nullptr || tensor->data().const_data() == nullptr) { + if (tensor == nullptr || tensor->unsafe_data() == nullptr) { return false; } if (tensor->data_type() == kNumberTypeInt64) { diff --git a/mindspore-lite/tools/converter/export_model.cc b/mindspore-lite/tools/converter/export_model.cc index 18b38208da0065cb175a8ac08fea5742d4ca2be0..e8163158f2595d7a1c91e22bd488c0fee6c6c25c 100644 --- a/mindspore-lite/tools/converter/export_model.cc +++ b/mindspore-lite/tools/converter/export_model.cc @@ -39,6 +39,7 @@ #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_r.h" #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_t.h" +#include "ir/tensor_api.h" namespace mindspore { namespace lite { namespace { @@ -112,7 +113,7 @@ AnfNodePtr CloneParameterAndValueNode(const CNodePtr &cnode, size_t index, const } std::shared_ptr tensor_info; if (static_cast(data_info.compress_type_) == TensorCompressionType::kNoCompression) { - tensor_info = std::make_shared(static_cast(data_info.data_type_), shape_vec); + tensor_info = tensor::empty(static_cast(data_info.data_type_), shape_vec, device::DeviceType::kCPU); } else { tensor_info = std::make_shared(static_cast(data_info.data_type_), shape_vec, data_info.data_.size(), @@ -121,11 +122,11 @@ AnfNodePtr CloneParameterAndValueNode(const CNodePtr &cnode, size_t index, const MS_CHECK_TRUE_RET(tensor_info != nullptr, nullptr); if (!data_info.data_.empty()) { auto tensor_data = reinterpret_cast(tensor_info->data_c()); - if (tensor_data == nullptr || tensor_info->data().nbytes() < 0) { + if (tensor_data == nullptr || tensor_info->DataNBytes() < 0) { MS_LOG(ERROR) << "tensor info data is nullptr or the size is smaller than zero."; return nullptr; } - if (memcpy_s(tensor_data, tensor_info->data().nbytes(), data_info.data_.data(), data_info.data_.size()) != EOK) { + if (memcpy_s(tensor_data, tensor_info->DataNBytes(), data_info.data_.data(), data_info.data_.size()) != EOK) { 
MS_LOG(ERROR) << "memcpy_s failed"; return nullptr; } diff --git a/mindspore-lite/tools/converter/import/mindir_adjust.cc b/mindspore-lite/tools/converter/import/mindir_adjust.cc index 8c727c8fcaf35318aaad1179daf2fe59c6deb9ee..29075a35cb5234805db1099b85714071954bc7b9 100644 --- a/mindspore-lite/tools/converter/import/mindir_adjust.cc +++ b/mindspore-lite/tools/converter/import/mindir_adjust.cc @@ -31,6 +31,7 @@ #include "infer/fake_quant_param.h" #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_f.h" +#include "ir/tensor_api.h" namespace mindspore { namespace lite { namespace { @@ -204,17 +205,17 @@ int MindirAdjust::ValueNodeInt64Convert(AnfNodePtr anf_node) { MS_CHECK_TRUE_MSG(utils::cast(abstract_tensor->BuildShape()) != nullptr, RET_NULL_PTR, "Failed to cast pointer."); auto shape_vector = utils::cast(abstract_tensor->BuildShape())->shape(); - auto dest_tensor_info = std::make_shared(kNumberTypeInt32, shape_vector); + auto dest_tensor_info = tensor::empty(kNumberTypeInt32, shape_vector, device::DeviceType::kCPU); MS_CHECK_TRUE_MSG(dest_tensor_info != nullptr, RET_NULL_PTR, "dest_tensor_info is nullptr."); MS_CHECK_TRUE_MSG(dest_tensor_info->data_c() != nullptr, RET_ERROR, "dest_tensor_info->data_c() is nullptr"); - MS_CHECK_TRUE_MSG(dest_tensor_info->data().nbytes() >= static_cast(sizeof(int32_t)), RET_ERROR, + MS_CHECK_TRUE_MSG(dest_tensor_info->DataNBytes() >= static_cast(sizeof(int32_t)), RET_ERROR, "num_bits_tensor->data_c() is not longer enough for int32_t"); auto *dest_data_buf = reinterpret_cast(dest_tensor_info->data_c()); MS_CHECK_TRUE_MSG(dest_data_buf != nullptr, RET_NULL_PTR, "dest_data_buf is nullptr."); auto src_tensor_info = value->cast(); MS_CHECK_TRUE_MSG(src_tensor_info != nullptr, RET_NULL_PTR, "src_tensor_info is nullptr."); MS_CHECK_TRUE_MSG(src_tensor_info->data_c() != nullptr, RET_ERROR, "src_tensor_info->data_c() is nullptr"); - MS_CHECK_TRUE_MSG(src_tensor_info->data().nbytes() >= static_cast(sizeof(int64_t)), RET_ERROR, + MS_CHECK_TRUE_MSG(src_tensor_info->DataNBytes() >= static_cast(sizeof(int64_t)), RET_ERROR, "num_bits_tensor->data_c() is not longer enough for int64_t"); auto *src_data_buf = reinterpret_cast(src_tensor_info->data_c()); MS_CHECK_TRUE_MSG(dest_tensor_info->ElementsNum() == src_tensor_info->ElementsNum(), RET_ERROR, diff --git a/mindspore-lite/tools/converter/offline_packing_optimizer.cc b/mindspore-lite/tools/converter/offline_packing_optimizer.cc index f7d610cb02d78c6ab8c4bdf0859f2146d78742e5..c2532388f624f9613b844d4942f46f8fe44c85f8 100644 --- a/mindspore-lite/tools/converter/offline_packing_optimizer.cc +++ b/mindspore-lite/tools/converter/offline_packing_optimizer.cc @@ -155,7 +155,7 @@ STATUS CreateLiteTensor(const CNodePtr &cnode, std::vector *in_tensors auto param_node = cnode->input(i)->cast(); if (param_node->has_default()) { auto tensor_info = std::static_pointer_cast(param_node->default_param()); - tensor_data = tensor_info->data().data(); + tensor_data = tensor_info->device_address()->GetMutablePtr(); auto quantization_params = tensor_info->quant_params(); if (!quantization_params.empty()) { auto quantization_param = quantization_params.front(); diff --git a/mindspore-lite/tools/converter/parser/onnx/onnx_constant_parser.cc b/mindspore-lite/tools/converter/parser/onnx/onnx_constant_parser.cc index 9c3b0608d14b893d1cbdcde5d747b2b3e81b9357..3f9283dae1d86a64a85f51293fe7a5661df09116 100644 --- a/mindspore-lite/tools/converter/parser/onnx/onnx_constant_parser.cc +++ 
b/mindspore-lite/tools/converter/parser/onnx/onnx_constant_parser.cc @@ -26,6 +26,7 @@ #include "tools/common/tensor_util.h" #include "nnacl/op_base.h" +#include "ir/tensor_api.h" namespace mindspore { namespace lite { namespace { @@ -55,7 +56,7 @@ STATUS OnnxConstantParser::AddDataInfoAttr(const onnx::TensorProto &onnx_const_t return RET_ERROR; } std::vector shape_vector(onnx_const_tensor.dims().begin(), onnx_const_tensor.dims().end()); - tensor_info = std::make_shared(data_type, shape_vector); + tensor_info = tensor::empty(data_type, shape_vector, device::DeviceType::kCPU); MS_CHECK_TRUE_MSG(tensor_info != nullptr, RET_ERROR, "create tensor_info return nullptr"); std::vector shape; std::transform(shape_vector.begin(), shape_vector.end(), std::back_inserter(shape), diff --git a/mindspore-lite/tools/converter/parser/onnx/onnx_model_parser.cc b/mindspore-lite/tools/converter/parser/onnx/onnx_model_parser.cc index 3366376a29b3c70ee082f97a0f155a06a8f18752..60e0c071f7af1f4ec7a800f2edc0f0f0638d5d03 100644 --- a/mindspore-lite/tools/converter/parser/onnx/onnx_model_parser.cc +++ b/mindspore-lite/tools/converter/parser/onnx/onnx_model_parser.cc @@ -53,6 +53,7 @@ #include "tools/converter/parser/einsum_adjust.h" using mindspore::converter::kFmkTypeOnnx; +#include "ir/tensor_api.h" namespace mindspore { namespace lite { namespace { @@ -272,7 +273,7 @@ STATUS BuildParameterNode(const ParameterPtr ¶meter_node, const onnx::Tensor return RET_ERROR; } } else { - tensor_info = std::make_shared(data_type, shape_vector); + tensor_info = tensor::empty(data_type, shape_vector, device::DeviceType::kCPU); MS_CHECK_TRUE_MSG(tensor_info != nullptr, RET_NULL_PTR, "create tensor_info return nullptr"); std::vector shape; std::transform(shape_vector.begin(), shape_vector.end(), std::back_inserter(shape), diff --git a/mindspore-lite/tools/converter/parser/onnx/onnx_node_parser.cc b/mindspore-lite/tools/converter/parser/onnx/onnx_node_parser.cc index 7d914229a9bf92a6d37fb6c5856b7a673a804aeb..cc9c4bab4f93401dc034b9f645f49f1cc21bf1b5 100644 --- a/mindspore-lite/tools/converter/parser/onnx/onnx_node_parser.cc +++ b/mindspore-lite/tools/converter/parser/onnx/onnx_node_parser.cc @@ -24,6 +24,7 @@ #include "src/common/file_utils.h" #include "utils/ms_utils_secure.h" +#include "ir/tensor_api.h" namespace mindspore { namespace lite { namespace { @@ -111,7 +112,7 @@ tensor::TensorPtr OnnxNodeParser::CopyOnnxTensorData(const onnx::TensorProto &on return nullptr; } std::vector shape_vector(onnx_const_tensor.dims().begin(), onnx_const_tensor.dims().end()); - auto tensor_info = std::make_shared(data_type, shape_vector); + auto tensor_info = tensor::empty(data_type, shape_vector, device::DeviceType::kCPU); if (tensor_info == nullptr) { MS_LOG(ERROR) << "new a tensor::Tensor failed, data type: " << data_type << ", shape: " << shape_vector; return nullptr; @@ -140,8 +141,8 @@ tensor::TensorPtr OnnxNodeParser::CopyOnnxTensorData(const onnx::TensorProto &on MS_LOG(ERROR) << "Dst tensor cannot be nullptr"; return nullptr; } - auto dst_bytes_size = tensor_info->data().nbytes(); - if (dst_bytes_size != SizeToLong(data_size)) { + auto dst_bytes_size = tensor_info->DataNBytes(); + if (dst_bytes_size != data_size) { MS_LOG(ERROR) << "Calculated data size " << data_size << " != tensor bytes size " << dst_bytes_size; return nullptr; } @@ -303,10 +304,10 @@ STATUS OnnxNodeParser::LoadOnnxExternalTensorData(const onnx::TensorProto &onnx_ return RET_MEMORY_FAILED; } auto tensor_data = reinterpret_cast(tensor_info->data_c()); - if 
diff --git a/mindspore-lite/tools/converter/parser/onnx/onnx_node_parser.cc b/mindspore-lite/tools/converter/parser/onnx/onnx_node_parser.cc
index 7d914229a9bf92a6d37fb6c5856b7a673a804aeb..cc9c4bab4f93401dc034b9f645f49f1cc21bf1b5 100644
--- a/mindspore-lite/tools/converter/parser/onnx/onnx_node_parser.cc
+++ b/mindspore-lite/tools/converter/parser/onnx/onnx_node_parser.cc
@@ -24,6 +24,7 @@
 #include "src/common/file_utils.h"
 #include "utils/ms_utils_secure.h"
+#include "ir/tensor_api.h"
 namespace mindspore {
 namespace lite {
 namespace {
@@ -111,7 +112,7 @@ tensor::TensorPtr OnnxNodeParser::CopyOnnxTensorData(const onnx::TensorProto &on
     return nullptr;
   }
   std::vector<int64_t> shape_vector(onnx_const_tensor.dims().begin(), onnx_const_tensor.dims().end());
-  auto tensor_info = std::make_shared<tensor::Tensor>(data_type, shape_vector);
+  auto tensor_info = tensor::empty(data_type, shape_vector, device::DeviceType::kCPU);
   if (tensor_info == nullptr) {
     MS_LOG(ERROR) << "new a tensor::Tensor failed, data type: " << data_type << ", shape: " << shape_vector;
     return nullptr;
   }
@@ -140,8 +141,8 @@ tensor::TensorPtr OnnxNodeParser::CopyOnnxTensorData(const onnx::TensorProto &on
     MS_LOG(ERROR) << "Dst tensor cannot be nullptr";
     return nullptr;
   }
-  auto dst_bytes_size = tensor_info->data().nbytes();
-  if (dst_bytes_size != SizeToLong(data_size)) {
+  auto dst_bytes_size = tensor_info->DataNBytes();
+  if (dst_bytes_size != data_size) {
     MS_LOG(ERROR) << "Calculated data size " << data_size << " != tensor bytes size " << dst_bytes_size;
     return nullptr;
   }
@@ -303,10 +304,10 @@ STATUS OnnxNodeParser::LoadOnnxExternalTensorData(const onnx::TensorProto &onnx_
     return RET_MEMORY_FAILED;
   }
   auto tensor_data = reinterpret_cast<uint8_t *>(tensor_info->data_c());
-  if (common::huge_memcpy(tensor_data, static_cast<size_t>(tensor_info->data().nbytes()),
+  if (common::huge_memcpy(tensor_data, static_cast<size_t>(tensor_info->DataNBytes()),
                           static_cast<const uint8_t *>(onnx_data), data_size) != EOK) {
     MS_LOG(ERROR) << "memcpy_s from onnx tensor data to mindspore tensor data failed, dst size "
-                  << tensor_info->data().nbytes() << ", src size " << data_size;
+                  << tensor_info->DataNBytes() << ", src size " << data_size;
     return RET_ERROR;
   }
   return RET_OK;
@@ -349,7 +350,7 @@ static int CopyOnnxData(void *dst_v, const void *src_v, size_t data_count) {
 int OnnxNodeParser::GetOnnxRawData(const onnx::TensorProto &onnx_const_tensor, size_t data_count,
                                    const tensor::TensorPtr &tensor_info) {
-  auto data_size = LongToSize(tensor_info->data().nbytes());
+  auto data_size = LongToSize(tensor_info->DataNBytes());
   auto tensor_data = tensor_info->data_c();
   auto onnx_data = onnx_const_tensor.raw_data().data();
   if (onnx_const_tensor.raw_data().size() != data_size) {
diff --git a/mindspore-lite/tools/converter/parser/tf/tf_model_parser.cc b/mindspore-lite/tools/converter/parser/tf/tf_model_parser.cc
index ce4793d7b1f596d29fb47ae846a8c773dc6b0114..fb4187cbd1994b17c864c478a3306e2c4faad0c3 100644
--- a/mindspore-lite/tools/converter/parser/tf/tf_model_parser.cc
+++ b/mindspore-lite/tools/converter/parser/tf/tf_model_parser.cc
@@ -55,6 +55,7 @@
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_w.h"
 using mindspore::converter::kFmkTypeTf;
+#include "ir/tensor_api.h"
 namespace mindspore {
 namespace lite {
 namespace {
@@ -494,7 +495,7 @@ STATUS TFModelParser::ConvertConstTensor(const tensorflow::NodeDef &node_def, co
   for (int i = 0; i < tensor_shape.dim_size(); i++) {
     shape_vector->push_back(tensor_shape.dim(i).size());
   }
-  auto tensor_info = std::make_shared<tensor::Tensor>(type, *shape_vector);
+  auto tensor_info = tensor::empty(type, *shape_vector, device::DeviceType::kCPU);
   if (tensor_info == nullptr) {
     MS_LOG(ERROR) << "tensor info is nullptr";
     return RET_ERROR;
   }
diff --git a/mindspore-lite/tools/converter/quantizer/cluster_quantization.cc b/mindspore-lite/tools/converter/quantizer/cluster_quantization.cc
index 0947bc003f96d67e0bf93b7a86a4ec382b6db346..eb06a7ef6d56c6d0e6de2b484e62d8d60611c2a7 100644
--- a/mindspore-lite/tools/converter/quantizer/cluster_quantization.cc
+++ b/mindspore-lite/tools/converter/quantizer/cluster_quantization.cc
@@ -189,7 +189,7 @@ int ClusterQuantization::KMeansQuantization(const CNodePtr &cnode, const std::ve
       MS_LOG(INFO) << "This op " << parameter->fullname_with_scope() << " is bias";
       continue;
     }
-    auto data = static_cast<float *>(tensor_info->data().data());
+    auto data = static_cast<float *>(tensor_info->device_address()->GetMutablePtr());
     std::vector<float> cluster_centroid;
     std::vector<int8_t> clusters;
     auto ret = KMeans(data, tensor_info->DataSize(), k_, max_epochs_, tol_error_, &clusters, &cluster_centroid);
diff --git a/mindspore-lite/tools/converter/quantizer/gptq_quantizer.cc b/mindspore-lite/tools/converter/quantizer/gptq_quantizer.cc
index bdda9f91f5af554b17401a809e3c953e91fc7883..24d5625a43ed2153a5e669457c9d68fc918cd747 100644
--- a/mindspore-lite/tools/converter/quantizer/gptq_quantizer.cc
+++ b/mindspore-lite/tools/converter/quantizer/gptq_quantizer.cc
@@ -211,13 +211,13 @@ int GptqQuantizer::UpdateWeightNode(const FuncGraphPtr &func_graph,
   MS_CHECK_TRUE_MSG(weight_tensor != nullptr, RET_ERROR, "default_param can not cast to tensor::Tensor.");
   weight_tensor->set_data_type(kNumberTypeInt8);
   size_t new_size = weights.at(weight_tensor_name)->elements_num * sizeof(int8_t);
-  if (new_size != static_cast<size_t>(weight_tensor->data().nbytes())) {
+  if (new_size != static_cast<size_t>(weight_tensor->DataNBytes())) {
     MS_LOG(ERROR) << "Data size of tensor info is error, new_size: " << new_size
-                  << ", weight nbytes: " << static_cast<size_t>(weight_tensor->data().nbytes());
+                  << ", weight nbytes: " << static_cast<size_t>(weight_tensor->DataNBytes());
     return RET_ERROR;
   }
-  if (memcpy_s(weight_tensor->data_c(), weight_tensor->data().nbytes(),
-               weights.at(weight_tensor_name)->quant_data, new_size) != EOK) {
+  if (memcpy_s(weight_tensor->data_c(), weight_tensor->DataNBytes(), weights.at(weight_tensor_name)->quant_data,
+               new_size) != EOK) {
     MS_LOG(ERROR) << "memcpy data failed.";
     return RET_ERROR;
   }
diff --git a/mindspore-lite/tools/converter/quantizer/huffman_encode.cc b/mindspore-lite/tools/converter/quantizer/huffman_encode.cc
index 666a0c66d72beaecafd380469dcfaac923754440..5b52a4cca2e5b7a82b8395980979ad0f2ca495f5 100644
--- a/mindspore-lite/tools/converter/quantizer/huffman_encode.cc
+++ b/mindspore-lite/tools/converter/quantizer/huffman_encode.cc
@@ -50,11 +50,11 @@ int HuffmanEncode::DoHuffmanEncode(const tensor::TensorPtr &weight, const Primit
   }
   size_t ch_size = huffman_encoded_str_.length();
   if (ch_size < packed_size) {
-    if (ch_size != static_cast<size_t>(weight->data().nbytes())) {
+    if (ch_size != static_cast<size_t>(weight->DataNBytes())) {
       MS_LOG(ERROR) << "Data size of weight is error.";
       return RET_ERROR;
     }
-    if (memcpy_s(weight->data_c(), weight->data().nbytes(), huffman_encoded_str_.c_str(), ch_size) != EOK) {
+    if (memcpy_s(weight->data_c(), weight->DataNBytes(), huffman_encoded_str_.c_str(), ch_size) != EOK) {
       MS_LOG(ERROR) << "memcpy_s failed.";
       return RET_MEMORY_FAILED;
     }
diff --git a/mindspore-lite/tools/converter/quantizer/quant_helper/transform_uint8_pass.cc b/mindspore-lite/tools/converter/quantizer/quant_helper/transform_uint8_pass.cc
index dc7475eed2ebd3ca4d06274aa00021e403e9a9ac..a3c6da58a1279a45728219225dae3aea57692ce3 100644
--- a/mindspore-lite/tools/converter/quantizer/quant_helper/transform_uint8_pass.cc
+++ b/mindspore-lite/tools/converter/quantizer/quant_helper/transform_uint8_pass.cc
@@ -104,7 +104,7 @@ int TransformUint8Pass::DoParameterNodeTrans(const CNodePtr &cnode, const Parame
   // transform weight data
   size_t elem_count = tensor_info->DataSize();
-  auto ret = Uint8toInt8(static_cast<uint8_t *>(tensor_info->data().data()), elem_count);
+  auto ret = Uint8toInt8(static_cast<uint8_t *>(tensor_info->device_address()->GetMutablePtr()), elem_count);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << input_node->fullname_with_scope() << " transform data uint8 to int8 failed.";
     return ret;
   }
diff --git a/mindspore-lite/tools/converter/quantizer/quantize_util.cc b/mindspore-lite/tools/converter/quantizer/quantize_util.cc
index 703c73bb3940dea7530a5454abc3e43325b8c9b9..aa30a6f10e34f43cb16cac3de86a3bc8e37a7750 100644
--- a/mindspore-lite/tools/converter/quantizer/quantize_util.cc
+++ b/mindspore-lite/tools/converter/quantizer/quantize_util.cc
@@ -454,11 +454,11 @@ int UpdateTensorDataAndSize(const AnfNodePtr &node, const tensor::TensorPtr &wei
   MS_CHECK_TRUE_RET(weight != nullptr, RET_NULL_PTR);
   MS_CHECK_TRUE_RET(new_size > 0, RET_NULL_PTR);
   weight->set_data_type(new_data_type);
-  if (new_size != static_cast<size_t>(weight->data().nbytes())) {
+  if (new_size != static_cast<size_t>(weight->DataNBytes())) {
     MS_LOG(ERROR) << "Data size of tensor info is error.";
     return RET_ERROR;
   }
-  if (memcpy_s(weight->data_c(), weight->data().nbytes(), quant_datas, new_size) != EOK) {
+  if (memcpy_s(weight->data_c(), weight->DataNBytes(), quant_datas, new_size) != EOK) {
     MS_LOG(ERROR) << "memcpy data failed.";
     return RET_ERROR;
   }
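The quantizer hunks repeat one guard shape: compare the incoming byte count against DataNBytes() before memcpy_s into data_c(). Note the casts: the old data().nbytes() returned a signed count (hence the SizeToLong/static_cast shims in the removed lines), while the refactored comparisons treat it as unsigned. A condensed sketch, assuming DataNBytes() reports the full payload size:

// Condensed from UpdateWeightNode/DoHuffmanEncode/UpdateTensorDataAndSize above.
int ReplaceTensorBytes(const tensor::TensorPtr &weight, const void *src, size_t new_size) {
  if (new_size != static_cast<size_t>(weight->DataNBytes())) {
    MS_LOG(ERROR) << "Data size of tensor info is error.";
    return RET_ERROR;  // refuse to copy on any size mismatch
  }
  if (memcpy_s(weight->data_c(), weight->DataNBytes(), src, new_size) != EOK) {
    MS_LOG(ERROR) << "memcpy data failed.";
    return RET_ERROR;
  }
  return RET_OK;
}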
MS_LOG(ERROR) << "memcpy data failed."; return RET_ERROR; } diff --git a/mindspore-lite/tools/converter/quantizer/split_shared_bias.cc b/mindspore-lite/tools/converter/quantizer/split_shared_bias.cc index af42e0c5814a966675e4c4a4a3b5349432d22d61..7addf1e4063b200659e1a55a8b107ecde28a0940 100644 --- a/mindspore-lite/tools/converter/quantizer/split_shared_bias.cc +++ b/mindspore-lite/tools/converter/quantizer/split_shared_bias.cc @@ -23,6 +23,7 @@ #include "tools/converter/quantizer/quantize_util.h" #include "tools/lite_exporter/fetch_content.h" #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_o.h" +#include "ir/tensor_api.h" namespace mindspore::lite::quant { AnfNodePtr SplitSharedBias::CloneParameterNode(const CNodePtr &cnode, size_t index, const FuncGraphPtr &func_graph, @@ -50,7 +51,7 @@ AnfNodePtr SplitSharedBias::CloneParameterNode(const CNodePtr &cnode, size_t ind } std::shared_ptr tensor_info; if (static_cast(data_info.compress_type_) == TensorCompressionType::kNoCompression) { - tensor_info = std::make_shared(static_cast(data_info.data_type_), shape_vec); + tensor_info = tensor::empty(static_cast(data_info.data_type_), shape_vec, device::DeviceType::kCPU); } else { tensor_info = std::make_shared(static_cast(data_info.data_type_), shape_vec, data_info.data_.size(), @@ -59,11 +60,11 @@ AnfNodePtr SplitSharedBias::CloneParameterNode(const CNodePtr &cnode, size_t ind MS_CHECK_TRUE_RET(tensor_info != nullptr, nullptr); if (!data_info.data_.empty()) { auto tensor_data = reinterpret_cast(tensor_info->data_c()); - if (tensor_data == nullptr || tensor_info->data().nbytes() < 0) { + if (tensor_data == nullptr || tensor_info->DataNBytes() < 0) { MS_LOG(ERROR) << "tensor info data is nullptr or the size is smaller than zero."; return nullptr; } - if (memcpy_s(tensor_data, tensor_info->data().nbytes(), data_info.data_.data(), data_info.data_.size()) != EOK) { + if (memcpy_s(tensor_data, tensor_info->DataNBytes(), data_info.data_.data(), data_info.data_.size()) != EOK) { MS_LOG(ERROR) << "memcpy_s failed"; return nullptr; } diff --git a/mindspore-lite/tools/converter/quantizer/tensor_compressor.cc b/mindspore-lite/tools/converter/quantizer/tensor_compressor.cc index 92df29fa48222e1f6ea6954174b940880371bf52..5b3cae2376f9e305fb2ed223521af0b21a6d4f95 100644 --- a/mindspore-lite/tools/converter/quantizer/tensor_compressor.cc +++ b/mindspore-lite/tools/converter/quantizer/tensor_compressor.cc @@ -104,7 +104,7 @@ int TensorCompressor::SetNewCompressionTensor(const ParameterPtr &weight, const // set quant param compression_tensor->set_quant_param(tensor_info->quant_params()); // update tensor data - WriteBufferWithAlignByte(bits, static_cast(compression_tensor->data().data())); + WriteBufferWithAlignByte(bits, static_cast(compression_tensor->device_address()->GetMutablePtr())); weight->set_default_param(compression_tensor); weight->set_abstract(compression_tensor->ToAbstract()); return RET_OK; @@ -116,7 +116,7 @@ int TensorCompressor::DoBitPack(const ParameterPtr &weight, size_t bit_num) { auto elements_num = tensor_info->ElementsNum(); std::shared_ptr compression_tensor = nullptr; if (bit_num > 0 && bit_num < k8Bit) { - auto quant_data = static_cast(tensor_info->data().data()); + auto quant_data = static_cast(tensor_info->device_address()->GetMutablePtr()); std::vector origin_data(quant_data, quant_data + elements_num); std::vector pack_data{}; BitPack::BitPacking(bit_num, origin_data, &pack_data); @@ -130,7 +130,7 @@ int TensorCompressor::DoBitPack(const ParameterPtr &weight, size_t 
bit_num) { return RET_ERROR; } } else if (bit_num > k8Bit && bit_num < k16Bit) { - auto quant_data = static_cast(tensor_info->data().data()); + auto quant_data = static_cast(tensor_info->device_address()->GetMutablePtr()); std::vector origin_data(quant_data, quant_data + elements_num); std::vector pack_data{}; BitPack::BitPacking(bit_num, origin_data, &pack_data); diff --git a/mindspore-lite/tools/converter/quantizer/tensor_compressor.h b/mindspore-lite/tools/converter/quantizer/tensor_compressor.h index 8b8e427c27833903d63fd599c7ed2df164bcdc95..6d807e82594e1aa23039c81df2849cb72ed6a519 100644 --- a/mindspore-lite/tools/converter/quantizer/tensor_compressor.h +++ b/mindspore-lite/tools/converter/quantizer/tensor_compressor.h @@ -50,7 +50,7 @@ class TensorCompressor { return RET_OK; } auto max_size = tensor_info->Size(); - auto quant_data_array = static_cast(tensor_info->data().data()); + auto quant_data_array = static_cast(tensor_info->device_address()->GetMutablePtr()); std::vector quant_data(quant_data_array, quant_data_array + max_size / sizeof(T)); auto elem_cnt = quant_data.size(); @@ -128,7 +128,7 @@ class TensorCompressor { auto tensor_info = weight->default_param()->cast(); CHECK_NULL_RETURN(tensor_info); auto max_size = tensor_info->ElementsNum(); - auto quant_data = static_cast(tensor_info->data().data()); + auto quant_data = static_cast(tensor_info->device_address()->GetMutablePtr()); // write the index: each index has unique_value_bit unsigned for (int i = 0; i < max_size; i++) { auto quant_value = quant_data[i]; @@ -157,7 +157,7 @@ class TensorCompressor { size_t nz_cnt, size_t coor_best_bit, size_t bit_num) { auto tensor_info = weight->default_param()->cast(); CHECK_NULL_RETURN(tensor_info); - auto quant_data = static_cast(tensor_info->data().data()); + auto quant_data = static_cast(tensor_info->device_address()->GetMutablePtr()); int elem_cnt = tensor_info->DataSize(); auto channel_cnt = quant_params.size(); if (channel_cnt == 0) { diff --git a/mindspore-lite/tools/graph_kernel/converter/format_recognition.cc b/mindspore-lite/tools/graph_kernel/converter/format_recognition.cc index 7f7ff1796b6c5bca1156c7f106a212e50f63ec29..e27b0c9e224246003748c95838b3a90494ebf081 100644 --- a/mindspore-lite/tools/graph_kernel/converter/format_recognition.cc +++ b/mindspore-lite/tools/graph_kernel/converter/format_recognition.cc @@ -59,7 +59,7 @@ std::pair GetTransposeFormat(const CNodePtr &cnode) { return GetLiteFormat(cnode); } auto perm_tensor = perm_para->default_param()->cast(); - auto perm = static_cast(perm_tensor->data_ptr()->data()); + auto perm = static_cast(perm_tensor->device_address()->GetMutablePtr()); std::transform(perm, perm + perm_tensor->shape()[0], std::back_inserter(perm_list), IntToLong); } else { auto perm_value = cnode->input(perm_idx)->cast(); diff --git a/mindspore-lite/tools/graph_kernel/converter/preprocess_weight.cc b/mindspore-lite/tools/graph_kernel/converter/preprocess_weight.cc index ec982043ebd45c69cd4004827aeb89a1f20b89bd..a3202edc707e9e7567dfc1b4ec63604a898199e4 100644 --- a/mindspore-lite/tools/graph_kernel/converter/preprocess_weight.cc +++ b/mindspore-lite/tools/graph_kernel/converter/preprocess_weight.cc @@ -21,6 +21,7 @@ #include "utils/anf_utils.h" #include "backend/common/graph_kernel/core/graph_kernel_callback.h" #include "backend/common/graph_kernel/core/graph_kernel_utils.h" +#include "ir/tensor_api.h" namespace mindspore::graphkernel { constexpr size_t kConv2dDataIndex = 1; @@ -110,7 +111,7 @@ AnfNodePtr SubstituteConv2D::InferWeightValue(const 
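Where the old code pulled a raw pointer out of TensorData (data().data() or data_ptr()->data()), the compressor and graph-kernel hunks above now go through the tensor's device address. A sketch of that access path; the null checks are this sketch's own addition, since most call sites in the patch assume a host-resident buffer is present:

// Sketch: typed host access through the new storage indirection.
template <typename T>
T *MutableHostData(const tensor::TensorPtr &tensor) {
  if (tensor == nullptr || tensor->device_address() == nullptr) {
    return nullptr;
  }
  // GetMutablePtr() is assumed to return the writable host buffer, as the
  // replacement lines above use it.
  return static_cast<T *>(tensor->device_address()->GetMutablePtr());
}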
diff --git a/mindspore-lite/tools/graph_kernel/converter/preprocess_weight.cc b/mindspore-lite/tools/graph_kernel/converter/preprocess_weight.cc
index ec982043ebd45c69cd4004827aeb89a1f20b89bd..a3202edc707e9e7567dfc1b4ec63604a898199e4 100644
--- a/mindspore-lite/tools/graph_kernel/converter/preprocess_weight.cc
+++ b/mindspore-lite/tools/graph_kernel/converter/preprocess_weight.cc
@@ -21,6 +21,7 @@
 #include "utils/anf_utils.h"
 #include "backend/common/graph_kernel/core/graph_kernel_callback.h"
 #include "backend/common/graph_kernel/core/graph_kernel_utils.h"
+#include "ir/tensor_api.h"
 namespace mindspore::graphkernel {
 constexpr size_t kConv2dDataIndex = 1;
@@ -110,7 +111,7 @@ AnfNodePtr SubstituteConv2D::InferWeightValue(const AnfNodePtr &node) {
   if (tensor == nullptr) {
     return nullptr;
   }
-  if (tensor->data().const_data() == nullptr) {
+  if (tensor->unsafe_data() == nullptr) {
     return nullptr;
   }
   if (tensor->data_type() != kNumberTypeFloat32) {
@@ -124,7 +125,7 @@ AnfNodePtr SubstituteConv2D::InferWeightValue(const AnfNodePtr &node) {
   IndexCalc old_shape_calc({c_out_o, c_out_i, h_len, w_len, c_in_o, c_in_i});
   ShapeVector new_shape = {c_out_o, c_in_o, h_len, w_len, c_in_i, c_out_i};
   IndexCalc new_shape_calc(new_shape);
-  auto new_tensor = std::make_shared<tensor::Tensor>(tensor->data_type(), new_shape);
+  auto new_tensor = tensor::empty(tensor->data_type(), new_shape, device::DeviceType::kCPU);
   auto new_data = new_tensor->data_c();
   auto old_data = tensor->data_c();
   for (int64_t coo = 0; coo < c_out_o; coo++) {
@@ -182,7 +183,7 @@ AnfNodePtr MatmulPackB::InferValue(const AnfNodePtr &node) {
   if (tensor == nullptr) {
     return node;
   }
-  if (tensor->data().const_data() == nullptr) {
+  if (tensor->unsafe_data() == nullptr) {
     return node;
   }
@@ -224,7 +225,7 @@ tensor::TensorPtr MatmulPackB::PackB(const tensor::TensorPtr &tensor, const Shap
   if (transpose) {
     std::swap(height, width);
   }
-  auto new_tensor = std::make_shared<tensor::Tensor>(tensor->data_type(), std::vector<int64_t>{height, width});
+  auto new_tensor = tensor::empty(tensor->data_type(), std::vector<int64_t>{height, width}, device::DeviceType::kCPU);
   auto *new_tensor_iter = static_cast<float *>(new_tensor->data_c());
   int64_t width_offset = 0;
   for (auto pack : pack_size) {
diff --git a/mindspore-lite/tools/lite_exporter/fetch_content.cc b/mindspore-lite/tools/lite_exporter/fetch_content.cc
index 97d6a97390bda75ff7d10d3a31eb16b9744f0bd0..0b904a964562519e351faf738d56257e28ecff1f 100644
--- a/mindspore-lite/tools/lite_exporter/fetch_content.cc
+++ b/mindspore-lite/tools/lite_exporter/fetch_content.cc
@@ -62,7 +62,10 @@ STATUS GetShapeVectorFromStringTensor(const tensor::TensorPtr &tensor_info, Shap
   std::string shape_size_str;
   *offset = 0;
   size_t cnt = 0;
-  for (; *offset < tensor_info->Size(); (*offset)++) {
+  MS_EXCEPTION_IF_NULL(tensor_info->device_address());
+  MS_EXCEPTION_IF_NULL(tensor_info->device_address()->data());
+  auto tensor_info_nbytes = static_cast<size_t>(tensor_info->device_address()->data()->nbytes());
+  for (; *offset < tensor_info_nbytes; (*offset)++) {
     if (tensor_data[*offset] == ',') {
       (*offset)++;
       break;
     }
@@ -76,7 +79,7 @@ STATUS GetShapeVectorFromStringTensor(const tensor::TensorPtr &tensor_info, Shap
   constexpr int kBase = 10;
   size_t shape_size = static_cast<size_t>(std::strtol(shape_size_str.c_str(), nullptr, kBase));
   MS_CHECK_TRUE_RET(shape_size != 0, RET_ERROR);
-  for (; *offset < tensor_info->Size(); (*offset)++) {
+  for (; *offset < tensor_info_nbytes; (*offset)++) {
     if (tensor_data[*offset] == ',') {
       cnt++;
       int64_t shape = 0;
@@ -159,8 +162,11 @@ int FetchFromTensorValue(const ValueNodePtr &value_node, converter::FmkType fmk_
   // process weight tensor
   if (copy_data) {
-    data_info->data_.resize(data->Size());
-    if (data->Size() > 0 && memcpy_s(data_info->data_.data(), data->Size(), data->data_c(), data->Size()) != EOK) {
+    MS_EXCEPTION_IF_NULL(data->device_address());
+    MS_EXCEPTION_IF_NULL(data->device_address()->data());
+    auto data_nbytes = static_cast<size_t>(data->device_address()->data()->nbytes());
+    data_info->data_.resize(data_nbytes);
+    if (data_nbytes > 0 && memcpy_s(data_info->data_.data(), data_nbytes, data->data_c(), data_nbytes) != EOK) {
      MS_LOG(ERROR) << "memcpy_s error.";
      return RET_ERROR;
    }
@@ -260,11 +266,14 @@ int SetTensorData(const tensor::TensorPtr &tensor_info, DataInfo *data_info, Typ
                   bool copy_data) {
   MS_CHECK_TRUE_RET(data_info != nullptr, RET_NULL_PTR);
   MS_CHECK_TRUE_RET(tensor_info != nullptr, RET_NULL_PTR);
-  if (data_type == kObjectTypeTensorType && tensor_info->Size() >= kTensorListMinSize) {
-    data_info->data_.resize(tensor_info->Size() - offset);
+  MS_EXCEPTION_IF_NULL(tensor_info->device_address());
+  MS_EXCEPTION_IF_NULL(tensor_info->device_address()->data());
+  auto tensor_info_nbytes = static_cast<size_t>(tensor_info->device_address()->data()->nbytes());
+  if (data_type == kObjectTypeTensorType && tensor_info_nbytes >= kTensorListMinSize) {
+    data_info->data_.resize(tensor_info_nbytes - offset);
     if (EOK != common::huge_memcpy(data_info->data_.data(), data_info->data_.size(),
                                    static_cast<uint8_t *>(tensor_info->data_c()) + offset,
-                                   tensor_info->Size() - offset)) {
+                                   tensor_info_nbytes - offset)) {
       MS_LOG(ERROR) << "memcpy_s failed.";
       return RET_ERROR;
     }
@@ -272,10 +281,10 @@ int SetTensorData(const tensor::TensorPtr &tensor_info, DataInfo *data_info, Typ
   // common node with const data
   if (data_type != kObjectTypeTensorType) {
     if (copy_data) {
-      data_info->data_.resize(tensor_info->Size() - offset);
+      data_info->data_.resize(tensor_info_nbytes - offset);
       if (EOK != common::huge_memcpy(data_info->data_.data(), data_info->data_.size(),
                                      static_cast<uint8_t *>(tensor_info->data_c()) + offset,
-                                     tensor_info->Size() - offset)) {
+                                     tensor_info_nbytes - offset)) {
         MS_LOG(ERROR) << "memcpy_s failed.";
         return RET_ERROR;
       }
@@ -309,7 +318,10 @@ int FetchFromDefaultParam(const ParameterPtr &param_node, const converter::FmkTy
   }
   std::vector<int32_t> dims(shape_vector.begin(), shape_vector.end());
   data_info->shape_ = dims;
-  if (tensor_info != nullptr && tensor_info->Size() != 0) {
+  MS_EXCEPTION_IF_NULL(tensor_info->device_address());
+  MS_EXCEPTION_IF_NULL(tensor_info->device_address()->data());
+  auto tensor_info_nbytes = static_cast<size_t>(tensor_info->device_address()->data()->nbytes());
+  if (tensor_info != nullptr && tensor_info_nbytes != 0) {
     // tensor_list tensor
     status = SetTensorData(tensor_info, data_info, data_type, offset, copy_data);
     if (status != RET_OK) {
@@ -444,10 +456,12 @@ int FetchDataFromCNode(const CNodePtr &cnode, size_t index, DataInfo *data_info)
   }
   auto tensor_value = tensor_info->cast<tensor::TensorPtr>();
   MS_CHECK_TRUE_MSG(tensor_value != nullptr, RET_ERROR, "cast ptr failed");
-  if (tensor_value->Size() >= kTensorListMinSize) {
-    data_info->data_.resize(tensor_value->Size());
-    if (memcpy_s(data_info->data_.data(), tensor_value->Size(), tensor_value->data_c(), tensor_value->Size()) !=
-        EOK) {
+  MS_EXCEPTION_IF_NULL(tensor_value->device_address());
+  MS_EXCEPTION_IF_NULL(tensor_value->device_address()->data());
+  auto tensor_value_nbytes = static_cast<size_t>(tensor_value->device_address()->data()->nbytes());
+  if (tensor_value_nbytes >= kTensorListMinSize) {
+    data_info->data_.resize(tensor_value_nbytes);
+    if (memcpy_s(data_info->data_.data(), tensor_value_nbytes, tensor_value->data_c(), tensor_value_nbytes) != EOK) {
      MS_LOG(ERROR) << "memcpy data failed.";
      return RET_ERROR;
    }
@@ -509,10 +523,12 @@ int FetchDataFromAbstract(const AbstractBasePtr &abstract, DataInfo *data_info)
   }
   auto tensor_value = tensor_info->cast<tensor::TensorPtr>();
   MS_CHECK_TRUE_MSG(tensor_value != nullptr, RET_ERROR, "cast ptr failed");
-  if (tensor_value->Size() >= kTensorListMinSize) {
-    data_info->data_.resize(tensor_value->Size());
-    if (memcpy_s(data_info->data_.data(), tensor_value->Size(), tensor_value->data_c(), tensor_value->Size()) !=
-        EOK) {
+  MS_EXCEPTION_IF_NULL(tensor_value->device_address());
+  MS_EXCEPTION_IF_NULL(tensor_value->device_address()->data());
+  auto tensor_value_nbytes = static_cast<size_t>(tensor_value->device_address()->data()->nbytes());
+  if (tensor_value_nbytes >= kTensorListMinSize) {
+    data_info->data_.resize(tensor_value_nbytes);
+    if (memcpy_s(data_info->data_.data(), tensor_value_nbytes, tensor_value->data_c(), tensor_value_nbytes) != EOK) {
      MS_LOG(ERROR) << "memcpy data failed.";
      return RET_ERROR;
    }
diff --git a/mindspore-lite/tools/mindir_exporter/mindir_serializer.cc b/mindspore-lite/tools/mindir_exporter/mindir_serializer.cc
index 068618f8badad02e96e550cff63549dd4f184e61..99fb0b50271dd9414429e2bba5bd3776cb3cf189 100644
--- a/mindspore-lite/tools/mindir_exporter/mindir_serializer.cc
+++ b/mindspore-lite/tools/mindir_exporter/mindir_serializer.cc
@@ -410,7 +410,7 @@ int MindIRSerializer::SaveMindIRTogether(const std::shared_ptr &param) {
   }
   auto data = para->default_param()->cast<tensor::TensorPtr>();
   param_proto.clear_raw_data();
-  param_proto.set_raw_data(data->data_c(), static_cast<size_t>(data->data().nbytes()));
+  param_proto.set_raw_data(data->data_c(), static_cast<size_t>(data->DataNBytes()));
 }
 return SaveProtoToFile(&model_proto_, save_model_path_, param);
@@ -561,7 +561,7 @@ int MindIRSerializer::SplitSave(const std::shared_ptr &param) {
     continue;
   }
   auto data = para->default_param()->cast<tensor::TensorPtr>();
-  int64_t data_length = static_cast<int64_t>(data->data().nbytes());
+  int64_t data_length = static_cast<int64_t>(data->DataNBytes());
   int64_t append_size = 0;
   if (data_length % OFFSET != 0) {
     append_size = OFFSET - (data_length % OFFSET);
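fetch_content.cc is the one place in this batch that reads the byte count from the storage object itself (device_address()->data()->nbytes()) rather than from DataNBytes(), and it guards every step with MS_EXCEPTION_IF_NULL first. The recurring idiom, factored out as a sketch with a hypothetical helper name:

// Factored from the fetch_content.cc hunks above; HostTensorNBytes is not a
// name in the patch.
size_t HostTensorNBytes(const tensor::TensorPtr &tensor_info) {
  MS_EXCEPTION_IF_NULL(tensor_info);
  MS_EXCEPTION_IF_NULL(tensor_info->device_address());       // storage attached?
  MS_EXCEPTION_IF_NULL(tensor_info->device_address()->data());  // buffer allocated?
  return static_cast<size_t>(tensor_info->device_address()->data()->nbytes());
}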
diff --git a/mindspore-lite/tools/optimizer/common/format_utils.cc b/mindspore-lite/tools/optimizer/common/format_utils.cc
index ba90ce1853b98ed7a2e1bc9332e870d400b7d096..c2e93a2c52ae971e13c7a3307c0d5b65a718f9b4 100644
--- a/mindspore-lite/tools/optimizer/common/format_utils.cc
+++ b/mindspore-lite/tools/optimizer/common/format_utils.cc
@@ -82,6 +82,7 @@
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_q.h"
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_s.h"
+#include "ir/tensor_api.h"
 namespace mindspore {
 namespace opt {
 // treat the weight of deformableConv2d as an input instead of a const because of the ops infershape only support nchw.
@@ -341,7 +342,7 @@ int SetAbstractTensorInfo(const AbstractBasePtr &abstract) {
   TypeId type = lite::GetAbstractTensorDtype(abstract->cast<abstract::AbstractTensorPtr>());
   // For kObjectTypeTensorType, the abstract value is TensorList amd does not need to reset.
   if (type != kObjectTypeTensorType) {
-    auto tensor_info = std::make_shared<tensor::Tensor>(type, shape);
+    auto tensor_info = tensor::empty(type, shape, device::DeviceType::kCPU);
     if (tensor_info == nullptr) {
       MS_LOG(ERROR) << "new tensor::Tensor failed";
       return RET_ERROR;
     }
diff --git a/mindspore-lite/tools/optimizer/common/gllo_utils.cc b/mindspore-lite/tools/optimizer/common/gllo_utils.cc
index b48ff3329ef2c8d33485d06d74d18d32015d091c..18daca1550338e0dbcd0d19d7344dae2604c51e7 100644
--- a/mindspore-lite/tools/optimizer/common/gllo_utils.cc
+++ b/mindspore-lite/tools/optimizer/common/gllo_utils.cc
@@ -57,6 +57,7 @@
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_s.h"
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_t.h"
+#include "ir/tensor_api.h"
 namespace mindspore {
 namespace opt {
 namespace {
@@ -860,7 +861,7 @@ ParameterPtr BuildParameterNode(const FuncGraphPtr &func_graph, const tensor::Te
   }
   param_node->set_name(node_name);
   param_node->debug_info()->set_name(node_name);
-  auto tensor_info_new = std::make_shared<tensor::Tensor>(data_type, shape_vector);
+  auto tensor_info_new = tensor::empty(data_type, shape_vector, device::DeviceType::kCPU);
   if (tensor_info_new == nullptr) {
     MS_LOG(ERROR) << "new tensor::Tensor failed.";
     return nullptr;
   }
diff --git a/mindspore-lite/tools/optimizer/const_fold/fold_utils.cc b/mindspore-lite/tools/optimizer/const_fold/fold_utils.cc
index 3114350c650dc926a6afe8bdcd34f6e3911d38f7..3b0e0ee68483aa6f0cf4f5fc6dd3b7f188e025ad 100644
--- a/mindspore-lite/tools/optimizer/const_fold/fold_utils.cc
+++ b/mindspore-lite/tools/optimizer/const_fold/fold_utils.cc
@@ -43,6 +43,7 @@
 using mindspore::lite::KernelRegistry;
 using mindspore::lite::Tensor;
+#include "ir/tensor_api.h"
 namespace mindspore {
 namespace opt {
 namespace {
@@ -57,7 +58,7 @@ ParameterPtr CreateNewParamter(const FuncGraphPtr &func_graph, Tensor *tensor) {
   (void)std::transform(shape.begin(), shape.end(), std::back_inserter(shape_vector),
                        [](const int32_t &value) { return static_cast<int64_t>(value); });
-  auto tensor_info = std::make_shared<tensor::Tensor>(tensor->data_type(), shape_vector);
+  auto tensor_info = tensor::empty(tensor->data_type(), shape_vector, device::DeviceType::kCPU);
   if (tensor_info == nullptr) {
     MS_LOG(ERROR) << "create tensor info failed.";
     return nullptr;
   }
diff --git a/mindspore-lite/tools/optimizer/fusion/batchmatmul_fusion.cc b/mindspore-lite/tools/optimizer/fusion/batchmatmul_fusion.cc
index bfad59fe17aa260f1a0a235097bf5f761abef715..3986b0607024f5adf3c7e1631dfe3a262d735a76 100644
--- a/mindspore-lite/tools/optimizer/fusion/batchmatmul_fusion.cc
+++ b/mindspore-lite/tools/optimizer/fusion/batchmatmul_fusion.cc
@@ -34,6 +34,7 @@
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_r.h"
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_s.h"
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_t.h"
+#include "ir/tensor_api.h"
 namespace mindspore::opt {
 namespace {
@@ -240,7 +241,7 @@ int ResetReshapeParameters(const AnfNodePtr &reshape_node) {
     shape[0] = rmatmul_input_shape[0] + 1;
   }
-  auto tensor_info = std::make_shared<tensor::Tensor>(shape_tensor->data_type(), shape);
+  auto tensor_info = tensor::empty(shape_tensor->data_type(), shape, device::DeviceType::kCPU);
   if (tensor_info == nullptr) {
     MS_LOG(ERROR) << "Create tensor info failed";
     return RET_ERROR;
   }
diff --git a/mindspore-lite/tools/optimizer/fusion/decoder_layer_fusion.cc b/mindspore-lite/tools/optimizer/fusion/decoder_layer_fusion.cc
index a95d560a2b720630ec1a42b51e78e4a4b9f0c069..22b9684dda082628243c8787d6e9522fe6406462 100644
--- a/mindspore-lite/tools/optimizer/fusion/decoder_layer_fusion.cc
+++ b/mindspore-lite/tools/optimizer/fusion/decoder_layer_fusion.cc
@@ -364,7 +364,7 @@ STATUS DecoderLayerFusion::GetEps(const EquivPtr &equiv, VarPtr node_name, float
   if (value_node->isa<tensor::Tensor>()) {
     auto tensor = value_node->cast<tensor::TensorPtr>();
     MS_EXCEPTION_IF_NULL(tensor);
-    *eps = *reinterpret_cast<float *>(tensor->data().data());
+    *eps = *reinterpret_cast<float *>(tensor->device_address()->GetMutablePtr());
     return RET_OK;
   }
 }
diff --git a/mindspore-lite/tools/optimizer/fusion/encoder_layer_fusion.cc b/mindspore-lite/tools/optimizer/fusion/encoder_layer_fusion.cc
index 3bbb9fdad31909ade7ca90b8a5e18c83fbd61ea2..74b1796e1e1bdf27ddf7551ec3a4b3109137b528 100644
--- a/mindspore-lite/tools/optimizer/fusion/encoder_layer_fusion.cc
+++ b/mindspore-lite/tools/optimizer/fusion/encoder_layer_fusion.cc
@@ -928,7 +928,7 @@ STATUS EncoderLayerFusion::GetEps(const EquivPtr &equiv, VarPtr node_name, float
   if (value_node->isa<tensor::Tensor>()) {
     auto tensor = value_node->cast<tensor::TensorPtr>();
     MS_EXCEPTION_IF_NULL(tensor);
-    *eps = *reinterpret_cast<float *>(tensor->data().data());
+    *eps = *reinterpret_cast<float *>(tensor->device_address()->GetMutablePtr());
     return RET_OK;
   }
 }
@@ -1045,7 +1045,7 @@ STATUS EncoderLayerFusion::InitAttributes(AnfNodePtr k_past, AnfNodePtr begin_ex
   auto expert_capacity_value_node = utils::cast(utils::cast(expert_capacity_node)->value());
   if (expert_capacity_value_node->isa<tensor::Tensor>()) {
     auto tensor = expert_capacity_value_node->cast<tensor::TensorPtr>();
-    auto expert_capacity = *(reinterpret_cast<Float16 *>(tensor->data().data()));
+    auto expert_capacity = *(reinterpret_cast<Float16 *>(tensor->device_address()->GetMutablePtr()));
     float cast_expert_capacity = Float16::ToFloat32(expert_capacity);
     *capacity_factor = (cast_expert_capacity) * (*expert_num) / seq;
   }
diff --git a/mindspore-lite/tools/optimizer/fusion/kv_cache_mgr_one_branch_fusion.cc b/mindspore-lite/tools/optimizer/fusion/kv_cache_mgr_one_branch_fusion.cc
index 4734e0659e318b05d32f41903b3f4f5f484ebbe0..af5eadf1a1456160c960c0e4887c5c3f584806e8 100644
--- a/mindspore-lite/tools/optimizer/fusion/kv_cache_mgr_one_branch_fusion.cc
+++ b/mindspore-lite/tools/optimizer/fusion/kv_cache_mgr_one_branch_fusion.cc
@@ -73,7 +73,7 @@ const BaseRef KVCacheMgrOneBranchFusion::DefinePattern() const {
 tensor::TensorPtr KVCacheMgrOneBranchFusion::ConstData(int32_t padding_length) const {
   std::vector<int64_t> shp = {padding_length};
-  tensor::TensorPtr const_data = std::make_shared<tensor::Tensor>(kInt32->type_id(), shp);
+  tensor::TensorPtr const_data = tensor::empty(kInt32->type_id(), shp, device::DeviceType::kCPU);
   MS_CHECK_TRUE_RET(const_data != nullptr && const_data->data_c() != nullptr, nullptr);
   auto *val = static_cast<int32_t *>(const_data->data_c());
   for (int i = 0; i < padding_length; ++i) {
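The fusion passes read single-element constants (eps, expert capacity) by dereferencing the storage pointer directly. A sketch of that pattern; it is valid only for a non-empty host tensor of the expected element type, and the null checks are added here for illustration:

// Mirrors DecoderLayerFusion/EncoderLayerFusion::GetEps above.
inline float ReadScalarF32(const tensor::TensorPtr &tensor) {
  MS_EXCEPTION_IF_NULL(tensor);
  MS_EXCEPTION_IF_NULL(tensor->device_address());
  // Caller must have verified data_type() == kNumberTypeFloat32 and a
  // non-zero element count before dereferencing.
  return *static_cast<float *>(tensor->device_address()->GetMutablePtr());
}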
diff --git a/mindspore-lite/tools/optimizer/fusion/multi_head_attention_fusion.cc b/mindspore-lite/tools/optimizer/fusion/multi_head_attention_fusion.cc
index 71f13cd2d97592f02d52c95b6a9db7a6d5cba23e..22f348f29adc680a27b280f3d0c9f8295b638a1e 100644
--- a/mindspore-lite/tools/optimizer/fusion/multi_head_attention_fusion.cc
+++ b/mindspore-lite/tools/optimizer/fusion/multi_head_attention_fusion.cc
@@ -42,6 +42,7 @@
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_r.h"
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_s.h"
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_t.h"
+#include "ir/tensor_api.h"
 namespace mindspore::opt {
 namespace {
@@ -656,12 +657,12 @@ std::shared_ptr<tensor::Tensor> ConcatTensors(const std::vector<tensor::TensorPt
-  auto concat_tensor = std::make_shared<tensor::Tensor>(base_data_type, new_shape);
+  auto concat_tensor = tensor::empty(base_data_type, new_shape, device::DeviceType::kCPU);
   MS_CHECK_TRUE_RET(concat_tensor != nullptr, nullptr);
   std::size_t offset = 0;
   for (const auto &tensor : tensors) {
     void *ptr = reinterpret_cast<uint8_t *>(concat_tensor->data_c()) + offset;
-    auto transpose_tensor = std::make_shared<tensor::Tensor>(base_data_type, tensor->shape());
+    auto transpose_tensor = tensor::empty(base_data_type, tensor->shape(), device::DeviceType::kCPU);
     if (transpose && !transpose_b) {
       switch (base_data_type) {
         case kNumberTypeFloat32: {
@@ -692,7 +693,7 @@ std::shared_ptr<tensor::Tensor> ConcatTensors(const std::vector<tensor::TensorPt
   std::vector<int64_t> tshape = {new_shape[1], new_shape[0]};
-  auto transposed_tensor = std::make_shared<tensor::Tensor>(base_data_type, tshape);
+  auto transposed_tensor = tensor::empty(base_data_type, tshape, device::DeviceType::kCPU);
   switch (base_data_type) {
     case kNumberTypeFloat32: {
       auto status = TransposeMatrix(concat_tensor, transposed_tensor);
diff --git a/mindspore-lite/tools/optimizer/fusion/reduce_same_op_in_horizon.cc b/mindspore-lite/tools/optimizer/fusion/reduce_same_op_in_horizon.cc
index 82cc54b91b5ea29d685b8fe09753007dc5d4a154..c38cad835e2345c125b9f40579f11a5237d1b237 100644
--- a/mindspore-lite/tools/optimizer/fusion/reduce_same_op_in_horizon.cc
+++ b/mindspore-lite/tools/optimizer/fusion/reduce_same_op_in_horizon.cc
@@ -39,10 +39,7 @@ bool CheckValueIsEqual(const ValuePtr &left, const ValuePtr &right) {
     auto left_tensor = left->cast<tensor::TensorPtr>();
     auto right_tensor = right->cast<tensor::TensorPtr>();
     MS_CHECK_TRUE_RET(left_tensor != nullptr && right_tensor != nullptr, false);
-    auto left_data = left_tensor->data_ptr();
-    auto right_data = right_tensor->data_ptr();
-    MS_CHECK_TRUE_RET(left_data != nullptr && right_data != nullptr, false);
-    return left_tensor->tensor::MetaTensor::operator==(*right_tensor) && left_data->equals(*right_data);
+    return left_tensor->ValueEqual(*right_tensor);
   }
   return *left == *right;
 }
diff --git a/mindspore-lite/tools/optimizer/fusion/tf_bidirection_gru_fusion.cc b/mindspore-lite/tools/optimizer/fusion/tf_bidirection_gru_fusion.cc
index 81c66cac65c75c6e8eaa8b3a25e33e32f9b05653..54859d04830712905b0d805e1709ac3ccf6e9459 100644
--- a/mindspore-lite/tools/optimizer/fusion/tf_bidirection_gru_fusion.cc
+++ b/mindspore-lite/tools/optimizer/fusion/tf_bidirection_gru_fusion.cc
@@ -42,6 +42,7 @@
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_t.h"
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_w.h"
+#include "ir/tensor_api.h"
 namespace mindspore {
 namespace opt {
 namespace {
@@ -456,7 +457,7 @@ ParameterPtr TfBidirectionGruFusion::AddDefaultParameter(const FuncGraphPtr &fun
   }
   parameter->set_abstract(abstract);
-  auto gate_weight_default = std::make_shared<tensor::Tensor>(type, shape_vector);
+  auto gate_weight_default = tensor::empty(type, shape_vector, device::DeviceType::kCPU);
   if (gate_weight_default == nullptr) {
     MS_LOG(ERROR) << "gate_weight_default is nullptr";
     return nullptr;
   }
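reduce_same_op_in_horizon.cc carries the one semantic simplification in this batch: the two-step comparison (MetaTensor::operator== on metadata plus a TensorData equals() on the buffers) collapses into a single ValueEqual() call. A sketch under the assumption that ValueEqual() covers both dtype/shape and element bytes:

// Assumed semantics of Tensor::ValueEqual, per the replacement above.
bool SameConstTensor(const tensor::TensorPtr &lhs, const tensor::TensorPtr &rhs) {
  return lhs != nullptr && rhs != nullptr && lhs->ValueEqual(*rhs);
}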
diff --git a/mindspore-lite/tools/optimizer/graph/grouped_matmul_op_pass.cc b/mindspore-lite/tools/optimizer/graph/grouped_matmul_op_pass.cc
index a92998978adffc5a73c50ae01821dc3e044633ce..066cb09d2e5cebd5572d0f909837b214e481bf53 100644
--- a/mindspore-lite/tools/optimizer/graph/grouped_matmul_op_pass.cc
+++ b/mindspore-lite/tools/optimizer/graph/grouped_matmul_op_pass.cc
@@ -34,6 +34,7 @@
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_g.h"
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_m.h"
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_t.h"
+#include "ir/tensor_api.h"
 namespace mindspore::opt {
 #if !defined(_WIN32) && !defined(_WIN64)
@@ -104,7 +105,7 @@ void GroupedMatmulOpPass::UseEmptyNodeReplaceNone(const FuncGraphPtr &graph, con
   // create empty tensor
   auto tensor_type = OpInputDtypeMap.at(cnode_name).at(input_idx);
   std::vector<int64_t> tensor_shape = {0};
-  auto empty_tensor = std::make_shared<tensor::Tensor>(tensor_type, tensor_shape);
+  auto empty_tensor = tensor::empty(tensor_type, tensor_shape, device::DeviceType::kCPU);
   // create node
   auto empty_node = std::make_shared<ValueNode>(empty_tensor);
   ValueNodePtr empty_value_node = empty_node->cast<ValueNodePtr>();
diff --git a/mindspore-lite/tools/optimizer/graph/input_and_output_variable_pass.cc b/mindspore-lite/tools/optimizer/graph/input_and_output_variable_pass.cc
index df781e77a0c5266758f2e7242032e7b619ca0bb5..63f0e7f446f69fce219296790be555e22a165724 100644
--- a/mindspore-lite/tools/optimizer/graph/input_and_output_variable_pass.cc
+++ b/mindspore-lite/tools/optimizer/graph/input_and_output_variable_pass.cc
@@ -29,6 +29,7 @@
 #include "mindspore/ops/op_def/sequence_ops.h"
 #include "tools/common/func_graph_utils.h"
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_m.h"
+#include "ir/tensor_api.h"
 namespace mindspore::opt {
@@ -154,7 +155,7 @@ CNodePtr InputAndOutputVariablePass::CreateAssign(const AnfNodePtr &anf_node, co
     MS_LOG(ERROR) << "type ptr is nullptr";
     return nullptr;
   }
-  tensor::TensorPtr tensor_data = std::make_shared<tensor::Tensor>(type_ptr->type_id(), shape);
+  tensor::TensorPtr tensor_data = tensor::empty(type_ptr->type_id(), shape, device::DeviceType::kCPU);
   float *val = static_cast<float *>(tensor_data->data_c());
   for (size_t i = 0; i < tensor_data->DataSize(); ++i) {
     *(val + i) = 0;
   }
diff --git a/mindspore-lite/tools/optimizer/graph/lite_tensor_extractor.cc b/mindspore-lite/tools/optimizer/graph/lite_tensor_extractor.cc
index 7d2539af9bc54bc0724a82aea208258ae9f12c2d..b86ae3b14460db18cb1dbcb4bfa333ec2421977f 100644
--- a/mindspore-lite/tools/optimizer/graph/lite_tensor_extractor.cc
+++ b/mindspore-lite/tools/optimizer/graph/lite_tensor_extractor.cc
@@ -216,7 +216,7 @@ int LiteTensorExtractor::GetCNodeConstInputToAbstract(const CNodePtr &cnode, con
   }
   auto input_tensor = shape_value->cast<tensor::TensorPtr>();
   MS_CHECK_FALSE(input_tensor == nullptr, RET_ERROR);
-  if (input_tensor->data().const_data() != nullptr) {
+  if (input_tensor->unsafe_data() != nullptr) {
     MS_LOG(DEBUG) << "abstract already have const data.";
     continue;
   }
diff --git a/mindspore-lite/tools/optimizer/graph/miniaturization_pass.cc b/mindspore-lite/tools/optimizer/graph/miniaturization_pass.cc
index 892be53bd8024a0095bceadd9e52b44aa90c62a7..155cca61d7f3585fa5e77227e7b47db9a057ef4e 100644
--- a/mindspore-lite/tools/optimizer/graph/miniaturization_pass.cc
+++ b/mindspore-lite/tools/optimizer/graph/miniaturization_pass.cc
@@ -38,23 +38,22 @@ static inline tensor::TensorPtr GetTensorFromNode(const AnfNodePtr &node) {
     return nullptr;
   }
   auto tensor = value->cast<tensor::TensorPtr>();
-  if (tensor == nullptr || tensor->data_ptr() == nullptr || tensor->data_c() == nullptr) {
+  if (tensor == nullptr || tensor->device_address() == nullptr || tensor->data_c() == nullptr) {
     return nullptr;
   }
   return tensor;
 }
 bool MiniaturizationPass::NeedCompress(const tensor::TensorPtr &tensor) {
-  auto tensor_data_ptr = tensor->data_ptr();
-  auto item_size = tensor_data_ptr->itemsize();
-  auto item_num = tensor_data_ptr->size();
-  auto data_ptr = tensor_data_ptr->data();
+  auto item_size = tensor->DataItemSize();
+  auto item_num = tensor->DataSize();
+  auto data_ptr = tensor->device_address()->GetMutablePtr();
   // No need cast to fill ops while tensor data size is small.
   if (item_num < COMPRESS_TRIGGER_SIZE_) {
     return false;
   }
   int ret = 0;
-  for (ssize_t idx = 1; idx < item_num; idx++) {
+  for (size_t idx = 1; idx < item_num; idx++) {
     auto offset = idx * item_size;
     // No memcmp_s provide in secure lib of huawei
     ret = memcmp(static_cast<uint8_t *>(data_ptr) + offset, static_cast<uint8_t *>(data_ptr) + offset - item_size,
@@ -67,15 +66,14 @@ bool MiniaturizationPass::NeedCompress(const tensor::TensorPtr &tensor) {
 }
 static inline ValuePtr GetFirstVal(const tensor::TensorPtr &tensor) {
-  auto tensor_data_ptr = tensor->data_ptr();
+  auto tensor_data_ptr = tensor->device_address()->GetMutablePtr();
   auto data_type = tensor->data_type();
-  auto data_ptr = tensor_data_ptr->data();
   if (data_type == kNumberTypeFloat32) {
-    float val = static_cast<float *>(data_ptr)[0];
+    float val = static_cast<float *>(tensor_data_ptr)[0];
     return MakeValue(val);
   }
   if (data_type == kNumberTypeUInt32) {
-    int32_t val = static_cast<int32_t *>(data_ptr)[0];
+    int32_t val = static_cast<int32_t *>(tensor_data_ptr)[0];
     return MakeValue(val);
   }
   return nullptr;
 }
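miniaturization_pass.cc shows the element-wise view of the new API: DataItemSize() and DataSize() replace TensorData's itemsize()/size(), and the loop index flips from ssize_t to size_t because the new counts are unsigned. A sketch of the same all-items-equal scan, assuming those two accessors return bytes-per-element and element count respectively:

// Same scan as NeedCompress above, under the assumed DataItemSize/DataSize API.
bool AllItemsEqual(const tensor::TensorPtr &tensor) {
  auto item_size = tensor->DataItemSize();  // bytes per element (assumed)
  auto item_num = tensor->DataSize();       // number of elements (assumed)
  auto *base = static_cast<const uint8_t *>(tensor->device_address()->GetMutablePtr());
  for (size_t idx = 1; idx < item_num; idx++) {
    if (memcmp(base + idx * item_size, base + (idx - 1) * item_size, item_size) != 0) {
      return false;
    }
  }
  return true;
}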
diff --git a/mindspore-lite/tools/optimizer/graph/node_infershape.cc b/mindspore-lite/tools/optimizer/graph/node_infershape.cc
index 69d3edf9e778617a911d7f3d318ecd3b379328a0..0ddc5d928ffbcd5a6cbbe304746a881416a49d10 100644
--- a/mindspore-lite/tools/optimizer/graph/node_infershape.cc
+++ b/mindspore-lite/tools/optimizer/graph/node_infershape.cc
@@ -58,6 +58,7 @@
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_t.h"
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_z.h"
+#include "ir/tensor_api.h"
 namespace mindspore {
 namespace opt {
 static const std::unordered_set<std::string> kNNACLToOpsInfer = {
@@ -168,7 +169,7 @@ void RectifyFormat(const std::vector<lite::Tensor *> &inputs, FmkType fmk_type)
 tensor::TensorPtr NewTensorInfo(const lite::Tensor *tensor) {
   std::vector<int32_t> shape(tensor->shape());
   std::vector<int64_t> shape_vector(shape.begin(), shape.end());
-  auto tensor_info = std::make_shared<tensor::Tensor>(tensor->data_type(), shape_vector);
+  auto tensor_info = tensor::empty(tensor->data_type(), shape_vector, device::DeviceType::kCPU);
   if (tensor_info == nullptr) {
     MS_LOG(ERROR) << "new tensor::Tensor failed";
     return nullptr;
   }
diff --git a/mindspore-lite/tools/optimizer/graph/output_variable_pass.cc b/mindspore-lite/tools/optimizer/graph/output_variable_pass.cc
index 0fb225aba73dd2b5cfa8149d6482736ff9e4d4a6..5ce172030ce7db90477d3ea0f8a83aad95e4e460 100644
--- a/mindspore-lite/tools/optimizer/graph/output_variable_pass.cc
+++ b/mindspore-lite/tools/optimizer/graph/output_variable_pass.cc
@@ -29,6 +29,7 @@
 #include "mindspore/ops/op_def/sequence_ops.h"
 #include "tools/common/func_graph_utils.h"
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_m.h"
+#include "ir/tensor_api.h"
 namespace mindspore::opt {
 namespace {
@@ -78,7 +79,7 @@ bool OutputVariablePass::Run(const FuncGraphPtr &graph) {
     }
     abstract::ShapePtr shape = dyn_cast<abstract::Shape>(make_tuple_input->Shape());
     MS_CHECK_TRUE_MSG(shape != nullptr, false, "shape is nullptr!");
-    tensor::TensorPtr tensor_data = std::make_shared<tensor::Tensor>(type_ptr->type_id(), shape->shape());
+    tensor::TensorPtr tensor_data = tensor::empty(type_ptr->type_id(), shape->shape(), device::DeviceType::kCPU);
     float *data_addr = static_cast<float *>(tensor_data->data_c());
     for (size_t j = 0; i < tensor_data->DataSize(); ++j) {
diff --git a/mindspore-lite/tools/optimizer/graph/scalar_op_pass.cc b/mindspore-lite/tools/optimizer/graph/scalar_op_pass.cc
index acf9c93080deb046ef13e4ec6b7efc973816d563..82aa86fc6b1acd8414a524888021fb16b6639f08 100644
--- a/mindspore-lite/tools/optimizer/graph/scalar_op_pass.cc
+++ b/mindspore-lite/tools/optimizer/graph/scalar_op_pass.cc
@@ -35,6 +35,7 @@
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_r.h"
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_s.h"
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_t.h"
+#include "ir/tensor_api.h"
 /*
 This pass changes the following pattern(s).
@@ -119,7 +120,7 @@ ValueNodePtr ScalarOpPass::GenerateScalarValueTensor(const FuncGraphPtr &func_gr
   }
   int32_t scalar_value = *reinterpret_cast<int32_t *>(data_info.data_.data());
   ShapeVector const_data_shape = {1};
-  tensor::TensorPtr const_data_tensor = std::make_shared<tensor::Tensor>(kNumberTypeInt32, const_data_shape);
+  tensor::TensorPtr const_data_tensor = tensor::empty(kNumberTypeInt32, const_data_shape, device::DeviceType::kCPU);
   auto *val = static_cast<int32_t *>(const_data_tensor->data_c());
   *val = scalar_value;
   auto const_value_node = NewValueNode(const_data_tensor);
diff --git a/mindspore-lite/tools/optimizer/parallel/depthwise_conv2d_info.cc b/mindspore-lite/tools/optimizer/parallel/depthwise_conv2d_info.cc
index d997fcd7288e3779adea6306d358c0d6b256a41e..6f988070fe0c2ef256957622424ed3cc9ccdf4ed 100644
--- a/mindspore-lite/tools/optimizer/parallel/depthwise_conv2d_info.cc
+++ b/mindspore-lite/tools/optimizer/parallel/depthwise_conv2d_info.cc
@@ -38,6 +38,7 @@
 #include "mindspore/ops/op_def/auto_generate/gen_ops_primitive_c.h"
 using mindspore::schema::PrimitiveType_Conv2DFusion;
+#include "ir/tensor_api.h"
 namespace mindspore {
 namespace opt {
 namespace {
@@ -83,7 +84,7 @@ void CreateSplitConstantTensors(const tensor::TensorPtr &constant_tensor, const
   }
   auto cur_shape = UP_DIV(split_dim_size * visited_block, total_block_count);
   split_constant_shapes.at(i).at(split_dim) = cur_shape;
-  auto tensor = std::make_shared<tensor::Tensor>(weight_type_id, split_constant_shapes.at(i));
+  auto tensor = tensor::empty(weight_type_id, split_constant_shapes.at(i), device::DeviceType::kCPU);
   if (tensor == nullptr) {
     MS_LOG(ERROR) << "make shared failed.";
     split_constant_tensors->clear();