diff --git a/include/api/types.h b/include/api/types.h index 25eb1ebffc253e739614d3b3378fd790ab6ea756..0666f53025b5b8786bdb07b999c69feb5e1974e8 100644 --- a/include/api/types.h +++ b/include/api/types.h @@ -422,6 +422,7 @@ struct MSCallBackParam { std::string node_name; /**< node name argument */ std::string node_type; /**< node type argument */ double execute_time; /**< gpu execute time */ + int arch; /**< node arch argument */ }; /// \brief KernelCallBack defined the function pointer for callBack. diff --git a/mindspore-lite/src/common/common.h b/mindspore-lite/src/common/common.h index dc1c806741906328246e7f1e92b49009600f5d8b..be52850b7aa5466d897fdea44c9f6efa786d3077 100644 --- a/mindspore-lite/src/common/common.h +++ b/mindspore-lite/src/common/common.h @@ -92,6 +92,8 @@ static const char *const kCommonContextSection = "common_context"; static const char *const kCompileGraphParallel = "compile_graph_parallel"; static const char *const kEnableValue = "on"; static const char *const kDisableValue = "off"; +static const char *const kEnableTracing = "enableTracing"; +static const char *const kKernelTracingSection = "kernel_tracing"; // gpu context static const char *const kGPUContextSection = "gpu_context"; static const char *const kInputShapeKey = "input_shape"; diff --git a/mindspore-lite/src/common/utils.cc b/mindspore-lite/src/common/utils.cc index 29966b7575db4dbfae6e19970a8d01af2cf3ae7e..dd3c1e7e6014338bd35c18b5c198168ea10c9c1b 100644 --- a/mindspore-lite/src/common/utils.cc +++ b/mindspore-lite/src/common/utils.cc @@ -26,6 +26,10 @@ #include #include #include +#include +#include +#include +#include #endif namespace mindspore { @@ -338,5 +342,25 @@ int GetCoreNum() { } bool IsPowerOfTwo(int n) { return n > 0 && (n & (n - 1)) == 0; } + +int GetCurrentPid() { + int pid = 0; +#if defined(_MSC_VER) || defined(_WIN32) + pid = GetCurrentProcessId(); +#else + pid = getpid(); +#endif + return pid; +} + +int GetCurrentTid() { + int tid = 0; +#if defined(_MSC_VER) || 
defined(_WIN32)
+  tid = GetCurrentThreadId();
+#else
+  tid = syscall(SYS_gettid);
+#endif
+  return tid;
+}
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore-lite/src/common/utils.h b/mindspore-lite/src/common/utils.h
index 7957781f9e2e61ccafbeecc2786c4ec9d33237a1..ad2e7558f1789a75a8b5a39d2b05da64ca09069c 100644
--- a/mindspore-lite/src/common/utils.h
+++ b/mindspore-lite/src/common/utils.h
@@ -32,6 +32,7 @@
 #include "tools/common/option.h"
 #include "include/errorcode.h"
 #include "ir/dtype/type_id.h"
+#include "include/api/data_type.h"
 
 namespace mindspore {
 namespace lite {
@@ -63,6 +64,9 @@ int GetCoreNum();
 
 bool IsPowerOfTwo(int n);
 
+int GetCurrentPid();
+
+int GetCurrentTid();
 #ifdef __ANDROID__
 uint32_t getHwCap(int hwcap_type);
 #endif
@@ -384,6 +388,45 @@ inline std::string StringTolower(const std::string &str) {
   std::transform(ret.begin(), ret.end(), ret.begin(), [](unsigned char c) { return std::tolower(c); });
   return ret;
 }
+
+inline std::string DataTypeToString(DataType dtype) {
+  switch (dtype) {
+    case DataType::kObjectTypeString:
+      return "String";
+    case DataType::kNumberTypeBool:
+      return "Bool";
+    case DataType::kNumberTypeInt8:
+      return "Int8";
+    case DataType::kNumberTypeInt16:
+      return "Int16";
+    case DataType::kNumberTypeInt32:
+      return "Int32";
+    case DataType::kNumberTypeInt64:
+      return "Int64";
+    case DataType::kNumberTypeUInt8:
+      return "UInt8";
+    case DataType::kNumberTypeUInt16:
+      return "UInt16";
+    case DataType::kNumberTypeUInt32:
+      return "UInt32";
+    case DataType::kNumberTypeUInt64:
+      return "UInt64";
+    case DataType::kNumberTypeFloat16:
+      return "Float16";
+    case DataType::kNumberTypeFloat32:
+      return "Float32";
+    case DataType::kNumberTypeFloat64:
+      return "Float64";
+    case DataType::kNumberTypeBFloat16:
+      return "BFloat16";
+    case DataType::kNumberTypeComplex64:
+      return "Complex64";
+    case DataType::kNumberTypeComplex128:
+      return "Complex128";
+    default:
+      return "Unknown_" + std::to_string(static_cast<int>(dtype));
+  }
+} } // namespace lite } // namespace mindspore diff --git a/mindspore-lite/src/executor/kernel_exec.h b/mindspore-lite/src/executor/kernel_exec.h index 480cbc01c59078a6e4ac0eb29705ab3639bfff88..36c34fed01bc07a6135196572a6d47ef4bfa5db1 100644 --- a/mindspore-lite/src/executor/kernel_exec.h +++ b/mindspore-lite/src/executor/kernel_exec.h @@ -119,7 +119,12 @@ class KernelExec { auto ret = DoExecute(); if (after != nullptr) { - if (!after(this->in_tensors(), this->out_tensors(), {this->name(), TypeName(type())})) { + auto desc = this->desc(); + MSCallBackParam opInfo; + opInfo.node_name = this->name(); + opInfo.node_type = TypeName(type()); + opInfo.arch = desc.arch; + if (!after(this->in_tensors(), this->out_tensors(), opInfo)) { MS_LOG(WARNING) << "run kernel after_callback failed, name: " << this->name(); } } diff --git a/mindspore-lite/src/litert/cxx_api/model/model_impl.cc b/mindspore-lite/src/litert/cxx_api/model/model_impl.cc index 272c01ddc726a19a7bbbca1d102d104dde48bbaf..d5efa5e04d0b8ffc91c14c480841a3ed2d6308a9 100644 --- a/mindspore-lite/src/litert/cxx_api/model/model_impl.cc +++ b/mindspore-lite/src/litert/cxx_api/model/model_impl.cc @@ -15,6 +15,8 @@ */ #include "src/litert/cxx_api/model/model_impl.h" +#include +#include #include #include #include @@ -33,12 +35,14 @@ #include "src/litert/lite_session.h" #include "src/litert/model_manager.h" #include "src/common/file_utils.h" +#include "src/common/utils.h" #if defined(ENABLE_PRE_INFERENCE) && defined(__linux__) && !defined(Debug) #include "src/common/random_data_generator.h" #endif #include "src/common/config_file.h" #include "src/litert/cpu_info.h" #include "src/litert/pack_weight_manager.h" +#include "nlohmann/json.hpp" namespace mindspore { namespace { const char *const kExecutionPlan = "execution_plan"; @@ -52,6 +56,9 @@ constexpr auto kObfRatioKey = "obf_ratio"; constexpr auto kObfNodeName = "obf_op-obf_mul"; constexpr size_t kFloatSize = 4; constexpr int kDataIndex = 1; +constexpr float FloatMSEC = 
1000.0f;
+constexpr int DSPTid = 20;
+constexpr int PNNATid = 60;
 #if defined(ENABLE_PRE_INFERENCE) && defined(__linux__) && !defined(Debug)
 constexpr auto kCommonSection = "common";  // support external user configuration
 constexpr auto kEnablePreInferenceKey = "enable_pre_inference";
@@ -415,6 +422,150 @@ Status ModelImpl::UpdateConfig(const std::string &section, const std::pair<std::string, std::string>
+std::string ModelImpl::GetConfig(const std::string &section, const std::string &key) {
+  std::lock_guard<std::mutex> lock(mutex_);
+  auto iter = config_info_.find(section);
+  if (iter == config_info_.end()) {
+    return "";
+  }
+  auto elem_iter = iter->second.find(key);
+  if (elem_iter == iter->second.end()) {
+    return "";
+  }
+  return elem_iter->second;
+}
+
+Status ModelImpl::ExportTraceData(const std::vector<KernelInfo> &kernel_infos, uint64_t first_op_start_time) {
+  std::vector<nlohmann::json> trace_events;
+  auto pid = lite::GetCurrentPid();
+  for (const auto &kernel : kernel_infos) {
+    uint64_t relative_start_time = kernel.start_time - first_op_start_time;
+    uint64_t relative_end_time = kernel.end_time - first_op_start_time;
+    uint64_t op_time = relative_end_time - relative_start_time;
+    std::string backend;
+    switch (kernel.arch) {
+      case kernel::kCPU:
+        backend = "CPU";
+        break;
+      case kernel::kGPU:
+        backend = "GPU";
+        break;
+      case kernel::kDSP:
+        backend = "DSP";
+        break;
+      case kernel::kDelegate:
+        backend = "NPU";
+        break;
+      default:
+        backend = "Unknown!";
+        break;
+    }
+    nlohmann::json event;
+    event["name"] = kernel.name + "[" + std::to_string(op_time / FloatMSEC) + "ms]";
+    event["ph"] = "X";
+    event["ts"] = relative_start_time;
+    event["dur"] = op_time;
+    event["pid"] = pid;
+    event["tid"] = kernel.tid;
+    event["cat"] = "Operator";
+    nlohmann::json input_shapes_json = nlohmann::json::array();
+    std::transform(kernel.input_shape.begin(), kernel.input_shape.end(), std::back_inserter(input_shapes_json),
+                   [](const std::vector<int64_t> &shape) { return lite::ShapeVectorToStr(shape); });
+    nlohmann::json input_dtypes_json = nlohmann::json::array();
+    std::transform(kernel.input_dtype.begin(), kernel.input_dtype.end(),
std::back_inserter(input_dtypes_json),
+                   [](const DataType &dtype) { return lite::DataTypeToString(dtype); });
+    event["args"] = {{"start_time", relative_start_time},
+                     {"end_time", relative_end_time},
+                     {"backend", backend},
+                     {"op_type", kernel.type},
+                     {"input_shapes", input_shapes_json},
+                     {"input_dtypes", input_dtypes_json}};
+    trace_events.push_back(event);
+  }
+  nlohmann::json output_json;
+  output_json["traceEvents"] = trace_events;
+  std::string filename = std::to_string(first_op_start_time) + "_tracing.json";
+  std::ofstream json_file(filename);
+  if (json_file.is_open()) {
+    json_file << output_json.dump(4);
+    json_file.close();
+    MS_LOG(INFO) << "Trace file generated: " << filename;
+  } else {
+    MS_LOG(ERROR) << "Failed to open trace file: " << filename;
+    return kLiteError;
+  }
+  return kSuccess;
+}
+
+Status ModelImpl::RunGraphWithTracing() {
+  MSKernelCallBack before = nullptr;
+  MSKernelCallBack after = nullptr;
+  std::vector<KernelInfo> kernel_infos;
+  std::unordered_map<std::string, uint64_t> op_start_times;
+  uint64_t first_op_start_time = 0;
+  bool first_op_set = false;
+  before = [&](const std::vector<MSTensor> &before_inputs,
+               const std::vector<MSTensor> &before_outputs, const MSCallBackParam &call_param) {
+    if (before_inputs.empty()) {
+      MS_LOG(INFO) << "The num of beforeInputs is empty";
+    }
+    if (before_outputs.empty()) {
+      MS_LOG(INFO) << "The num of beforeOutputs is empty";
+    }
+    auto op_start_time = lite::GetTimeUs();
+    std::string op_name = call_param.node_name;
+    if (!first_op_set) {
+      first_op_start_time = op_start_time;
+      first_op_set = true;
+    }
+    op_start_times[op_name] = op_start_time;
+    return true;
+  };
+  after = [&](const std::vector<MSTensor> &after_inputs,
+              const std::vector<MSTensor> &after_outputs, const MSCallBackParam &call_param) {
+    if (after_inputs.empty()) {
+      MS_LOG(INFO) << "The num of afterInputs is empty";
+    }
+    if (after_outputs.empty()) {
+      MS_LOG(INFO) << "The num of afterOutputs is empty";
+    }
+    std::string op_name = call_param.node_name;
+    std::string op_type =
call_param.node_type;
+    auto arch = call_param.arch;
+    auto it = op_start_times.find(op_name);
+    if (it != op_start_times.end()) {
+      uint64_t op_start_time = it->second;
+      auto op_end_time = lite::GetTimeUs();
+      std::vector<std::vector<int64_t>> input_shapes;
+      std::vector<DataType> input_dtypes;
+      for (const auto &input : after_inputs) {
+        input_shapes.push_back(input.Shape());
+        input_dtypes.push_back(input.DataType());
+      }
+      int tid;
+      if (arch == kernel::kDSP) {
+        tid = DSPTid;
+      } else if (arch == kernel::kDelegate) {
+        tid = PNNATid;
+      } else {
+        tid = lite::GetCurrentTid();
+      }
+      kernel_infos.push_back({op_name, op_type, arch, op_start_time, op_end_time, input_shapes, input_dtypes, tid});
+      op_start_times.erase(it);
+    }
+    return true;
+  };
+  auto result = RunGraph(before, after);
+  if (result == kSuccess) {
+    auto ret = ExportTraceData(kernel_infos, first_op_start_time);
+    if (ret != kSuccess) {
+      MS_LOG(ERROR) << "Failed to export trace data!";
+      return ret;
+    }
+  }
+  return result;
+}
+
 Status ModelImpl::Predict(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs,
                           const MSKernelCallBack &before, const MSKernelCallBack &after) {
   std::lock_guard<std::mutex> lock(mutex_);
@@ -495,11 +646,17 @@ Status ModelImpl::Predict(const std::vector &inputs, std::vector *v) {
 namespace mindspore {
+struct KernelInfo {
+  std::string name;
+  std::string type;
+  int arch;
+  uint64_t start_time;
+  uint64_t end_time;
+  std::vector<std::vector<int64_t>> input_shape;
+  std::vector<DataType> input_dtype;
+  int tid;
+};
+
 typedef std::shared_ptr<session::LiteSession>(CreateTrainSessionProto)(std::shared_ptr<Graph::GraphData> graph_data,
                                                                        std::shared_ptr<TrainCfg> cfg,
                                                                        const std::shared_ptr<lite::InnerContext> &context);
@@ -85,6 +96,7 @@ class ModelImpl {
   Status LoadConfig(const std::string &config_path);
   Status UpdateConfig(const std::string &section, const std::pair<std::string, std::string> &config);
+  std::string GetConfig(const std::string &section, const std::string &key);
   std::vector<MSTensor> GetInputs();
   std::vector<MSTensor> GetOutputs();
   std::vector<MSTensor> GetGradients() const;
@@ -136,6 +148,8 @@ class ModelImpl {
   void SetContext(const std::shared_ptr<Context> &context) { context_ =
context; }
   void SetConfig(const std::shared_ptr<TrainCfg> cfg) { cfg_ = cfg; }
   Status RunGraph(const MSKernelCallBack &before, const MSKernelCallBack &after);
+  Status RunGraphWithTracing();
+  Status ExportTraceData(const std::vector<KernelInfo> &kernel_infos, uint64_t first_op_start_time);
   bool IsEnableModelSharing(const std::string &model_path);
   bool IsEnableModelSharing(const std::pair<const void *, size_t> &model_buff);
   bool IsValidDoubleNum(const std::string &num_str);