diff --git a/cv/detection/ssd/MindSpore/Dockerfile b/cv/detection/ssd/MindSpore/Dockerfile
new file mode 100755
index 0000000000000000000000000000000000000000..fcb31f207f23664ca2d60bda9a15463af1042dd9
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/Dockerfile
@@ -0,0 +1,6 @@
+ARG FROM_IMAGE_NAME
+FROM ${FROM_IMAGE_NAME}
+
+RUN apt-get update && apt-get install -y libgl1-mesa-glx
+COPY requirements.txt .
+RUN pip3.7 install -r requirements.txt
diff --git a/cv/detection/ssd/MindSpore/README.md b/cv/detection/ssd/MindSpore/README.md
new file mode 100755
index 0000000000000000000000000000000000000000..92eb19e788f86f05c9995914af1b092c01d335f4
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/README.md
@@ -0,0 +1,75 @@
+# SSD
+## Model description
+SSD discretizes the output space of bounding boxes into a set of default boxes over different aspect ratios and scales per feature map location. At prediction time, the network generates scores for the presence of each object category in each default box and produces adjustments to the box to better match the object shape. Additionally, the network combines predictions from multiple feature maps with different resolutions to naturally handle objects of various sizes.
+
+[Paper](https://arxiv.org/abs/1512.02325): Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, Alexander C. Berg. "SSD: Single Shot MultiBox Detector." European Conference on Computer Vision (ECCV), 2016.
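+
+For intuition, the total number of default boxes is fixed by the feature-map sizes and the per-location box counts. The short sketch below (plain Python; values taken from `config/ssd300_config.yaml` in this repository) reproduces the `num_ssd_boxes: 1917` used by the SSD300 head:
+
+```python
+# Default-box count for SSD300: num_default boxes per feature-map cell, per level.
+feature_size = [19, 10, 5, 3, 2, 1]  # square feature-map sides per level
+num_default = [3, 6, 6, 6, 6, 6]     # default boxes per cell per level
+
+total = sum(side * side * n for side, n in zip(feature_size, num_default))
+print(total)  # 1917, matching num_ssd_boxes in ssd300_config.yaml
+```
+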
+## Step 1: Installation
+```
+pip3 install -r requirements.txt
+pip3 install easydict
+```
+## Step 2: Prepare Datasets
+Download the dataset to `/home/datasets/cv/coco2017`.
+
+Note that you can run the scripts with the dataset used in the original paper or one widely used in this domain/network architecture. The following sections describe how to run the scripts using the dataset below.
+
+Dataset used: [COCO2017](https://cocodataset.org/)
+
+- Dataset size: 19G
+ - Train: 18G, 118000 images
+ - Val: 1G, 5000 images
+ - Annotations: 241M (instances, captions, person_keypoints, etc.)
+- Data format: images and JSON files
+ - Note: data will be processed in dataset.py
+
+ Change `coco_root` and any other settings you need in `src/config.py`. The directory structure is as follows:
+
+ ```shell
+ .
+ └─coco_dataset
+   ├─annotations
+   │  ├─instances_train2017.json
+   │  └─instances_val2017.json
+   ├─val2017
+   └─train2017
+ ```
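+
+ Before training, you can sanity-check this layout by loading the annotation file with `pycocotools` (assumed here to be installed via `requirements.txt`); a minimal sketch:
+
+ ```python
+ from pycocotools.coco import COCO
+
+ # Path assumes the directory structure shown above.
+ coco = COCO("coco_dataset/annotations/instances_val2017.json")
+ print(len(coco.getImgIds()))  # expect 5000 images for val2017
+ ```
+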
+ If you use your own dataset, **select `other` as the dataset when running the script.**
+ Organize the dataset information into a TXT file, where each row has the following format:
+
+ ```shell
+ train2017/0000001.jpg 0,259,401,459,7 35,28,324,201,2 0,30,59,80,2
+ ```
+
+ Each row is an image annotation split by spaces: the first column is the relative path of the image, and the remaining columns are box and class information in the format [xmin,ymin,xmax,ymax,class]. Images are read from the path obtained by joining `image_dir` (the dataset directory) with the relative path from `anno_path` (the TXT file); both `image_dir` and `anno_path` are set in `src/config.py`.
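+
+ As a worked example, the following sketch (plain Python; the sample line is the one shown above) parses one row of such an annotation file:
+
+ ```python
+ # Parse one row of the custom annotation TXT: image path, then boxes as
+ # xmin,ymin,xmax,ymax,class groups separated by spaces.
+ line = "train2017/0000001.jpg 0,259,401,459,7 35,28,324,201,2 0,30,59,80,2"
+ parts = line.split()
+ image_rel_path, boxes = parts[0], []
+ for ann in parts[1:]:
+     xmin, ymin, xmax, ymax, cls = map(int, ann.split(","))
+     boxes.append((xmin, ymin, xmax, ymax, cls))
+ print(image_rel_path, boxes)
+ ```
+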
+## Pretrained models
+Download `resnet50.ckpt` from [this link](https://pan.baidu.com/s/1rrhsZqDVmNxR-bCnMPvFIw?pwd=8766) and move it into `./ckpt`:
+```
+mv resnet50.ckpt ./ckpt
+```
+
+## Step 3: Training
+```
+mpirun -allow-run-as-root -n 8 --output-filename log_output --merge-stderr-to-stdout \
+python3 train.py \
+--run_distribute=True \
+--lr=0.05 \
+--dataset=coco \
+--device_num=8 \
+--loss_scale=1 \
+--device_target="GPU" \
+--epoch_size=60 \
+--config_path=./config/ssd_resnet50_fpn_config_gpu.yaml \
+--output_path './output' > log.txt 2>&1 &
+```
+## Evaluation results
+### Results on BI-V100
+
+| GPUs | Per-step time | mAP   |
+|------|---------------|-------|
+| 1*8 | 0.814s | 0.374 |
+
+### Results on NV-V100s
+
+| GPUs | Per-step time | mAP   |
+|------|---------------|-------|
+| 1*8 | 0.797s | 0.369 |
\ No newline at end of file
diff --git a/cv/detection/ssd/MindSpore/ascend310_infer/CMakeLists.txt b/cv/detection/ssd/MindSpore/ascend310_infer/CMakeLists.txt
new file mode 100755
index 0000000000000000000000000000000000000000..ee3c85447340e0449ff2b70ed24f60a17e07b2b6
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/ascend310_infer/CMakeLists.txt
@@ -0,0 +1,14 @@
+cmake_minimum_required(VERSION 3.14.1)
+project(Ascend310Infer)
+add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -std=c++17 -Werror -Wall -fPIE -Wl,--allow-shlib-undefined")
+set(PROJECT_SRC_ROOT ${CMAKE_CURRENT_LIST_DIR}/)
+option(MINDSPORE_PATH "mindspore install path" "")
+include_directories(${MINDSPORE_PATH})
+include_directories(${MINDSPORE_PATH}/include)
+include_directories(${PROJECT_SRC_ROOT})
+find_library(MS_LIB libmindspore.so ${MINDSPORE_PATH}/lib)
+file(GLOB_RECURSE MD_LIB ${MINDSPORE_PATH}/_c_dataengine*)
+
+add_executable(main src/main.cc src/utils.cc)
+target_link_libraries(main ${MS_LIB} ${MD_LIB} gflags)
diff --git a/cv/detection/ssd/MindSpore/ascend310_infer/aipp.cfg b/cv/detection/ssd/MindSpore/ascend310_infer/aipp.cfg
new file mode 100755
index 0000000000000000000000000000000000000000..363d5d36fd1f24e3a6e880745d7150076f777bd0
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/ascend310_infer/aipp.cfg
@@ -0,0 +1,26 @@
+aipp_op {
+ aipp_mode : static
+ input_format : YUV420SP_U8
+ related_input_rank : 0
+ csc_switch : true
+ rbuv_swap_switch : false
+ matrix_r0c0 : 256
+ matrix_r0c1 : 0
+ matrix_r0c2 : 359
+ matrix_r1c0 : 256
+ matrix_r1c1 : -88
+ matrix_r1c2 : -183
+ matrix_r2c0 : 256
+ matrix_r2c1 : 454
+ matrix_r2c2 : 0
+ input_bias_0 : 0
+ input_bias_1 : 128
+ input_bias_2 : 128
+
+ mean_chn_0 : 124
+ mean_chn_1 : 117
+ mean_chn_2 : 104
+ var_reci_chn_0 : 0.0171247538316637
+ var_reci_chn_1 : 0.0175070028011204
+ var_reci_chn_2 : 0.0174291938997821
+}
\ No newline at end of file
diff --git a/cv/detection/ssd/MindSpore/ascend310_infer/build.sh b/cv/detection/ssd/MindSpore/ascend310_infer/build.sh
new file mode 100755
index 0000000000000000000000000000000000000000..713d7f657ddfa5f75b069351c55f8447f77c72d0
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/ascend310_infer/build.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+if [ -d out ]; then
+ rm -rf out
+fi
+
+mkdir out
+cd out || exit
+
+if [ -f "Makefile" ]; then
+ make clean
+fi
+
+cmake .. \
+ -DMINDSPORE_PATH="`pip show mindspore-ascend | grep Location | awk '{print $2"/mindspore"}' | xargs realpath`"
+make
diff --git a/cv/detection/ssd/MindSpore/ascend310_infer/inc/utils.h b/cv/detection/ssd/MindSpore/ascend310_infer/inc/utils.h
new file mode 100755
index 0000000000000000000000000000000000000000..efebe03a8c1179f5a1f9d5f7ee07e0352a9937c6
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/ascend310_infer/inc/utils.h
@@ -0,0 +1,32 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_INFERENCE_UTILS_H_
+#define MINDSPORE_INFERENCE_UTILS_H_
+
+#include <sys/stat.h>
+#include <dirent.h>
+#include <vector>
+#include <string>
+#include <memory>
+#include "include/api/types.h"
+
+std::vector<std::string> GetAllFiles(std::string_view dirName);
+DIR *OpenDir(std::string_view dirName);
+std::string RealPath(std::string_view path);
+mindspore::MSTensor ReadFileToTensor(const std::string &file);
+int WriteResult(const std::string& imageFile, const std::vector<mindspore::MSTensor> &outputs);
+#endif
diff --git a/cv/detection/ssd/MindSpore/ascend310_infer/src/main.cc b/cv/detection/ssd/MindSpore/ascend310_infer/src/main.cc
new file mode 100755
index 0000000000000000000000000000000000000000..967635de2f57695d29739ae462300daaac5d3621
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/ascend310_infer/src/main.cc
@@ -0,0 +1,164 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <sys/time.h>
+#include <gflags/gflags.h>
+#include <dirent.h>
+#include <iostream>
+#include <string>
+#include <algorithm>
+#include <iosfwd>
+#include <vector>
+#include <fstream>
+#include <sstream>
+
+#include "include/api/model.h"
+#include "include/api/context.h"
+#include "include/api/types.h"
+#include "include/api/serialization.h"
+#include "include/dataset/vision_ascend.h"
+#include "include/dataset/execute.h"
+#include "include/dataset/vision.h"
+#include "inc/utils.h"
+
+using mindspore::Context;
+using mindspore::Serialization;
+using mindspore::Model;
+using mindspore::Status;
+using mindspore::ModelType;
+using mindspore::GraphCell;
+using mindspore::kSuccess;
+using mindspore::MSTensor;
+using mindspore::dataset::Execute;
+using mindspore::dataset::TensorTransform;
+using mindspore::dataset::vision::DvppDecodeResizeJpeg;
+using mindspore::dataset::vision::Resize;
+using mindspore::dataset::vision::HWC2CHW;
+using mindspore::dataset::vision::Normalize;
+using mindspore::dataset::vision::Decode;
+
+DEFINE_string(mindir_path, "", "mindir path");
+DEFINE_string(dataset_path, ".", "dataset path");
+DEFINE_int32(device_id, 0, "device id");
+DEFINE_string(aipp_path, "./aipp.cfg", "aipp path");
+DEFINE_string(cpu_dvpp, "DVPP", "cpu or dvpp process");
+DEFINE_int32(image_height, 640, "image height");
+DEFINE_int32(image_width, 640, "image width");
+
+int main(int argc, char **argv) {
+ gflags::ParseCommandLineFlags(&argc, &argv, true);
+ if (RealPath(FLAGS_mindir_path).empty()) {
+ std::cout << "Invalid mindir" << std::endl;
+ return 1;
+ }
+
+ auto context = std::make_shared<Context>();
+ auto ascend310 = std::make_shared<mindspore::Ascend310DeviceInfo>();
+ ascend310->SetDeviceID(FLAGS_device_id);
+ context->MutableDeviceInfo().push_back(ascend310);
+ mindspore::Graph graph;
+ Serialization::Load(FLAGS_mindir_path, ModelType::kMindIR, &graph);
+ if (FLAGS_cpu_dvpp == "DVPP") {
+ if (RealPath(FLAGS_aipp_path).empty()) {
+ std::cout << "Invalid aipp path" << std::endl;
+ return 1;
+ } else {
+ ascend310->SetInsertOpConfigPath(FLAGS_aipp_path);
+ }
+ }
+
+ Model model;
+ Status ret = model.Build(GraphCell(graph), context);
+ if (ret != kSuccess) {
+ std::cout << "ERROR: Build failed." << std::endl;
+ return 1;
+ }
+
+ auto all_files = GetAllFiles(FLAGS_dataset_path);
+ if (all_files.empty()) {
+ std::cout << "ERROR: no input data." << std::endl;
+ return 1;
+ }
+
+ std::map<double, double> costTime_map;
+ size_t size = all_files.size();
+
+ for (size_t i = 0; i < size; ++i) {
+ struct timeval start = {0};
+ struct timeval end = {0};
+ double startTimeMs;
+ double endTimeMs;
+ std::vector<MSTensor> inputs;
+ std::vector<MSTensor> outputs;
+ std::cout << "Start predict input files:" << all_files[i] << std::endl;
+ if (FLAGS_cpu_dvpp == "DVPP") {
+ auto resizeShape = {static_cast<uint32_t>(FLAGS_image_height), static_cast<uint32_t>(FLAGS_image_width)};
+ Execute resize_op(std::shared_ptr<TensorTransform>(new DvppDecodeResizeJpeg(resizeShape)));
+ auto imgDvpp = std::make_shared<MSTensor>();
+ resize_op(ReadFileToTensor(all_files[i]), imgDvpp.get());
+ inputs.emplace_back(imgDvpp->Name(), imgDvpp->DataType(), imgDvpp->Shape(),
+ imgDvpp->Data().get(), imgDvpp->DataSize());
+ } else {
+ std::shared_ptr<TensorTransform> decode(new Decode());
+ std::shared_ptr<TensorTransform> hwc2chw(new HWC2CHW());
+ std::shared_ptr<TensorTransform> normalize(
+ new Normalize({123.675, 116.28, 103.53}, {58.395, 57.120, 57.375}));
+ auto resizeShape = {FLAGS_image_height, FLAGS_image_width};
+ std::shared_ptr<TensorTransform> resize(new Resize(resizeShape));
+ Execute composeDecode({decode, resize, normalize, hwc2chw});
+ auto img = MSTensor();
+ auto image = ReadFileToTensor(all_files[i]);
+ composeDecode(image, &img);
+ std::vector<MSTensor> model_inputs = model.GetInputs();
+ if (model_inputs.empty()) {
+ std::cout << "Invalid model, inputs is empty." << std::endl;
+ return 1;
+ }
+ inputs.emplace_back(model_inputs[0].Name(), model_inputs[0].DataType(), model_inputs[0].Shape(),
+ img.Data().get(), img.DataSize());
+ }
+
+ gettimeofday(&start, nullptr);
+ ret = model.Predict(inputs, &outputs);
+ gettimeofday(&end, nullptr);
+ if (ret != kSuccess) {
+ std::cout << "Predict " << all_files[i] << " failed." << std::endl;
+ return 1;
+ }
+ startTimeMs = (1.0 * start.tv_sec * 1000000 + start.tv_usec) / 1000;
+ endTimeMs = (1.0 * end.tv_sec * 1000000 + end.tv_usec) / 1000;
+ costTime_map.insert(std::pair<double, double>(startTimeMs, endTimeMs));
+ WriteResult(all_files[i], outputs);
+ }
+ double average = 0.0;
+ int inferCount = 0;
+
+ for (auto iter = costTime_map.begin(); iter != costTime_map.end(); iter++) {
+ double diff = 0.0;
+ diff = iter->second - iter->first;
+ average += diff;
+ inferCount++;
+ }
+ average = average / inferCount;
+ std::stringstream timeCost;
+ timeCost << "NN inference cost average time: "<< average << " ms of infer_count " << inferCount << std::endl;
+ std::cout << "NN inference cost average time: "<< average << "ms of infer_count " << inferCount << std::endl;
+ std::string fileName = "./time_Result" + std::string("/test_perform_static.txt");
+ std::ofstream fileStream(fileName.c_str(), std::ios::trunc);
+ fileStream << timeCost.str();
+ fileStream.close();
+ costTime_map.clear();
+ return 0;
+}
diff --git a/cv/detection/ssd/MindSpore/ascend310_infer/src/utils.cc b/cv/detection/ssd/MindSpore/ascend310_infer/src/utils.cc
new file mode 100755
index 0000000000000000000000000000000000000000..c947e4d5f451b90bd4728aa3a92c4cfab174f5e6
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/ascend310_infer/src/utils.cc
@@ -0,0 +1,129 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fstream>
+#include <algorithm>
+#include <iostream>
+#include "inc/utils.h"
+
+using mindspore::MSTensor;
+using mindspore::DataType;
+
+std::vector<std::string> GetAllFiles(std::string_view dirName) {
+ struct dirent *filename;
+ DIR *dir = OpenDir(dirName);
+ if (dir == nullptr) {
+ return {};
+ }
+ std::vector<std::string> res;
+ while ((filename = readdir(dir)) != nullptr) {
+ std::string dName = std::string(filename->d_name);
+ if (dName == "." || dName == ".." || filename->d_type != DT_REG) {
+ continue;
+ }
+ res.emplace_back(std::string(dirName) + "/" + filename->d_name);
+ }
+ std::sort(res.begin(), res.end());
+ for (auto &f : res) {
+ std::cout << "image file: " << f << std::endl;
+ }
+ return res;
+}
+
+int WriteResult(const std::string& imageFile, const std::vector<MSTensor> &outputs) {
+ std::string homePath = "./result_Files";
+ for (size_t i = 0; i < outputs.size(); ++i) {
+ size_t outputSize;
+ std::shared_ptr<const void> netOutput;
+ netOutput = outputs[i].Data();
+ outputSize = outputs[i].DataSize();
+ int pos = imageFile.rfind('/');
+ std::string fileName(imageFile, pos + 1);
+ fileName.replace(fileName.find('.'), fileName.size() - fileName.find('.'), '_' + std::to_string(i) + ".bin");
+ std::string outFileName = homePath + "/" + fileName;
+ FILE * outputFile = fopen(outFileName.c_str(), "wb");
+ fwrite(netOutput.get(), outputSize, sizeof(char), outputFile);
+ fclose(outputFile);
+ outputFile = nullptr;
+ }
+ return 0;
+}
+
+mindspore::MSTensor ReadFileToTensor(const std::string &file) {
+ if (file.empty()) {
+ std::cout << "Pointer file is nullptr" << std::endl;
+ return mindspore::MSTensor();
+ }
+
+ std::ifstream ifs(file);
+ if (!ifs.good()) {
+ std::cout << "File: " << file << " is not exist" << std::endl;
+ return mindspore::MSTensor();
+ }
+
+ if (!ifs.is_open()) {
+ std::cout << "File: " << file << "open failed" << std::endl;
+ return mindspore::MSTensor();
+ }
+
+ ifs.seekg(0, std::ios::end);
+ size_t size = ifs.tellg();
+ mindspore::MSTensor buffer(file, mindspore::DataType::kNumberTypeUInt8, {static_cast<int64_t>(size)}, nullptr, size);
+
+ ifs.seekg(0, std::ios::beg);
+ ifs.read(reinterpret_cast<char *>(buffer.MutableData()), size);
+ ifs.close();
+
+ return buffer;
+}
+
+
+DIR *OpenDir(std::string_view dirName) {
+ if (dirName.empty()) {
+ std::cout << " dirName is null ! " << std::endl;
+ return nullptr;
+ }
+ std::string realPath = RealPath(dirName);
+ struct stat s;
+ lstat(realPath.c_str(), &s);
+ if (!S_ISDIR(s.st_mode)) {
+ std::cout << "dirName is not a valid directory !" << std::endl;
+ return nullptr;
+ }
+ DIR *dir;
+ dir = opendir(realPath.c_str());
+ if (dir == nullptr) {
+ std::cout << "Can not open dir " << dirName << std::endl;
+ return nullptr;
+ }
+ std::cout << "Successfully opened the dir " << dirName << std::endl;
+ return dir;
+}
+
+std::string RealPath(std::string_view path) {
+ char realPathMem[PATH_MAX] = {0};
+ char *realPathRet = nullptr;
+ realPathRet = realpath(path.data(), realPathMem);
+
+ if (realPathRet == nullptr) {
+ std::cout << "File: " << path << " is not exist.";
+ return "";
+ }
+
+ std::string realPath(realPathMem);
+ std::cout << path << " realpath is: " << realPath << std::endl;
+ return realPath;
+}
diff --git a/cv/detection/ssd/MindSpore/config/ssd300_config.yaml b/cv/detection/ssd/MindSpore/config/ssd300_config.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..1310d22251b5f0b7f5f5aa78d1222bae130e23da
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/config/ssd300_config.yaml
@@ -0,0 +1,126 @@
+# Builtin Configurations (DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
+enable_modelarts: False
+# Url for modelarts
+data_url: ""
+train_url: ""
+checkpoint_url: ""
+# Path for local
+run_distribute: False
+enable_profiling: False
+data_path: "/cache/data"
+output_path: "/cache/train"
+load_path: "/cache/checkpoint_path/"
+device_target: "Ascend"
+checkpoint_path: "./checkpoint/"
+checkpoint_file_path: "ssd-500_458.ckpt"
+
+# ==============================================================================
+# Training options
+model_name: "ssd300"
+img_shape: [300, 300]
+num_ssd_boxes: 1917
+match_threshold: 0.5
+nms_threshold: 0.6
+min_score: 0.1
+max_boxes: 100
+all_reduce_fusion_config: [29, 58, 89]
+
+# learning rate settings
+lr_init: 0.001
+lr_end_rate: 0.001
+warmup_epochs: 2
+momentum: 0.9
+weight_decay: 0.00015
+ssd_vgg_bn: False
+pretrain_vgg_bn: False
+
+
+# network
+num_default: [3, 6, 6, 6, 6, 6]
+extras_in_channels: [256, 576, 1280, 512, 256, 256]
+extras_out_channels: [576, 1280, 512, 256, 256, 128]
+extras_strides: [1, 1, 2, 2, 2, 2]
+extras_ratio: [0.2, 0.2, 0.2, 0.25, 0.5, 0.25]
+feature_size: [19, 10, 5, 3, 2, 1]
+min_scale: 0.2
+max_scale: 0.95
+aspect_ratios: [[], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]
+steps: [16, 32, 64, 100, 150, 300]
+prior_scaling: [0.1, 0.2]
+gamma: 2.0
+alpha: 0.75
+
+dataset: "coco"
+lr: 0.05
+mode_sink: "sink"
+device_id: 0
+device_num: 1
+epoch_size: 500
+batch_size: 32
+loss_scale: 1024
+pre_trained: ""
+pre_trained_epoch_size: 0
+save_checkpoint_epochs: 10
+only_create_dataset: False
+eval_start_epoch: 40
+eval_interval: 1
+run_eval: False
+filter_weight: False
+freeze_layer: None
+save_best_ckpt: True
+
+result_path: ""
+img_path: ""
+drop: False
+
+# `mindrecord_dir` and `coco_root` are better to use absolute path.
+feature_extractor_base_param: ""
+checkpoint_filter_list: ['multi_loc_layers', 'multi_cls_layers']
+mindrecord_dir: "MindRecord_COCO"
+coco_root: "coco_ori"
+train_data_type: "train2017"
+val_data_type: "val2017"
+instances_set: "annotations/instances_{}.json"
+classes: ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
+ 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
+ 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
+ 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
+ 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
+ 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
+ 'kite', 'baseball bat', 'baseball glove', 'skateboard',
+ 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
+ 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+ 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
+ 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
+ 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+ 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
+ 'refrigerator', 'book', 'clock', 'vase', 'scissors',
+ 'teddy bear', 'hair drier', 'toothbrush']
+num_classes: 81
+# The annotation.json position of voc validation dataset.
+voc_json: "annotations/voc_instances_val.json"
+# voc original dataset.
+voc_root: "/data/voc_dataset"
+# if coco or voc used, `image_dir` and `anno_path` are useless.
+image_dir: ""
+anno_path: ""
+file_name: "ssd"
+file_format: 'MINDIR'
+
+---
+# Help description for each configuration
+enable_modelarts: "Whether training on modelarts, default: False"
+data_url: "Dataset url for obs"
+train_url: "Training output url for obs"
+checkpoint_url: "The location of checkpoint for obs"
+data_path: "Dataset path for local"
+output_path: "Training output path for local"
+load_path: "The location of checkpoint for obs"
+device_target: "Target device type, available: [Ascend, GPU, CPU]"
+enable_profiling: "Whether enable profiling while training, default: False"
+num_classes: "Class for dataset"
+batch_size: "Batch size for training and evaluation"
+epoch_size: "Total training epochs."
+keep_checkpoint_max: "keep the last keep_checkpoint_max checkpoint"
+checkpoint_path: "The location of the checkpoint file."
+checkpoint_file_path: "The location of the checkpoint file."
diff --git a/cv/detection/ssd/MindSpore/config/ssd300_config_gpu.yaml b/cv/detection/ssd/MindSpore/config/ssd300_config_gpu.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..80abf1b12ac9111cc3cf6b0c37a45ccb7bad2de6
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/config/ssd300_config_gpu.yaml
@@ -0,0 +1,127 @@
+# Builtin Configurations (DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
+enable_modelarts: False
+# Url for modelarts
+data_url: ""
+train_url: ""
+checkpoint_url: ""
+# Path for local
+run_distribute: False
+enable_profiling: False
+data_path: "/cache/data"
+output_path: "/cache/train"
+load_path: "/cache/checkpoint_path/"
+device_target: "GPU"
+checkpoint_path: "./checkpoint/"
+checkpoint_file_path: "ssd-500_458.ckpt"
+
+# ==============================================================================
+# Training options
+model_name: "ssd300"
+img_shape: [300, 300]
+num_ssd_boxes: 1917
+match_threshold: 0.5
+nms_threshold: 0.6
+min_score: 0.1
+max_boxes: 100
+all_reduce_fusion_config: [29, 58, 89]
+use_float16: True
+
+# learning rate settings
+lr_init: 0.001
+lr_end_rate: 0.001
+warmup_epochs: 2
+momentum: 0.9
+weight_decay: 0.00015
+ssd_vgg_bn: False
+pretrain_vgg_bn: False
+
+
+# network
+num_default: [3, 6, 6, 6, 6, 6]
+extras_in_channels: [256, 576, 1280, 512, 256, 256]
+extras_out_channels: [576, 1280, 512, 256, 256, 128]
+extras_strides: [1, 1, 2, 2, 2, 2]
+extras_ratio: [0.2, 0.2, 0.2, 0.25, 0.5, 0.25]
+feature_size: [19, 10, 5, 3, 2, 1]
+min_scale: 0.2
+max_scale: 0.95
+aspect_ratios: [[], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]
+steps: [16, 32, 64, 100, 150, 300]
+prior_scaling: [0.1, 0.2]
+gamma: 2.0
+alpha: 0.75
+
+dataset: "coco"
+lr: 0.05
+mode_sink: "sink"
+device_id: 0
+device_num: 1
+epoch_size: 500
+batch_size: 32
+loss_scale: 1024
+pre_trained: ""
+pre_trained_epoch_size: 0
+save_checkpoint_epochs: 10
+only_create_dataset: False
+eval_start_epoch: 40
+eval_interval: 1
+run_eval: False
+filter_weight: False
+freeze_layer: None
+save_best_ckpt: True
+
+result_path: ""
+img_path: ""
+drop: False
+
+# `mindrecord_dir` and `coco_root` are better to use absolute path.
+feature_extractor_base_param: ""
+checkpoint_filter_list: ['multi_loc_layers', 'multi_cls_layers']
+mindrecord_dir: "MindRecord_COCO"
+coco_root: "coco_ori"
+train_data_type: "train2017"
+val_data_type: "val2017"
+instances_set: "annotations/instances_{}.json"
+classes: ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
+ 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
+ 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
+ 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
+ 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
+ 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
+ 'kite', 'baseball bat', 'baseball glove', 'skateboard',
+ 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
+ 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+ 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
+ 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
+ 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+ 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
+ 'refrigerator', 'book', 'clock', 'vase', 'scissors',
+ 'teddy bear', 'hair drier', 'toothbrush']
+num_classes: 81
+# The annotation.json position of voc validation dataset.
+voc_json: "annotations/voc_instances_val.json"
+# voc original dataset.
+voc_root: "/data/voc_dataset"
+# if coco or voc used, `image_dir` and `anno_path` are useless.
+image_dir: ""
+anno_path: ""
+file_name: "ssd"
+file_format: 'MINDIR'
+
+---
+# Help description for each configuration
+enable_modelarts: "Whether training on modelarts, default: False"
+data_url: "Dataset url for obs"
+train_url: "Training output url for obs"
+checkpoint_url: "The location of checkpoint for obs"
+data_path: "Dataset path for local"
+output_path: "Training output path for local"
+load_path: "The location of checkpoint for obs"
+device_target: "Target device type, available: [Ascend, GPU, CPU]"
+enable_profiling: "Whether enable profiling while training, default: False"
+num_classes: "Class for dataset"
+batch_size: "Batch size for training and evaluation"
+epoch_size: "Total training epochs."
+keep_checkpoint_max: "keep the last keep_checkpoint_max checkpoint"
+checkpoint_path: "The location of the checkpoint file."
+checkpoint_file_path: "The location of the checkpoint file."
diff --git a/cv/detection/ssd/MindSpore/config/ssd_mobilenet_v1_300_config_gpu.yaml b/cv/detection/ssd/MindSpore/config/ssd_mobilenet_v1_300_config_gpu.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..4fe82126ebc851c4ed6dffcde4621e9f190ec377
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/config/ssd_mobilenet_v1_300_config_gpu.yaml
@@ -0,0 +1,129 @@
+# Builtin Configurations (DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
+enable_modelarts: False
+# Url for modelarts
+data_url: ""
+train_url: ""
+checkpoint_url: ""
+# Path for local
+run_distribute: False
+enable_profiling: False
+data_path: "/DATA_1/cyf"
+output_path: "/cache/train"
+load_path: "/cache/checkpoint_path/"
+device_target: "GPU"
+checkpoint_path: "./checkpoint/"
+checkpoint_file_path: "ssd-500_458.ckpt"
+
+# ==============================================================================
+# Training options
+model_name: "ssd_mobilenet_v1"
+img_shape: [300, 300]
+num_ssd_boxes: 1917
+match_threshold: 0.5
+nms_threshold: 0.6
+min_score: 0.1
+max_boxes: 100
+all_reduce_fusion_config: [29, 58, 89]
+use_float16: True
+
+# learning rate settings
+lr_init: 0.001
+lr_end_rate: 0.001
+warmup_epochs: 2
+momentum: 0.9
+weight_decay: 0.00015
+ssd_vgg_bn: False
+pretrain_vgg_bn: False
+
+
+# network
+num_addition_layers: 4
+num_default: [3, 6, 6, 6, 6, 6]
+extras_in_channels: [256, 512, 1024, 512, 256, 256]
+extras_out_channels: [512, 1024, 512, 256, 256, 128]
+extras_strides: [1, 1, 2, 2, 2, 2]
+extras_ratio: [0.2, 0.2, 0.2, 0.25, 0.5, 0.25]
+feature_size: [19, 10, 5, 3, 2, 1]
+min_scale: 0.2
+max_scale: 0.95
+aspect_ratios: [[], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]
+steps: [16, 32, 64, 100, 150, 300]
+prior_scaling: [0.1, 0.2]
+gamma: 2.0
+alpha: 0.75
+
+dataset: "coco"
+lr: 0.05
+mode_sink: "sink"
+device_id: 0
+device_num: 1
+epoch_size: 500
+batch_size: 32
+loss_scale: 1024
+pre_trained: ""
+pre_trained_epoch_size: 0
+save_checkpoint_epochs: 10
+only_create_dataset: False
+eval_start_epoch: 40
+eval_interval: 1
+run_eval: False
+filter_weight: False
+freeze_layer: None
+save_best_ckpt: True
+
+result_path: ""
+img_path: ""
+drop: False
+
+# `mindrecord_dir` and `coco_root` are better to use absolute path.
+feature_extractor_base_param: ""
+checkpoint_filter_list: ['multi_loc_layers', 'multi_cls_layers']
+mindrecord_dir: "/DATA_1/cyf/MindRecord_COCO"
+coco_root: "/DATA_1/cyf/"
+train_data_type: "train2017"
+val_data_type: "val2017"
+instances_set: "annotations/instances_{}.json"
+classes: ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
+ 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
+ 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
+ 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
+ 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
+ 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
+ 'kite', 'baseball bat', 'baseball glove', 'skateboard',
+ 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
+ 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+ 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
+ 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
+ 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+ 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
+ 'refrigerator', 'book', 'clock', 'vase', 'scissors',
+ 'teddy bear', 'hair drier', 'toothbrush']
+num_classes: 81
+# The annotation.json position of voc validation dataset.
+voc_json: "annotations/voc_instances_val.json"
+# voc original dataset.
+voc_root: "/data/voc_dataset"
+# if coco or voc used, `image_dir` and `anno_path` are useless.
+image_dir: ""
+anno_path: ""
+file_name: "ssd"
+file_format: 'MINDIR'
+
+---
+# Help description for each configuration
+enable_modelarts: "Whether training on modelarts, default: False"
+data_url: "Dataset url for obs"
+train_url: "Training output url for obs"
+checkpoint_url: "The location of checkpoint for obs"
+data_path: "Dataset path for local"
+output_path: "Training output path for local"
+load_path: "The location of checkpoint for obs"
+device_target: "Target device type, available: [Ascend, GPU, CPU]"
+enable_profiling: "Whether enable profiling while training, default: False"
+num_classes: "Class for dataset"
+batch_size: "Batch size for training and evaluation"
+epoch_size: "Total training epochs."
+keep_checkpoint_max: "keep the last keep_checkpoint_max checkpoint"
+checkpoint_path: "The location of the checkpoint file."
+checkpoint_file_path: "The location of the checkpoint file."
diff --git a/cv/detection/ssd/MindSpore/config/ssd_mobilenet_v1_fpn_config.yaml b/cv/detection/ssd/MindSpore/config/ssd_mobilenet_v1_fpn_config.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..099ea3758203638f96c6cae7886c69047b21b8f5
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/config/ssd_mobilenet_v1_fpn_config.yaml
@@ -0,0 +1,129 @@
+# Builtin Configurations (DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
+enable_modelarts: False
+# Url for modelarts
+data_url: ""
+train_url: ""
+checkpoint_url: ""
+# Path for local
+run_distribute: False
+enable_profiling: False
+data_path: "/cache/data"
+output_path: "/cache/train"
+load_path: "/cache/checkpoint_path/"
+device_target: "Ascend"
+checkpoint_path: "./checkpoint/"
+checkpoint_file_path: "mobilenet_v1.ckpt"
+
+# ==============================================================================
+# Training options
+model_name: "ssd_mobilenet_v1_fpn"
+img_shape: [640, 640]
+num_ssd_boxes: -1
+match_threshold: 0.5
+nms_threshold: 0.6
+min_score: 0.1
+max_boxes: 100
+all_reduce_fusion_config: [29, 58, 89]
+
+# learning rate settings
+lr_init: 0.01333
+lr_end_rate: 0.0
+warmup_epochs: 2
+weight_decay: 0.00004
+momentum: 0.9
+ssd_vgg_bn: False
+pretrain_vgg_bn: False
+
+# network
+num_default: [6, 6, 6, 6, 6]
+extras_in_channels: [256, 512, 1024, 256, 256]
+extras_out_channels: [256, 256, 256, 256, 256]
+extras_strides: [1, 1, 2, 2, 2, 2]
+extras_ratio: [0.2, 0.2, 0.2, 0.25, 0.5, 0.25]
+feature_size: [80, 40, 20, 10, 5]
+min_scale: 0.2
+max_scale: 0.95
+aspect_ratios: [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]
+steps: [8, 16, 32, 64, 128]
+prior_scaling: [0.1, 0.2]
+gamma: 2.0
+alpha: 0.25
+num_addition_layers: 4
+use_anchor_generator: True
+use_global_norm: True
+
+dataset: "coco"
+lr: 0.05
+mode_sink: "sink"
+device_id: 0
+device_num: 1
+epoch_size: 500
+batch_size: 32
+loss_scale: 1024
+pre_trained: ""
+pre_trained_epoch_size: 0
+save_checkpoint_epochs: 10
+only_create_dataset: False
+eval_start_epoch: 40
+eval_interval: 1
+run_eval: False
+filter_weight: False
+freeze_layer: None
+save_best_ckpt: True
+
+result_path: ""
+img_path: ""
+drop: False
+
+# `mindrecord_dir` and `coco_root` are better to use absolute path.
+feature_extractor_base_param: "/ckpt/mobilenet_v1.ckpt"
+checkpoint_filter_list: ['network.multi_box.cls_layers.0.weight', 'network.multi_box.cls_layers.0.bias',
+ 'network.multi_box.loc_layers.0.weight', 'network.multi_box.loc_layers.0.bias']
+mindrecord_dir: "MindRecord_COCO"
+coco_root: "coco_ori"
+train_data_type: "train2017"
+val_data_type: "val2017"
+instances_set: "annotations/instances_{}.json"
+classes: ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
+ 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
+ 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
+ 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
+ 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
+ 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
+ 'kite', 'baseball bat', 'baseball glove', 'skateboard',
+ 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
+ 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+ 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
+ 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
+ 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+ 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
+ 'refrigerator', 'book', 'clock', 'vase', 'scissors',
+ 'teddy bear', 'hair drier', 'toothbrush']
+num_classes: 81
+# The annotation.json position of voc validation dataset.
+voc_json: "annotations/voc_instances_val.json"
+# voc original dataset.
+voc_root: "/data/voc_dataset"
+# if coco or voc used, `image_dir` and `anno_path` are useless.
+image_dir: ""
+anno_path: ""
+file_name: "ssd"
+file_format: 'MINDIR'
+
+---
+# Help description for each configuration
+enable_modelarts: 'Whether training on modelarts, default: False'
+data_url: 'Dataset url for obs'
+train_url: 'Training output url for obs'
+checkpoint_url: 'The location of checkpoint for obs'
+data_path: 'Dataset path for local'
+output_path: 'Training output path for local'
+load_path: 'The location of checkpoint for obs'
+device_target: 'Target device type, available: [Ascend, GPU, CPU]'
+enable_profiling: 'Whether enable profiling while training, default: False'
+num_classes: 'Class for dataset'
+batch_size: "Batch size for training and evaluation"
+epoch_size: "Total training epochs."
+keep_checkpoint_max: "keep the last keep_checkpoint_max checkpoint"
+checkpoint_path: "The location of the checkpoint file."
+checkpoint_file_path: "The location of the checkpoint file."
diff --git a/cv/detection/ssd/MindSpore/config/ssd_mobilenet_v1_fpn_config_gpu.yaml b/cv/detection/ssd/MindSpore/config/ssd_mobilenet_v1_fpn_config_gpu.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..79d10f00d9112a43fccbd0093237dfbaa9a16094
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/config/ssd_mobilenet_v1_fpn_config_gpu.yaml
@@ -0,0 +1,130 @@
+# Builtin Configurations (DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
+enable_modelarts: False
+# Url for modelarts
+data_url: ""
+train_url: ""
+checkpoint_url: ""
+# Path for local
+run_distribute: False
+enable_profiling: False
+data_path: "/cache/data"
+output_path: "/cache/train"
+load_path: "/cache/checkpoint_path/"
+device_target: "GPU"
+checkpoint_path: "./checkpoint/"
+checkpoint_file_path: "mobilenet_v1.ckpt"
+
+# ==============================================================================
+# Training options
+model_name: "ssd_mobilenet_v1_fpn"
+img_shape: [640, 640]
+num_ssd_boxes: -1
+match_threshold: 0.5
+nms_threshold: 0.6
+min_score: 0.1
+max_boxes: 100
+all_reduce_fusion_config: [29, 58, 89]
+use_float16: False
+
+# learning rate settings
+lr_init: 0.01333
+lr_end_rate: 0.0
+warmup_epochs: 2
+weight_decay: 0.00004
+momentum: 0.9
+ssd_vgg_bn: False
+pretrain_vgg_bn: False
+
+# network
+num_default: [6, 6, 6, 6, 6]
+extras_in_channels: [256, 512, 1024, 256, 256]
+extras_out_channels: [256, 256, 256, 256, 256]
+extras_strides: [1, 1, 2, 2, 2, 2]
+extras_ratio: [0.2, 0.2, 0.2, 0.25, 0.5, 0.25]
+feature_size: [80, 40, 20, 10, 5]
+min_scale: 0.2
+max_scale: 0.95
+aspect_ratios: [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]
+steps: [8, 16, 32, 64, 128]
+prior_scaling: [0.1, 0.2]
+gamma: 2.0
+alpha: 0.25
+num_addition_layers: 4
+use_anchor_generator: True
+use_global_norm: True
+
+dataset: "coco"
+lr: 0.05
+mode_sink: "sink"
+device_id: 0
+device_num: 1
+epoch_size: 500
+batch_size: 16
+loss_scale: 1024
+pre_trained: ""
+pre_trained_epoch_size: 0
+save_checkpoint_epochs: 10
+only_create_dataset: False
+eval_start_epoch: 40
+eval_interval: 1
+run_eval: False
+filter_weight: False
+freeze_layer: None
+save_best_ckpt: True
+
+result_path: ""
+img_path: ""
+drop: False
+
+# `mindrecord_dir` and `coco_root` are better to use absolute path.
+feature_extractor_base_param: "/ckpt/mobilenet_v1.ckpt"
+checkpoint_filter_list: ['network.multi_box.cls_layers.0.weight', 'network.multi_box.cls_layers.0.bias',
+ 'network.multi_box.loc_layers.0.weight', 'network.multi_box.loc_layers.0.bias']
+mindrecord_dir: "MindRecord_COCO"
+coco_root: "coco_ori"
+train_data_type: "train2017"
+val_data_type: "val2017"
+instances_set: "annotations/instances_{}.json"
+classes: ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
+ 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
+ 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
+ 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
+ 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
+ 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
+ 'kite', 'baseball bat', 'baseball glove', 'skateboard',
+ 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
+ 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+ 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
+ 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
+ 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+ 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
+ 'refrigerator', 'book', 'clock', 'vase', 'scissors',
+ 'teddy bear', 'hair drier', 'toothbrush']
+num_classes: 81
+# The annotation.json position of voc validation dataset.
+voc_json: "annotations/voc_instances_val.json"
+# voc original dataset.
+voc_root: "/data/voc_dataset"
+# if coco or voc used, `image_dir` and `anno_path` are useless.
+image_dir: ""
+anno_path: ""
+file_name: "ssd"
+file_format: 'MINDIR'
+
+---
+# Help description for each configuration
+enable_modelarts: 'Whether training on modelarts, default: False'
+data_url: 'Dataset url for obs'
+train_url: 'Training output url for obs'
+checkpoint_url: 'The location of checkpoint for obs'
+data_path: 'Dataset path for local'
+output_path: 'Training output path for local'
+load_path: 'The location of checkpoint for obs'
+device_target: 'Target device type, available: [Ascend, GPU, CPU]'
+enable_profiling: 'Whether enable profiling while training, default: False'
+num_classes: 'Class for dataset'
+batch_size: "Batch size for training and evaluation"
+epoch_size: "Total training epochs."
+keep_checkpoint_max: "keep the last keep_checkpoint_max checkpoint"
+checkpoint_path: "The location of the checkpoint file."
+checkpoint_file_path: "The location of the checkpoint file."
diff --git a/cv/detection/ssd/MindSpore/config/ssd_resnet50_fpn_config.yaml b/cv/detection/ssd/MindSpore/config/ssd_resnet50_fpn_config.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..8a093419d3f0f8908bbc687d1e1849ede2ce77a3
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/config/ssd_resnet50_fpn_config.yaml
@@ -0,0 +1,130 @@
+# Builtin Configurations (DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
+enable_modelarts: False
+# Url for modelarts
+data_url: ""
+train_url: ""
+checkpoint_url: ""
+# Path for local
+run_distribute: False
+enable_profiling: False
+data_path: "/cache/data"
+output_path: "/cache/train"
+load_path: "/cache/checkpoint_path/"
+device_target: "Ascend"
+checkpoint_path: "./checkpoint/"
+checkpoint_file_path: "resnet50.ckpt"
+
+# ==============================================================================
+# Training options
+model_name: "ssd_resnet50_fpn"
+img_shape: [640, 640]
+num_ssd_boxes: -1
+match_threshold: 0.5
+nms_threshold: 0.6
+min_score: 0.1
+max_boxes: 100
+all_reduce_fusion_config: [90, 183, 279]
+
+# learning rate settings
+lr_init: 0.01333
+lr_end_rate: 0.0
+warmup_epochs: 2
+weight_decay: 0.0004
+momentum: 0.9
+ssd_vgg_bn: False
+pretrain_vgg_bn: False
+
+# network
+num_default: [6, 6, 6, 6, 6]
+extras_in_channels: [256, 512, 1024, 256, 256]
+extras_out_channels: [256, 256, 256, 256, 256]
+extras_strides: [1, 1, 2, 2, 2, 2]
+extras_ratio: [0.2, 0.2, 0.2, 0.25, 0.5, 0.25]
+feature_size: [80, 40, 20, 10, 5]
+min_scale: 0.2
+max_scale: 0.95
+aspect_ratios: [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]
+steps: [8, 16, 32, 64, 128]
+prior_scaling: [0.1, 0.2]
+gamma: 2.0
+alpha: 0.25
+num_addition_layers: 4
+use_anchor_generator: True
+use_global_norm: True
+use_float16: True
+
+dataset: "coco"
+lr: 0.05
+mode_sink: "sink"
+device_id: 0
+device_num: 1
+epoch_size: 500
+batch_size: 32
+loss_scale: 1024
+pre_trained: ""
+pre_trained_epoch_size: 0
+save_checkpoint_epochs: 10
+only_create_dataset: False
+eval_start_epoch: 40
+eval_interval: 1
+run_eval: False
+filter_weight: False
+freeze_layer: None
+save_best_ckpt: True
+
+result_path: ""
+img_path: ""
+drop: False
+
+# `mindrecord_dir` and `coco_root` are better to use absolute path.
+feature_extractor_base_param: "/ckpt/resnet50.ckpt"
+checkpoint_filter_list: ['network.multi_box.cls_layers.0.weight', 'network.multi_box.cls_layers.0.bias',
+ 'network.multi_box.loc_layers.0.weight', 'network.multi_box.loc_layers.0.bias']
+mindrecord_dir: "MindRecord_COCO"
+coco_root: "coco_ori"
+train_data_type: "train2017"
+val_data_type: "val2017"
+instances_set: "annotations/instances_{}.json"
+classes: ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
+ 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
+ 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
+ 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
+ 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
+ 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
+ 'kite', 'baseball bat', 'baseball glove', 'skateboard',
+ 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
+ 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+ 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
+ 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
+ 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+ 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
+ 'refrigerator', 'book', 'clock', 'vase', 'scissors',
+ 'teddy bear', 'hair drier', 'toothbrush']
+num_classes: 81
+# The annotation.json position of voc validation dataset.
+voc_json: "annotations/voc_instances_val.json"
+# voc original dataset.
+voc_root: "/data/voc_dataset"
+# if coco or voc used, `image_dir` and `anno_path` are useless.
+image_dir: ""
+anno_path: ""
+file_name: "ssd"
+file_format: 'MINDIR'
+
+---
+# Help description for each configuration
+enable_modelarts: "Whether training on modelarts, default: False"
+data_url: "Dataset url for obs"
+train_url: "Training output url for obs"
+checkpoint_url: "The location of checkpoint for obs"
+data_path: "Dataset path for local"
+output_path: "Training output path for local"
+load_path: "The location of checkpoint for obs"
+device_target: "Target device type, available: [Ascend, GPU, CPU]"
+enable_profiling: "Whether enable profiling while training, default: False"
+num_classes: "Class for dataset"
+batch_size: "Batch size for training and evaluation"
+epoch_size: "Total training epochs."
+keep_checkpoint_max: "keep the last keep_checkpoint_max checkpoint"
+checkpoint_path: "The location of the checkpoint file."
+checkpoint_file_path: "The location of the checkpoint file."
diff --git a/cv/detection/ssd/MindSpore/config/ssd_resnet50_fpn_config_gpu.yaml b/cv/detection/ssd/MindSpore/config/ssd_resnet50_fpn_config_gpu.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..84019b0902a886657cfb850ef4b42ae4eb504c29
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/config/ssd_resnet50_fpn_config_gpu.yaml
@@ -0,0 +1,132 @@
+# Copyright (c) 2023, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+# Builtin Configurations (DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
+enable_modelarts: False
+# Url for modelarts
+data_url: ""
+train_url: ""
+checkpoint_url: ""
+# Path for local
+run_distribute: False
+enable_profiling: False
+data_path: "/home/datasets/cv/"
+output_path: "/cache/train"
+load_path: "/cache/checkpoint_path/"
+device_target: "GPU"
+checkpoint_path: "./checkpoint/"
+checkpoint_file_path: "resnet50.ckpt"
+
+# ==============================================================================
+# Training options
+model_name: "ssd_resnet50_fpn"
+img_shape: [640, 640]
+num_ssd_boxes: -1
+match_threshold: 0.5
+nms_threshold: 0.6
+min_score: 0.1
+max_boxes: 100
+all_reduce_fusion_config: [90, 183, 279]
+use_float16: False
+
+# learning rate settings
+lr_init: 0.01333
+lr_end_rate: 0.0
+warmup_epochs: 2
+weight_decay: 0.0004
+momentum: 0.9
+ssd_vgg_bn: False
+pretrain_vgg_bn: False
+
+# network
+num_default: [6, 6, 6, 6, 6]
+extras_in_channels: [256, 512, 1024, 256, 256]
+extras_out_channels: [256, 256, 256, 256, 256]
+extras_strides: [1, 1, 2, 2, 2, 2]
+extras_ratio: [0.2, 0.2, 0.2, 0.25, 0.5, 0.25]
+feature_size: [80, 40, 20, 10, 5]
+min_scale: 0.2
+max_scale: 0.95
+aspect_ratios: [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]
+steps: [8, 16, 32, 64, 128]
+prior_scaling: [0.1, 0.2]
+gamma: 2.0
+alpha: 0.25
+num_addition_layers: 4
+use_anchor_generator: True
+use_global_norm: True
+
+dataset: "coco"
+lr: 0.05
+mode_sink: "sink"
+device_id: 0
+device_num: 1
+epoch_size: 500
+batch_size: 16
+loss_scale: 1024
+pre_trained: ""
+pre_trained_epoch_size: 0
+save_checkpoint_epochs: 10
+only_create_dataset: False
+eval_start_epoch: 40
+eval_interval: 1
+run_eval: False
+filter_weight: False
+freeze_layer: None
+save_best_ckpt: True
+
+result_path: ""
+img_path: ""
+drop: False
+
+# `mindrecord_dir` and `coco_root` are better to use absolute path.
+feature_extractor_base_param: "./ckpt/resnet50.ckpt"
+checkpoint_filter_list: ['network.multi_box.cls_layers.0.weight', 'network.multi_box.cls_layers.0.bias',
+ 'network.multi_box.loc_layers.0.weight', 'network.multi_box.loc_layers.0.bias']
+mindrecord_dir: "ssd_mindrecord"
+coco_root: "coco2017"
+train_data_type: "train2017"
+val_data_type: "val2017"
+instances_set: "annotations/instances_{}.json"
+classes: ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
+ 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
+ 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
+ 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
+ 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
+ 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
+ 'kite', 'baseball bat', 'baseball glove', 'skateboard',
+ 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
+ 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+ 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
+ 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
+ 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+ 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
+ 'refrigerator', 'book', 'clock', 'vase', 'scissors',
+ 'teddy bear', 'hair drier', 'toothbrush']
+num_classes: 81
+# The annotation.json position of voc validation dataset.
+voc_json: "annotations/voc_instances_val.json"
+# voc original dataset.
+voc_root: "/data/voc_dataset"
+# if coco or voc used, `image_dir` and `anno_path` are useless.
+image_dir: ""
+anno_path: ""
+file_name: "ssd"
+file_format: 'MINDIR'
+
+---
+# Help description for each configuration
+enable_modelarts: "Whether training on modelarts, default: False"
+data_url: "Dataset url for obs"
+train_url: "Training output url for obs"
+checkpoint_url: "The location of checkpoint for obs"
+data_path: "Dataset path for local"
+output_path: "Training output path for local"
+load_path: "The location of checkpoint for obs"
+device_target: "Target device type, available: [Ascend, GPU, CPU]"
+enable_profiling: "Whether enable profiling while training, default: False"
+num_classes: "Class for dataset"
+batch_size: "Batch size for training and evaluation"
+epoch_size: "Total training epochs."
+keep_checkpoint_max: "keep the last keep_checkpoint_max checkpoint"
+checkpoint_path: "The location of the checkpoint file."
+checkpoint_file_path: "The location of the checkpoint file."
diff --git a/cv/detection/ssd/MindSpore/config/ssd_vgg16_config.yaml b/cv/detection/ssd/MindSpore/config/ssd_vgg16_config.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..7c214b7aa05f15ccbd3402fe19a61b621712e23f
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/config/ssd_vgg16_config.yaml
@@ -0,0 +1,125 @@
+# Builtin Configurations (DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
+enable_modelarts: False
+# Url for modelarts
+data_url: ""
+train_url: ""
+checkpoint_url: ""
+# Path for local
+run_distribute: False
+enable_profiling: False
+data_path: "/cache/data"
+output_path: "/cache/train"
+load_path: "/cache/checkpoint_path/"
+device_target: "Ascend"
+checkpoint_path: "./checkpoint/"
+checkpoint_file_path: "ssd-500_458.ckpt"
+
+# ==============================================================================
+# Training options
+model_name: "ssd_vgg16"
+img_shape: [300, 300]
+num_ssd_boxes: 7308
+match_threshold: 0.5
+nms_threshold: 0.6
+min_score: 0.1
+max_boxes: 100
+all_reduce_fusion_config: [20, 41, 62]
+
+# learning rate settings
+lr_init: 0.001
+lr_end_rate: 0.001
+warmup_epochs: 2
+momentum: 0.9
+weight_decay: 0.00015
+ssd_vgg_bn: False
+pretrain_vgg_bn: False
+
+# network
+num_default: [3, 6, 6, 6, 6, 6]
+extras_in_channels: [256, 512, 1024, 512, 256, 256]
+extras_out_channels: [512, 1024, 512, 256, 256, 256]
+extras_strides: [1, 1, 2, 2, 2, 2]
+extras_ratio: [0.2, 0.2, 0.2, 0.25, 0.5, 0.25]
+feature_size: [38, 19, 10, 5, 3, 1]
+min_scale: 0.2
+max_scale: 0.95
+aspect_ratios: [[], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]
+steps: [8, 16, 32, 64, 100, 300]
+prior_scaling: [0.1, 0.2]
+gamma: 2.0
+alpha: 0.75
+
+dataset: "coco"
+lr: 0.05
+mode_sink: "sink"
+device_id: 0
+device_num: 1
+epoch_size: 500
+batch_size: 32
+loss_scale: 1024
+pre_trained: ""
+pre_trained_epoch_size: 0
+save_checkpoint_epochs: 10
+only_create_dataset: False
+eval_start_epoch: 40
+eval_interval: 1
+run_eval: False
+filter_weight: False
+freeze_layer: None
+save_best_ckpt: True
+
+result_path: ""
+img_path: ""
+drop: False
+
+# `mindrecord_dir` and `coco_root` are better to use absolute path.
+feature_extractor_base_param: ""
+checkpoint_filter_list: ['multi_loc_layers', 'multi_cls_layers']
+mindrecord_dir: "MindRecord_COCO"
+coco_root: "coco_ori"
+train_data_type: "train2017"
+val_data_type: "val2017"
+instances_set: "annotations/instances_{}.json"
+classes: ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
+ 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
+ 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
+ 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
+ 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
+ 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
+ 'kite', 'baseball bat', 'baseball glove', 'skateboard',
+ 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
+ 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+ 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
+ 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
+ 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+ 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
+ 'refrigerator', 'book', 'clock', 'vase', 'scissors',
+ 'teddy bear', 'hair drier', 'toothbrush']
+num_classes: 81
+# The annotation.json position of voc validation dataset.
+voc_json: "annotations/voc_instances_val.json"
+# voc original dataset.
+voc_root: "/data/voc_dataset"
+# if coco or voc used, `image_dir` and `anno_path` are useless.
+image_dir: ""
+anno_path: ""
+file_name: "ssd"
+file_format: 'MINDIR'
+
+---
+# Help description for each configuration
+enable_modelarts: "Whether training on modelarts, default: False"
+data_url: "Dataset url for obs"
+train_url: "Training output url for obs"
+checkpoint_url: "The location of checkpoint for obs"
+data_path: "Dataset path for local"
+output_path: "Training output path for local"
+load_path: "The location of checkpoint for obs"
+device_target: "Target device type, available: [Ascend, GPU, CPU]"
+enable_profiling: "Whether enable profiling while training, default: False"
+num_classes: "Class for dataset"
+batch_size: "Batch size for training and evaluation"
+epoch_size: "Total training epochs."
+keep_checkpoint_max: "keep the last keep_checkpoint_max checkpoint"
+checkpoint_path: "The location of the checkpoint file."
+checkpoint_file_path: "The location of the checkpoint file."
diff --git a/cv/detection/ssd/MindSpore/config/ssd_vgg16_config_gpu.yaml b/cv/detection/ssd/MindSpore/config/ssd_vgg16_config_gpu.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..c19ae902b044dbf4fa81bfb0eb2d71769a6f77b5
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/config/ssd_vgg16_config_gpu.yaml
@@ -0,0 +1,126 @@
+# Builtin Configurations (DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
+enable_modelarts: False
+# Url for modelarts
+data_url: ""
+train_url: ""
+checkpoint_url: ""
+# Path for local
+run_distribute: False
+enable_profiling: False
+data_path: "/cache/data"
+output_path: "/cache/train"
+load_path: "/cache/checkpoint_path/"
+device_target: "GPU"
+checkpoint_path: "./checkpoint/"
+checkpoint_file_path: "ssd-500_458.ckpt"
+
+# ==============================================================================
+# Training options
+model_name: "ssd_vgg16"
+img_shape: [300, 300]
+num_ssd_boxes: 7308
+match_threshold: 0.5
+nms_threshold: 0.6
+min_score: 0.1
+max_boxes: 100
+all_reduce_fusion_config: []
+use_float16: False
+
+# learning rate settings
+lr_init: 0.001
+lr_end_rate: 0.001
+warmup_epochs: 2
+momentum: 0.9
+weight_decay: 0.00015
+ssd_vgg_bn: False
+pretrain_vgg_bn: False
+
+# network
+num_default: [3, 6, 6, 6, 6, 6]
+extras_in_channels: [256, 512, 1024, 512, 256, 256]
+extras_out_channels: [512, 1024, 512, 256, 256, 256]
+extras_strides: [1, 1, 2, 2, 2, 2]
+extras_ratio: [0.2, 0.2, 0.2, 0.25, 0.5, 0.25]
+feature_size: [38, 19, 10, 5, 3, 1]
+min_scale: 0.1
+max_scale: 0.95
+aspect_ratios: [[], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]
+steps: [8, 16, 32, 64, 100, 300]
+prior_scaling: [0.1, 0.2]
+gamma: 2.0
+alpha: 0.75
+
+dataset: "coco"
+lr: 0.05
+mode_sink: "sink"
+device_id: 0
+device_num: 1
+epoch_size: 500
+batch_size: 32
+loss_scale: 1024
+pre_trained: ""
+pre_trained_epoch_size: 0
+save_checkpoint_epochs: 10
+only_create_dataset: False
+eval_start_epoch: 40
+eval_interval: 1
+run_eval: False
+filter_weight: False
+freeze_layer: None
+save_best_ckpt: True
+
+result_path: ""
+img_path: ""
+drop: False
+
+# `mindrecord_dir` and `coco_root` should preferably be absolute paths.
+feature_extractor_base_param: ""
+checkpoint_filter_list: ['multi_loc_layers', 'multi_cls_layers']
+mindrecord_dir: "MindRecord_COCO"
+coco_root: "coco_ori"
+train_data_type: "train2017"
+val_data_type: "val2017"
+instances_set: "annotations/instances_{}.json"
+classes: ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
+ 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
+ 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
+ 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
+ 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
+ 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
+ 'kite', 'baseball bat', 'baseball glove', 'skateboard',
+ 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
+ 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+ 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
+ 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
+ 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+ 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
+ 'refrigerator', 'book', 'clock', 'vase', 'scissors',
+ 'teddy bear', 'hair drier', 'toothbrush']
+num_classes: 81
+# The annotation JSON path of the VOC validation dataset.
+voc_json: "annotations/voc_instances_val.json"
+# VOC original dataset root.
+voc_root: "/data/voc_dataset"
+# If coco or voc is used, `image_dir` and `anno_path` are ignored.
+image_dir: ""
+anno_path: ""
+file_name: "ssd"
+file_format: 'MINDIR'
+
+---
+# Help description for each configuration
+enable_modelarts: "Whether training on modelarts, default: False"
+data_url: "Dataset url for obs"
+train_url: "Training output url for obs"
+checkpoint_url: "The location of checkpoint for obs"
+data_path: "Dataset path for local"
+output_path: "Training output path for local"
+load_path: "The location of checkpoint for obs"
+device_target: "Target device type, available: [Ascend, GPU, CPU]"
+enable_profiling: "Whether to enable profiling during training, default: False"
+num_classes: "Number of classes in the dataset"
+batch_size: "Batch size for training and evaluation"
+epoch_size: "Total training epochs."
+keep_checkpoint_max: "Keep the last keep_checkpoint_max checkpoints"
+checkpoint_path: "The location of the checkpoint file."
+checkpoint_file_path: "The location of the checkpoint file."
diff --git a/cv/detection/ssd/MindSpore/eval.py b/cv/detection/ssd/MindSpore/eval.py
new file mode 100755
index 0000000000000000000000000000000000000000..0fe0e6ce47e67ea23337ad40af4ca4a16fad90a3
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/eval.py
@@ -0,0 +1,88 @@
+# Copyright 2020-2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Evaluation for SSD"""
+
+import os
+import mindspore as ms
+from mindspore import Tensor
+from src.ssd import SSD300, SsdInferWithDecoder, ssd_mobilenet_v2, ssd_mobilenet_v1_fpn, ssd_mobilenet_v1, ssd_resnet50_fpn, ssd_vgg16
+from src.dataset import create_ssd_dataset, create_mindrecord
+from src.eval_utils import apply_eval
+from src.box_utils import default_boxes
+from src.model_utils.config import config
+from src.model_utils.moxing_adapter import moxing_wrapper
+
+def ssd_eval(dataset_path, ckpt_path, anno_json):
+ """SSD evaluation."""
+ batch_size = 1
+ ds = create_ssd_dataset(dataset_path, batch_size=batch_size,
+ is_training=False, use_multiprocessing=False)
+ if config.model_name == "ssd300":
+ net = SSD300(ssd_mobilenet_v2(), config, is_training=False)
+ elif config.model_name == "ssd_vgg16":
+ net = ssd_vgg16(config=config)
+ elif config.model_name == "ssd_mobilenet_v1_fpn":
+ net = ssd_mobilenet_v1_fpn(config=config)
+ elif config.model_name == "ssd_resnet50_fpn":
+ net = ssd_resnet50_fpn(config=config)
+ elif config.model_name == "ssd_mobilenet_v1":
+ net = ssd_mobilenet_v1(config=config)
+ else:
+ raise ValueError(f'config.model: {config.model_name} is not supported')
+ net = SsdInferWithDecoder(net, Tensor(default_boxes), config)
+
+ print("Load Checkpoint!")
+ param_dict = ms.load_checkpoint(ckpt_path)
+ net.init_parameters_data()
+ ms.load_param_into_net(net, param_dict)
+
+ net.set_train(False)
+ total = ds.get_dataset_size() * batch_size
+ print("\n========================================\n")
+ print("total images num: ", total)
+ print("Processing, please wait a moment.")
+ eval_param_dict = {"net": net, "dataset": ds, "anno_json": anno_json}
+ mAP = apply_eval(eval_param_dict)
+ print("\n========================================\n")
+ print(f"mAP: {mAP}")
+
+@moxing_wrapper()
+def eval_net():
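+    # num_ssd_boxes == -1 means "derive it from the config": every feature map
+    # contributes an (h // step) * (w // step) grid of cells with
+    # num_default[i] anchors per cell.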
+ if hasattr(config, 'num_ssd_boxes') and config.num_ssd_boxes == -1:
+ num = 0
+ h, w = config.img_shape
+ for i in range(len(config.steps)):
+ num += (h // config.steps[i]) * (w // config.steps[i]) * config.num_default[i]
+ config.num_ssd_boxes = num
+
+ if config.dataset == "coco":
+ coco_root = os.path.join(config.data_path, config.coco_root)
+ json_path = os.path.join(coco_root, config.instances_set.format(config.val_data_type))
+ elif config.dataset == "voc":
+ voc_root = os.path.join(config.data_path, config.voc_root)
+ json_path = os.path.join(voc_root, config.voc_json)
+ else:
+        raise ValueError('SSD eval only supports "coco" and "voc" datasets!')
+
+ ms.set_context(mode=ms.GRAPH_MODE, device_target=config.device_target, device_id=config.device_id)
+
+ mindrecord_file = create_mindrecord(config.dataset, "ssd_eval.mindrecord", False)
+
+ print("Start Eval!")
+ ssd_eval(mindrecord_file, config.checkpoint_file_path, json_path)
+
+if __name__ == '__main__':
+ eval_net()
diff --git a/cv/detection/ssd/MindSpore/eval_onnx.py b/cv/detection/ssd/MindSpore/eval_onnx.py
new file mode 100755
index 0000000000000000000000000000000000000000..4ab00f3d71405d7e9a18d56b34f9a8f40bc92c21
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/eval_onnx.py
@@ -0,0 +1,107 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Run evaluation for a model exported to ONNX"""
+
+import os
+import numpy as np
+import onnxruntime as ort
+from mindspore import context
+
+from src.dataset import create_ssd_dataset, create_mindrecord
+from src.eval_utils import COCOMetrics
+from src.model_utils.config import config
+
+
+def create_session(checkpoint_path, target_device):
+ """Create onnxruntime session"""
+ if target_device == 'GPU':
+ providers = ['CUDAExecutionProvider']
+ elif target_device == 'CPU':
+ providers = ['CPUExecutionProvider']
+ else:
+ raise ValueError(
+ f'Unsupported target device {target_device}, '
+ f'Expected one of: "CPU", "GPU"'
+ )
+ session = ort.InferenceSession(checkpoint_path, providers=providers)
+ input_name = session.get_inputs()[0].name
+ return session, input_name
+
+
+def ssd_eval(dataset_path, ckpt_path, anno_json):
+ """SSD evaluation."""
+ # Silence false positive
+ # pylint: disable=unexpected-keyword-arg
+ ds = create_ssd_dataset(dataset_path, batch_size=config.batch_size,
+ is_training=False, use_multiprocessing=False)
+
+ session, input_name = create_session(ckpt_path, config.device_target)
+ total = ds.get_dataset_size() * config.batch_size
+ print("\n========================================\n")
+ print("total images num: ", total)
+ print("Processing, please wait a moment.")
+
+ metrics = COCOMetrics(anno_json=anno_json,
+ classes=config.classes,
+ num_classes=config.num_classes,
+ max_boxes=config.max_boxes,
+ nms_threshold=config.nms_threshold,
+ min_score=config.min_score)
+
+ for batch in ds.create_dict_iterator(output_numpy=True, num_epochs=1):
+ img_id = batch['img_id']
+ img_np = batch['image']
+ image_shape = batch['image_shape']
+
+ output = session.run(None, {input_name: batch['image']})
+
+ for batch_idx in range(img_np.shape[0]):
+ pred = {"boxes": output[0][batch_idx],
+ "box_scores": output[1][batch_idx],
+ "img_id": int(np.squeeze(img_id[batch_idx])),
+ "image_shape": image_shape[batch_idx]
+ }
+ metrics.update(pred)
+ print(f"mAP: {metrics.get_metrics()}")
+
+
+def eval_net():
+ """Eval ssd model"""
+ if hasattr(config, 'num_ssd_boxes') and config.num_ssd_boxes == -1:
+ num = 0
+ h, w = config.img_shape
+ for i in range(len(config.steps)):
+ num += (h // config.steps[i]) * (w // config.steps[i]) * config.num_default[i]
+ config.num_ssd_boxes = num
+
+ if config.dataset == "coco":
+ coco_root = os.path.join(config.data_path, config.coco_root)
+ json_path = os.path.join(coco_root, config.instances_set.format(config.val_data_type))
+ elif config.dataset == "voc":
+ voc_root = os.path.join(config.data_path, config.voc_root)
+ json_path = os.path.join(voc_root, config.voc_json)
+ else:
+        raise ValueError('SSD eval only supports "coco" and "voc" datasets!')
+
+ context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target, device_id=config.device_id)
+
+ mindrecord_file = create_mindrecord(config.dataset, "ssd_eval.mindrecord", False)
+
+ print("Start Eval!")
+ ssd_eval(mindrecord_file, config.file_name, json_path)
+
+
+if __name__ == '__main__':
+ eval_net()
diff --git a/cv/detection/ssd/MindSpore/export.py b/cv/detection/ssd/MindSpore/export.py
new file mode 100755
index 0000000000000000000000000000000000000000..9917d18d94ac51508e710afe39ce35276f50ee94
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/export.py
@@ -0,0 +1,69 @@
+# Copyright 2020-2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import os
+import numpy as np
+
+import mindspore as ms
+from mindspore import Tensor
+from src.ssd import SSD300, SsdInferWithDecoder, ssd_mobilenet_v2, ssd_mobilenet_v1_fpn, ssd_mobilenet_v1, ssd_resnet50_fpn, ssd_vgg16
+from src.model_utils.config import config
+from src.model_utils.moxing_adapter import moxing_wrapper
+from src.box_utils import default_boxes
+
+ms.set_context(mode=ms.GRAPH_MODE, device_target=config.device_target)
+if config.device_target == "Ascend":
+ ms.set_context(device_id=config.device_id)
+
+def modelarts_pre_process():
+ '''modelarts pre process function.'''
+ config.file_name = os.path.join(config.output_path, config.file_name)
+
+@moxing_wrapper(pre_process=modelarts_pre_process)
+def run_export():
+ """run export."""
+ if hasattr(config, 'num_ssd_boxes') and config.num_ssd_boxes == -1:
+ num = 0
+ h, w = config.img_shape
+ for i in range(len(config.steps)):
+ num += (h // config.steps[i]) * (w // config.steps[i]) * config.num_default[i]
+ config.num_ssd_boxes = num
+
+ if config.model_name == "ssd300":
+ net = SSD300(ssd_mobilenet_v2(), config, is_training=False)
+ elif config.model_name == "ssd_vgg16":
+ net = ssd_vgg16(config=config)
+ elif config.model_name == "ssd_mobilenet_v1_fpn":
+ net = ssd_mobilenet_v1_fpn(config=config)
+ elif config.model_name == "ssd_resnet50_fpn":
+ net = ssd_resnet50_fpn(config=config)
+ elif config.model_name == "ssd_mobilenet_v1":
+ net = ssd_mobilenet_v1(config=config)
+ else:
+ raise ValueError(f'config.model: {config.model_name} is not supported')
+
+ net = SsdInferWithDecoder(net, Tensor(default_boxes), config)
+
+ param_dict = ms.load_checkpoint(config.checkpoint_file_path)
+ net.init_parameters_data()
+ ms.load_param_into_net(net, param_dict)
+ net.set_train(False)
+
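+    # Trace the graph with a dummy [batch, 3, H, W] input; ms.export then
+    # serializes the traced network in the configured file_format.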
+ input_shp = [config.batch_size, 3] + config.img_shape
+ input_array = Tensor(np.random.uniform(-1.0, 1.0, size=input_shp), ms.float32)
+ ms.export(net, input_array, file_name=config.file_name, file_format=config.file_format)
+
+if __name__ == '__main__':
+ run_export()
diff --git a/cv/detection/ssd/MindSpore/mindspore_hub_conf.py b/cv/detection/ssd/MindSpore/mindspore_hub_conf.py
new file mode 100755
index 0000000000000000000000000000000000000000..0f2020b74db0ea833a3589ba25e70999680924c8
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/mindspore_hub_conf.py
@@ -0,0 +1,24 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""hub config."""
+from src.ssd import SSD300, ssd_mobilenet_v2
+from src.config import config
+
+def create_network(name, *args, **kwargs):
+ if name == "ssd300":
+ backbone = ssd_mobilenet_v2()
+ ssd = SSD300(backbone=backbone, config=config, *args, **kwargs)
+ return ssd
+ raise NotImplementedError(f"{name} is not implemented in the repo")
diff --git a/cv/detection/ssd/MindSpore/postprocess.py b/cv/detection/ssd/MindSpore/postprocess.py
new file mode 100755
index 0000000000000000000000000000000000000000..6c99a439f5960e1911ae87191e85fdd701bcb8cc
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/postprocess.py
@@ -0,0 +1,89 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""post process for 310 inference"""
+import os
+import numpy as np
+from PIL import Image
+
+from src.model_utils.config import config
+from src.eval_utils import COCOMetrics
+
+batch_size = 1
+
+def get_img_size(file_name):
+    """Return the (width, height) of an image file."""
+    img = Image.open(file_name)
+    return img.size
+
+def get_result(result_path, img_id_file_path):
+ anno_json = os.path.join(config.coco_root, config.instances_set.format(config.val_data_type))
+
+ if config.drop:
+ from pycocotools.coco import COCO
+ train_cls = config.classes
+ train_cls_dict = {}
+ for i, cls in enumerate(train_cls):
+ train_cls_dict[cls] = i
+ coco = COCO(anno_json)
+        classes_dict = {}
+        cat_ids = coco.loadCats(coco.getCatIds())
+        for cat in cat_ids:
+            classes_dict[cat["id"]] = cat["name"]
+
+ files = os.listdir(img_id_file_path)
+ eval_metrics = COCOMetrics(anno_json=anno_json,
+ classes=config.classes,
+ num_classes=config.num_classes,
+ max_boxes=config.max_boxes,
+ nms_threshold=config.nms_threshold,
+ min_score=config.min_score)
+
+ for file in files:
+ img_ids_name = file.split('.')[0]
+ img_id = int(np.squeeze(img_ids_name))
+ if config.drop:
+ anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=None)
+ anno = coco.loadAnns(anno_ids)
+ annos = []
+ iscrowd = False
+ for label in anno:
+ bbox = label["bbox"]
+                class_name = classes_dict[label["category_id"]]
+ iscrowd = iscrowd or label["iscrowd"]
+ if class_name in train_cls:
+ x_min, x_max = bbox[0], bbox[0] + bbox[2]
+ y_min, y_max = bbox[1], bbox[1] + bbox[3]
+ annos.append(list(map(round, [y_min, x_min, y_max, x_max])) + [train_cls_dict[class_name]])
+ if iscrowd or (not annos):
+ continue
+
+        img_size = get_img_size(os.path.join(img_id_file_path, file))
+ image_shape = np.array([img_size[1], img_size[0]])
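+        # Ascend 310 inference dumps two raw float32 files per image:
+        # <img_id>_0.bin holds the [num_ssd_boxes, 4] box predictions and
+        # <img_id>_1.bin the [num_ssd_boxes, num_classes] class scores.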
+ result_path_0 = os.path.join(result_path, img_ids_name + "_0.bin")
+ result_path_1 = os.path.join(result_path, img_ids_name + "_1.bin")
+ boxes = np.fromfile(result_path_0, dtype=np.float32).reshape(config.num_ssd_boxes, 4)
+ box_scores = np.fromfile(result_path_1, dtype=np.float32).reshape(config.num_ssd_boxes, config.num_classes)
+
+ eval_metrics.update({
+ "boxes": boxes,
+ "box_scores": box_scores,
+ "img_id": img_id,
+ "image_shape": image_shape
+ })
+
+ mAP = eval_metrics.get_metrics()
+ print(f" mAP:{mAP}")
+
+if __name__ == '__main__':
+ get_result(config.result_path, config.img_path)
diff --git a/cv/detection/ssd/MindSpore/requirements.txt b/cv/detection/ssd/MindSpore/requirements.txt
new file mode 100755
index 0000000000000000000000000000000000000000..37482455857ea89188387c1b5b453668dcb3f98d
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/requirements.txt
@@ -0,0 +1,8 @@
+pycocotools >= 2.0.1
+opencv-python
+xml-python
+Pillow
+numpy
+pyyaml
+onnxruntime-gpu
+
diff --git a/cv/detection/ssd/MindSpore/scripts/run_distribute_train.sh b/cv/detection/ssd/MindSpore/scripts/run_distribute_train.sh
new file mode 100755
index 0000000000000000000000000000000000000000..4d37196688ce069f7c06831268e3bbd76b82f6bb
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/scripts/run_distribute_train.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+echo "=============================================================================================================="
+echo "Please run the script as: "
+echo "bash run_distribute_train.sh DEVICE_NUM EPOCH_SIZE LR DATASET RANK_TABLE_FILE CONFIG_PATH PRE_TRAINED PRE_TRAINED_EPOCH_SIZE"
+echo "for example: sh run_distribute_train.sh 8 500 0.2 coco /data/hccl.json /config_path /opt/ssd-300.ckpt(optional) 200(optional)"
+echo "It is better to use absolute path."
+echo "================================================================================================================="
+
+if [ $# != 6 ] && [ $# != 8 ]
+then
+ echo "Usage: bash run_distribute_train.sh [DEVICE_NUM] [EPOCH_SIZE] [LR] [DATASET] \
+[RANK_TABLE_FILE] [CONFIG_PATH] [PRE_TRAINED](optional) [PRE_TRAINED_EPOCH_SIZE](optional)"
+ exit 1
+fi
+
+get_real_path(){
+ if [ "${1:0:1}" == "/" ]; then
+ echo "$1"
+ else
+ echo "$(realpath -m $PWD/$1)"
+ fi
+}
+
+CONFIG_PATH=$(get_real_path $6)
+# Before start distribute train, first create mindrecord files.
+BASE_PATH=$(cd "`dirname $0`" || exit; pwd)
+cd $BASE_PATH/../ || exit
+python train.py --only_create_dataset=True --dataset=$4 --config_path=$CONFIG_PATH
+
+echo "After running the script, the network runs in the background. The log will be generated in LOGx/log.txt"
+
+export RANK_SIZE=$1
+EPOCH_SIZE=$2
+LR=$3
+DATASET=$4
+PRE_TRAINED=$7
+PRE_TRAINED_EPOCH_SIZE=$8
+export RANK_TABLE_FILE=$5
+
+for((i=0;i<RANK_SIZE;i++))
+do
+    export DEVICE_ID=$i
+    export RANK_ID=$i
+    rm -rf LOG$i
+    mkdir ./LOG$i
+    cp ./*.py ./LOG$i
+    cp ./config/*.yaml ./LOG$i
+    cp -r ./src ./LOG$i
+    cd ./LOG$i || exit
+    echo "start training for rank $RANK_ID, device $DEVICE_ID"
+    env > env.log
+ if [ $# == 6 ]
+ then
+ python train.py \
+ --run_distribute=True \
+ --lr=$LR \
+ --dataset=$DATASET \
+ --device_num=$RANK_SIZE \
+ --device_id=$DEVICE_ID \
+ --epoch_size=$EPOCH_SIZE \
+ --config_path=$CONFIG_PATH \
+ --output_path './output' > log.txt 2>&1 &
+ fi
+
+ if [ $# == 8 ]
+ then
+ python train.py \
+ --run_distribute=True \
+ --lr=$LR \
+ --dataset=$DATASET \
+ --device_num=$RANK_SIZE \
+ --device_id=$DEVICE_ID \
+ --pre_trained=$PRE_TRAINED \
+ --pre_trained_epoch_size=$PRE_TRAINED_EPOCH_SIZE \
+ --epoch_size=$EPOCH_SIZE \
+ --config_path=$CONFIG_PATH \
+ --output_path './output' > log.txt 2>&1 &
+ fi
+
+ cd ../
+done
diff --git a/cv/detection/ssd/MindSpore/scripts/run_distribute_train_gpu.sh b/cv/detection/ssd/MindSpore/scripts/run_distribute_train_gpu.sh
new file mode 100755
index 0000000000000000000000000000000000000000..b62f4e968dfa5f9cd20e524b91316e1f902dd5ab
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/scripts/run_distribute_train_gpu.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+# Copyright 2020-2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+echo "=============================================================================================================="
+echo "Please run the script as: "
+echo "bash run_distribute_train_gpu.sh DEVICE_NUM EPOCH_SIZE LR DATASET CONFIG_PATH PRE_TRAINED PRE_TRAINED_EPOCH_SIZE"
+echo "for example: bash run_distribute_train_gpu.sh 8 500 0.2 coco /config_path /opt/ssd-300.ckpt(optional) 200(optional)"
+echo "It is better to use absolute path."
+echo "================================================================================================================="
+
+if [ $# != 5 ] && [ $# != 7 ]
+then
+ echo "Usage: bash run_distribute_train_gpu.sh [DEVICE_NUM] [EPOCH_SIZE] [LR] [DATASET] \
+[CONFIG_PATH] [PRE_TRAINED](optional) [PRE_TRAINED_EPOCH_SIZE](optional)"
+ exit 1
+fi
+
+get_real_path(){
+ if [ "${1:0:1}" == "/" ]; then
+ echo "$1"
+ else
+ echo "$(realpath -m $PWD/$1)"
+ fi
+}
+
+CONFIG_PATH=$(get_real_path $5)
+# Before start distribute train, first create mindrecord files.
+BASE_PATH=$(cd "`dirname $0`" || exit; pwd)
+cd $BASE_PATH/../ || exit
+python3 train.py --only_create_dataset=True --device_target="GPU" --dataset=$4 --config_path=$CONFIG_PATH
+
+echo "After running the script, the network runs in the background. The log will be generated in LOG/log.txt"
+
+export RANK_SIZE=$1
+EPOCH_SIZE=$2
+LR=$3
+DATASET=$4
+PRE_TRAINED=$6
+PRE_TRAINED_EPOCH_SIZE=$7
+
+rm -rf LOG
+mkdir ./LOG
+cp ./*.py ./LOG
+cp ./config/*.yaml ./LOG
+cp -r ./src ./LOG
+cd ./LOG || exit
+
+if [ $# == 5 ]
+then
+ mpirun -allow-run-as-root -n $RANK_SIZE --output-filename log_output --merge-stderr-to-stdout \
+ python3 train.py \
+ --run_distribute=True \
+ --lr=$LR \
+ --dataset=$DATASET \
+ --device_num=$RANK_SIZE \
+ --loss_scale=1 \
+ --device_target="GPU" \
+ --epoch_size=$EPOCH_SIZE \
+ --config_path=$CONFIG_PATH \
+ --output_path './output' > log.txt 2>&1 &
+fi
+
+if [ $# == 7 ]
+then
+ mpirun -allow-run-as-root -n $RANK_SIZE --output-filename log_output --merge-stderr-to-stdout \
+ python3 train.py \
+ --run_distribute=True \
+ --lr=$LR \
+ --dataset=$DATASET \
+ --device_num=$RANK_SIZE \
+ --pre_trained=$PRE_TRAINED \
+ --pre_trained_epoch_size=$PRE_TRAINED_EPOCH_SIZE \
+ --loss_scale=1 \
+ --device_target="GPU" \
+ --epoch_size=$EPOCH_SIZE \
+ --config_path=$CONFIG_PATH \
+ --output_path './output' > log.txt 2>&1 &
+fi
diff --git a/cv/detection/ssd/MindSpore/scripts/run_eval.sh b/cv/detection/ssd/MindSpore/scripts/run_eval.sh
new file mode 100755
index 0000000000000000000000000000000000000000..27887434bfad450a578c3da6435efa2814e69275
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/scripts/run_eval.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# != 4 ]
+then
+ echo "Usage: bash run_eval.sh [DATASET] [CHECKPOINT_PATH] [DEVICE_ID] [CONFIG_PATH]"
+exit 1
+fi
+
+get_real_path(){
+ if [ "${1:0:1}" == "/" ]; then
+ echo "$1"
+ else
+ echo "$(realpath -m $PWD/$1)"
+ fi
+}
+
+DATASET=$1
+CHECKPOINT_PATH=$(get_real_path $2)
+CONFIG_PATH=$(get_real_path $4)
+echo $DATASET
+echo $CONFIG_PATH
+echo $CHECKPOINT_PATH
+
+if [ ! -f $CHECKPOINT_PATH ]
+then
+ echo "error: CHECKPOINT_PATH=$PATH2 is not a file"
+exit 1
+fi
+
+export DEVICE_NUM=1
+export DEVICE_ID=$3
+export RANK_SIZE=$DEVICE_NUM
+export RANK_ID=0
+
+BASE_PATH=$(cd "`dirname $0`" || exit; pwd)
+cd $BASE_PATH/../ || exit
+
+if [ -d "eval$3" ];
+then
+ rm -rf ./eval$3
+fi
+
+mkdir ./eval$3
+cp ./*.py ./eval$3
+cp ./config/*.yaml ./eval$3
+cp -r ./src ./eval$3
+cd ./eval$3 || exit
+env > env.log
+echo "start inferring for device $DEVICE_ID"
+python eval.py \
+ --dataset=$DATASET \
+ --checkpoint_file_path=$CHECKPOINT_PATH \
+ --device_id=$3 \
+ --config_path=$CONFIG_PATH > log.txt 2>&1 &
+cd ..
diff --git a/cv/detection/ssd/MindSpore/scripts/run_eval_gpu.sh b/cv/detection/ssd/MindSpore/scripts/run_eval_gpu.sh
new file mode 100755
index 0000000000000000000000000000000000000000..777747209a8eccb414b1df15a4f7e788a28e6de1
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/scripts/run_eval_gpu.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# != 4 ]
+then
+ echo "Usage: bash run_eval_gpu.sh [DATASET] [CHECKPOINT_PATH] [DEVICE_ID] [CONFIG_PATH]"
+exit 1
+fi
+
+get_real_path(){
+ if [ "${1:0:1}" == "/" ]; then
+ echo "$1"
+ else
+ echo "$(realpath -m $PWD/$1)"
+ fi
+}
+
+DATASET=$1
+CHECKPOINT_PATH=$(get_real_path $2)
+CONFIG_PATH=$(get_real_path $4)
+echo $DATASET
+echo $CHECKPOINT_PATH
+echo $CONFIG_PATH
+
+if [ ! -f $CHECKPOINT_PATH ]
+then
+ echo "error: CHECKPOINT_PATH=$PATH2 is not a file"
+exit 1
+fi
+
+export DEVICE_NUM=1
+export DEVICE_ID=$3
+export RANK_SIZE=$DEVICE_NUM
+export RANK_ID=0
+
+BASE_PATH=$(cd "`dirname $0`" || exit; pwd)
+cd $BASE_PATH/../ || exit
+
+if [ -d "eval$3" ];
+then
+ rm -rf ./eval$3
+fi
+
+mkdir ./eval$3
+cp ./*.py ./eval$3
+cp ./config/*.yaml ./eval$3
+cp -r ./src ./eval$3
+cd ./eval$3 || exit
+env > env.log
+echo "start inferring for device $DEVICE_ID"
+python eval.py \
+ --dataset=$DATASET \
+ --checkpoint_file_path=$CHECKPOINT_PATH \
+ --device_target="GPU" \
+ --device_id=$3 \
+ --config_path=$CONFIG_PATH > log.txt 2>&1 &
+cd ..
diff --git a/cv/detection/ssd/MindSpore/scripts/run_eval_onnx.sh b/cv/detection/ssd/MindSpore/scripts/run_eval_onnx.sh
new file mode 100755
index 0000000000000000000000000000000000000000..4fca9487fa39ad46b9a3c8030005187bc22775a2
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/scripts/run_eval_onnx.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# -lt 3 ]
+then
+ usage="Usage: bash scripts/run_eval_onnx.sh \
+ \
+[] [] []"
+ echo "$usage"
+exit 1
+fi
+
+get_real_path(){
+ if [ "${1:0:1}" == "/" ]; then
+ echo "$1"
+ else
+ echo "$(realpath -m $PWD/$1)"
+ fi
+}
+
+DATA_PATH=$1
+COCO_ROOT=$2
+ONNX_MODEL_PATH=$3
+INSTANCES_SET=${4:-'annotations/instances_{}.json'}
+DEVICE_TARGET=${5:-"GPU"}
+CONFIG_PATH=${6:-"config/ssd300_config_gpu.yaml"}
+
+python eval_onnx.py \
+ --dataset coco \
+ --data_path $DATA_PATH \
+ --coco_root $COCO_ROOT \
+ --instances_set $INSTANCES_SET \
+ --file_name $ONNX_MODEL_PATH \
+ --device_target $DEVICE_TARGET \
+ --config_path $CONFIG_PATH \
+ --batch_size 1 \
+ &> eval.log &
diff --git a/cv/detection/ssd/MindSpore/scripts/run_infer_310.sh b/cv/detection/ssd/MindSpore/scripts/run_infer_310.sh
new file mode 100755
index 0000000000000000000000000000000000000000..17a1209bfa13ddfcfc04dd7f39f8d164796148b6
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/scripts/run_infer_310.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [[ $# -lt 4 || $# -gt 5 ]]; then
+ echo "Usage: bash run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [DVPP] [CONFIG_PATH] [DEVICE_ID]
+    DVPP is mandatory, and must be chosen from [DVPP|CPU] (case-insensitive)
+    DEVICE_ID is optional; it can be set by the environment variable device_id, otherwise the value is zero"
+exit 1
+fi
+
+get_real_path(){
+ if [ "${1:0:1}" == "/" ]; then
+ echo "$1"
+ else
+ echo "$(realpath -m $PWD/$1)"
+ fi
+}
+model=$(get_real_path $1)
+data_path=$(get_real_path $2)
+cfg_path=$4
+
+device_id=0
+if [ $# == 5 ]; then
+ device_id=$5
+fi
+
+echo "mindir name: "$model
+echo "dataset path: "$data_path
+echo "config path: " $cfg_path
+echo "device id: "$device_id
+
+export ASCEND_HOME=/usr/local/Ascend/
+if [ -d ${ASCEND_HOME}/ascend-toolkit ]; then
+ export PATH=$ASCEND_HOME/fwkacllib/bin:$ASCEND_HOME/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/ascend-toolkit/latest/atc/bin:$PATH
+ export LD_LIBRARY_PATH=$ASCEND_HOME/fwkacllib/lib64:/usr/local/lib:$ASCEND_HOME/ascend-toolkit/latest/atc/lib64:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH
+ export TBE_IMPL_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe
+ export PYTHONPATH=$ASCEND_HOME/fwkacllib/python/site-packages:${TBE_IMPL_PATH}:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/python/site-packages:$PYTHONPATH
+ export ASCEND_OPP_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp
+else
+ export ASCEND_HOME=/usr/local/Ascend/latest/
+ export PATH=$ASCEND_HOME/fwkacllib/bin:$ASCEND_HOME/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/atc/ccec_compiler/bin:$ASCEND_HOME/atc/bin:$PATH
+ export LD_LIBRARY_PATH=$ASCEND_HOME/fwkacllib/lib64:/usr/local/lib:$ASCEND_HOME/atc/lib64:$ASCEND_HOME/acllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH
+ export PYTHONPATH=$ASCEND_HOME/fwkacllib/python/site-packages:$ASCEND_HOME/atc/python/site-packages:$PYTHONPATH
+ export ASCEND_OPP_PATH=$ASCEND_HOME/opp
+fi
+
+function compile_app()
+{
+ cd ../ascend310_infer || exit
+ bash build.sh &> build.log
+}
+
+function infer()
+{
+ cd - || exit
+ if [ -d result_Files ]; then
+ rm -rf ./result_Files
+ fi
+ if [ -d time_Result ]; then
+ rm -rf ./time_Result
+ fi
+ mkdir result_Files
+ mkdir time_Result
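+    # Crude parse of the config line 'img_shape: [300, 300]': characters
+    # 12..14 hold the height digits and 17..19 the width digits.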
+ image_shape=`cat ${cfg_path} | grep img_shape`
+ height=${image_shape:12:3}
+ width=${image_shape:17:3}
+
+ ../ascend310_infer/out/main --mindir_path=$model --dataset_path=$data_path --cpu_dvpp=CPU --device_id=$device_id --image_height=$height --image_width=$width &> infer.log
+}
+
+function cal_acc()
+{
+ python ../postprocess.py --result_path=./result_Files --img_path=$data_path --config_path=${cfg_path} --drop=True &> acc.log &
+}
+
+compile_app
+if [ $? -ne 0 ]; then
+ echo "compile app code failed"
+ exit 1
+fi
+infer
+if [ $? -ne 0 ]; then
+ echo " execute inference failed"
+ exit 1
+fi
+cal_acc
+if [ $? -ne 0 ]; then
+ echo "calculate accuracy failed"
+ exit 1
+fi
\ No newline at end of file
diff --git a/cv/detection/ssd/MindSpore/scripts/run_standalone_train.sh b/cv/detection/ssd/MindSpore/scripts/run_standalone_train.sh
new file mode 100755
index 0000000000000000000000000000000000000000..33f8a970c0873f130a9c869f0d05a1b7a808834b
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/scripts/run_standalone_train.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Copyright (c) 2023, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+echo "=============================================================================================================="
+echo "Please run the script as: "
+echo "bash run_standalone_train.sh DEVICE_ID EPOCH_SIZE LR DATASET CONFIG_PATH PRE_TRAINED PRE_TRAINED_EPOCH_SIZE"
+echo "for example: sh run_standalone_train.sh 0 500 0.2 coco /config_path /opt/ssd-300.ckpt(optional) 200(optional)"
+echo "It is better to use absolute path."
+echo "================================================================================================================="
+
+if [ $# != 5 ] && [ $# != 7 ]
+then
+ echo "Usage: bash run_standalone_train.sh [DEVICE_ID] [EPOCH_SIZE] [LR] [DATASET] [CONFIG_PATH] \
+ [PRE_TRAINED](optional) [PRE_TRAINED_EPOCH_SIZE](optional)"
+ exit 1
+fi
+
+get_real_path(){
+ if [ "${1:0:1}" == "/" ]; then
+ echo "$1"
+ else
+ echo "$(realpath -m $PWD/$1)"
+ fi
+}
+
+CONFIG_PATH=$(get_real_path $5)
+# Before start training, first create mindrecord files.
+BASE_PATH=$(cd "`dirname $0`" || exit; pwd)
+cd $BASE_PATH/../ || exit
+python train.py --only_create_dataset=True --dataset=$4 --config_path=$CONFIG_PATH
+
+echo "After running the script, the network runs in the background. The log will be generated in LOGx/log.txt"
+DEVICE_ID=$1
+EPOCH_SIZE=$2
+LR=$3
+DATASET=$4
+PRE_TRAINED=$6
+PRE_TRAINED_EPOCH_SIZE=$7
+
+export DEVICE_ID=$DEVICE_ID
+rm -rf LOG$DEVICE_ID
+mkdir ./LOG$DEVICE_ID
+cp ./*.py ./LOG$DEVICE_ID
+cp -r ./src ./LOG$DEVICE_ID
+cd ./LOG$DEVICE_ID || exit
+
+echo "start training with device $DEVICE_ID"
+env > env.log
+if [ $# == 5 ]
+then
+ python train.py \
+ --lr=$LR \
+ --dataset=$DATASET \
+ --device_id=$DEVICE_ID \
+ --config_path=$CONFIG_PATH \
+ --epoch_size=$EPOCH_SIZE > log.txt 2>&1 &
+fi
+
+if [ $# == 7 ]
+then
+ python train.py \
+ --lr=$LR \
+ --dataset=$DATASET \
+ --device_id=$DEVICE_ID \
+ --pre_trained=$PRE_TRAINED \
+ --pre_trained_epoch_size=$PRE_TRAINED_EPOCH_SIZE \
+ --config_path=$CONFIG_PATH \
+ --epoch_size=$EPOCH_SIZE > log.txt 2>&1 &
+fi
+
+cd ../
diff --git a/cv/detection/ssd/MindSpore/scripts/run_standalone_train_gpu.sh b/cv/detection/ssd/MindSpore/scripts/run_standalone_train_gpu.sh
new file mode 100755
index 0000000000000000000000000000000000000000..f482aa5ff8fa0577893f774728ecc8eb083a954f
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/scripts/run_standalone_train_gpu.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+echo "=============================================================================================================="
+echo "Please run the script as: "
+echo "bash run_standalone_train.sh DEVICE_ID EPOCH_SIZE LR DATASET CONFIG_PATH PRE_TRAINED PRE_TRAINED_EPOCH_SIZE"
+echo "for example: sh run_standalone_train.sh 0 500 0.2 coco /config_path /opt/ssd-300.ckpt(optional) 200(optional)"
+echo "It is better to use absolute path."
+echo "================================================================================================================="
+
+if [ $# != 5 ] && [ $# != 7 ]
+then
+ echo "Usage: bash run_standalone_train.sh [DEVICE_ID] [EPOCH_SIZE] [LR] [DATASET] [CONFIG_PATH] \
+ [PRE_TRAINED](optional) [PRE_TRAINED_EPOCH_SIZE](optional)"
+ exit 1
+fi
+
+get_real_path(){
+ if [ "${1:0:1}" == "/" ]; then
+ echo "$1"
+ else
+ echo "$(realpath -m $PWD/$1)"
+ fi
+}
+
+CONFIG_PATH=$(get_real_path $5)
+# Before start training, first create mindrecord files.
+BASE_PATH=$(cd "`dirname $0`" || exit; pwd)
+cd $BASE_PATH/../ || exit
+python3 train.py --only_create_dataset=True --dataset=$4 --config_path=$CONFIG_PATH
+
+echo "After running the script, the network runs in the background. The log will be generated in LOGx/log.txt"
+DEVICE_ID=$1
+EPOCH_SIZE=$2
+LR=$3
+DATASET=$4
+PRE_TRAINED=$6
+PRE_TRAINED_EPOCH_SIZE=$7
+
+export DEVICE_ID=$DEVICE_ID
+rm -rf LOG$DEVICE_ID
+mkdir ./LOG$DEVICE_ID
+cp ./*.py ./LOG$DEVICE_ID
+cp -r ./src ./LOG$DEVICE_ID
+cd ./LOG$DEVICE_ID || exit
+
+echo "start training with device $DEVICE_ID"
+env > env.log
+if [ $# == 5 ]
+then
+ python3 train.py \
+ --lr=$LR \
+ --dataset=$DATASET \
+ --device_id=$DEVICE_ID \
+ --device_target="GPU" \
+ --config_path=$CONFIG_PATH \
+ --epoch_size=$EPOCH_SIZE > log.txt 2>&1 &
+fi
+
+if [ $# == 7 ]
+then
+ python3 train.py \
+ --lr=$LR \
+ --dataset=$DATASET \
+ --device_id=$DEVICE_ID \
+ --device_target="GPU" \
+ --pre_trained=$PRE_TRAINED \
+ --pre_trained_epoch_size=$PRE_TRAINED_EPOCH_SIZE \
+ --config_path=$CONFIG_PATH \
+ --epoch_size=$EPOCH_SIZE > log.txt 2>&1 &
+fi
+
+cd ../
diff --git a/cv/detection/ssd/MindSpore/src/__init__.py b/cv/detection/ssd/MindSpore/src/__init__.py
new file mode 100755
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/cv/detection/ssd/MindSpore/src/anchor_generator.py b/cv/detection/ssd/MindSpore/src/anchor_generator.py
new file mode 100755
index 0000000000000000000000000000000000000000..9941032f3a2201b5cc5e72ac19477dc885e9e77e
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/src/anchor_generator.py
@@ -0,0 +1,92 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Anchor Generator"""
+
+import numpy as np
+
+
+class GridAnchorGenerator:
+ """
+ Anchor Generator
+ """
+ def __init__(self, image_shape, scale, scales_per_octave, aspect_ratios):
+ super(GridAnchorGenerator, self).__init__()
+ self.scale = scale
+ self.scales_per_octave = scales_per_octave
+ self.aspect_ratios = aspect_ratios
+ self.image_shape = image_shape
+
+
+ def generate(self, step):
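+        # Scales within one octave, 2**(i / scales_per_octave); they are
+        # crossed with the aspect ratios below to enumerate anchor shapes.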
+ scales = np.array([2**(float(scale) / self.scales_per_octave)
+ for scale in range(self.scales_per_octave)]).astype(np.float32)
+ aspects = np.array(list(self.aspect_ratios)).astype(np.float32)
+
+ scales_grid, aspect_ratios_grid = np.meshgrid(scales, aspects)
+ scales_grid = scales_grid.reshape([-1])
+ aspect_ratios_grid = aspect_ratios_grid.reshape([-1])
+
+ feature_size = [self.image_shape[0] / step, self.image_shape[1] / step]
+ grid_height, grid_width = feature_size
+
+ base_size = np.array([self.scale * step, self.scale * step]).astype(np.float32)
+ anchor_offset = step / 2.0
+
+ ratio_sqrt = np.sqrt(aspect_ratios_grid)
+ heights = scales_grid / ratio_sqrt * base_size[0]
+ widths = scales_grid * ratio_sqrt * base_size[1]
+
+ y_centers = np.arange(grid_height).astype(np.float32)
+ y_centers = y_centers * step + anchor_offset
+ x_centers = np.arange(grid_width).astype(np.float32)
+ x_centers = x_centers * step + anchor_offset
+ x_centers, y_centers = np.meshgrid(x_centers, y_centers)
+
+ x_centers_shape = x_centers.shape
+ y_centers_shape = y_centers.shape
+
+ widths_grid, x_centers_grid = np.meshgrid(widths, x_centers.reshape([-1]))
+ heights_grid, y_centers_grid = np.meshgrid(heights, y_centers.reshape([-1]))
+
+ x_centers_grid = x_centers_grid.reshape(*x_centers_shape, -1)
+ y_centers_grid = y_centers_grid.reshape(*y_centers_shape, -1)
+ widths_grid = widths_grid.reshape(-1, *x_centers_shape)
+ heights_grid = heights_grid.reshape(-1, *y_centers_shape)
+
+
+ bbox_centers = np.stack([y_centers_grid, x_centers_grid], axis=3)
+ bbox_sizes = np.stack([heights_grid, widths_grid], axis=3)
+ bbox_centers = bbox_centers.reshape([-1, 2])
+ bbox_sizes = bbox_sizes.reshape([-1, 2])
+ bbox_corners = np.concatenate([bbox_centers - 0.5 * bbox_sizes, bbox_centers + 0.5 * bbox_sizes], axis=1)
+ self.bbox_corners = bbox_corners / np.array([*self.image_shape, *self.image_shape]).astype(np.float32)
+ self.bbox_centers = np.concatenate([bbox_centers, bbox_sizes], axis=1)
+ self.bbox_centers = self.bbox_centers / np.array([*self.image_shape, *self.image_shape]).astype(np.float32)
+
+ print(self.bbox_centers.shape)
+ return self.bbox_centers, self.bbox_corners
+
+ def generate_multi_levels(self, steps):
+ bbox_centers_list = []
+ bbox_corners_list = []
+ for step in steps:
+ bbox_centers, bbox_corners = self.generate(step)
+ bbox_centers_list.append(bbox_centers)
+ bbox_corners_list.append(bbox_corners)
+
+ self.bbox_centers = np.concatenate(bbox_centers_list, axis=0)
+ self.bbox_corners = np.concatenate(bbox_corners_list, axis=0)
+ return self.bbox_centers, self.bbox_corners
diff --git a/cv/detection/ssd/MindSpore/src/box_utils.py b/cv/detection/ssd/MindSpore/src/box_utils.py
new file mode 100755
index 0000000000000000000000000000000000000000..f7509c18049a6df28d17d6c90c30957cf294958c
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/src/box_utils.py
@@ -0,0 +1,170 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Bbox utils"""
+
+import math
+import itertools as it
+import numpy as np
+from src.model_utils.config import config
+from .anchor_generator import GridAnchorGenerator
+
+
+class GeneratDefaultBoxes():
+ """
+    Generate default boxes for SSD, following the order of (W, H, anchor_sizes).
+    `self.default_boxes` has a shape of [anchor_sizes, H, W, 4]; the last dimension is [y, x, h, w].
+    `self.default_boxes_tlbr` has the same shape as `self.default_boxes`; the last dimension is [y1, x1, y2, x2].
+ """
+ def __init__(self):
+ fk = config.img_shape[0] / np.array(config.steps)
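+        # Linear scale schedule from the SSD paper:
+        # s_k = s_min + (s_max - s_min) * k / (m - 1), k = 0..m-1, with a
+        # trailing 1.0 so sqrt(s_k * s_{k+1}) is defined for the last layer.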
+ scale_rate = (config.max_scale - config.min_scale) / (len(config.num_default) - 1)
+ scales = [config.min_scale + scale_rate * i for i in range(len(config.num_default))] + [1.0]
+ self.default_boxes = []
+ for idex, feature_size in enumerate(config.feature_size):
+ sk1 = scales[idex]
+ sk2 = scales[idex + 1]
+ sk3 = math.sqrt(sk1 * sk2)
+ if idex == 0 and not config.aspect_ratios[idex]:
+ w, h = sk1 * math.sqrt(2), sk1 / math.sqrt(2)
+ all_sizes = [(0.1, 0.1), (w, h), (h, w)]
+ else:
+ all_sizes = [(sk1, sk1)]
+ for aspect_ratio in config.aspect_ratios[idex]:
+ w, h = sk1 * math.sqrt(aspect_ratio), sk1 / math.sqrt(aspect_ratio)
+ all_sizes.append((w, h))
+ all_sizes.append((h, w))
+ all_sizes.append((sk3, sk3))
+
+ assert len(all_sizes) == config.num_default[idex]
+
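+            # i indexes rows (y) and j columns (x); anchor centers sit at the
+            # middle of each cell, normalized by fk = img_size / step.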
+ for i, j in it.product(range(feature_size), repeat=2):
+ for w, h in all_sizes:
+ cx, cy = (j + 0.5) / fk[idex], (i + 0.5) / fk[idex]
+ self.default_boxes.append([cy, cx, h, w])
+
+ def to_tlbr(cy, cx, h, w):
+ return cy - h / 2, cx - w / 2, cy + h / 2, cx + w / 2
+
+ # For IoU calculation
+ self.default_boxes_tlbr = np.array(tuple(to_tlbr(*i) for i in self.default_boxes), dtype='float32')
+ self.default_boxes = np.array(self.default_boxes, dtype='float32')
+
+if hasattr(config, 'use_anchor_generator') and config.use_anchor_generator:
+ generator = GridAnchorGenerator(config.img_shape, 4, 2, [1.0, 2.0, 0.5])
+ default_boxes, default_boxes_tlbr = generator.generate_multi_levels(config.steps)
+else:
+ default_boxes_tlbr = GeneratDefaultBoxes().default_boxes_tlbr
+ default_boxes = GeneratDefaultBoxes().default_boxes
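+# Precompute corner splits and anchor areas once at import time; they are
+# reused for every ground-truth box in ssd_bboxes_encode.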
+y1, x1, y2, x2 = np.split(default_boxes_tlbr[:, :4], 4, axis=-1)
+vol_anchors = (x2 - x1) * (y2 - y1)
+matching_threshold = config.match_threshold
+
+
+def ssd_bboxes_encode(boxes):
+ """
+ Labels anchors with ground truth inputs.
+
+ Args:
+        boxes: ground truth with shape [N, 5], for each row, it stores [y, x, h, w, cls].
+
+ Returns:
+ gt_loc: location ground truth with shape [num_anchors, 4].
+ gt_label: class ground truth with shape [num_anchors, 1].
+ num_matched_boxes: number of positives in an image.
+ """
+
+ def jaccard_with_anchors(bbox):
+ """Compute jaccard score a box and the anchors."""
+ # Intersection bbox and volume.
+ ymin = np.maximum(y1, bbox[0])
+ xmin = np.maximum(x1, bbox[1])
+ ymax = np.minimum(y2, bbox[2])
+ xmax = np.minimum(x2, bbox[3])
+ w = np.maximum(xmax - xmin, 0.)
+ h = np.maximum(ymax - ymin, 0.)
+
+ # Volumes.
+ inter_vol = h * w
+ union_vol = vol_anchors + (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) - inter_vol
+ jaccard = inter_vol / union_vol
+ return np.squeeze(jaccard)
+
+ pre_scores = np.zeros((config.num_ssd_boxes), dtype=np.float32)
+ t_boxes = np.zeros((config.num_ssd_boxes, 4), dtype=np.float32)
+ t_label = np.zeros((config.num_ssd_boxes), dtype=np.int64)
+ for bbox in boxes:
+ label = int(bbox[4])
+ scores = jaccard_with_anchors(bbox)
+ idx = np.argmax(scores)
+ scores[idx] = 2.0
+ mask = (scores > matching_threshold)
+ mask = mask & (scores > pre_scores)
+ pre_scores = np.maximum(pre_scores, scores * mask)
+ t_label = mask * label + (1 - mask) * t_label
+ for i in range(4):
+ t_boxes[:, i] = mask * bbox[i] + (1 - mask) * t_boxes[:, i]
+
+ index = np.nonzero(t_label)
+
+ # Transform to tlbr.
+ bboxes = np.zeros((config.num_ssd_boxes, 4), dtype=np.float32)
+ bboxes[:, [0, 1]] = (t_boxes[:, [0, 1]] + t_boxes[:, [2, 3]]) / 2
+ bboxes[:, [2, 3]] = t_boxes[:, [2, 3]] - t_boxes[:, [0, 1]]
+
+ # Encode features.
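+    # Standard SSD box parameterization: center offsets are normalized by the
+    # matched anchor size and prior_scaling[0]; width/height become log
+    # ratios scaled by prior_scaling[1].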
+ bboxes_t = bboxes[index]
+ default_boxes_t = default_boxes[index]
+ bboxes_t[:, :2] = (bboxes_t[:, :2] - default_boxes_t[:, :2]) / (default_boxes_t[:, 2:] * config.prior_scaling[0])
+ tmp = np.maximum(bboxes_t[:, 2:4] / default_boxes_t[:, 2:4], 0.000001)
+ bboxes_t[:, 2:4] = np.log(tmp) / config.prior_scaling[1]
+ bboxes[index] = bboxes_t
+
+ num_match = np.array([len(np.nonzero(t_label)[0])], dtype=np.int32)
+ return bboxes, t_label.astype(np.int32), num_match
+
+
+def ssd_bboxes_decode(boxes):
+ """Decode predict boxes to [y, x, h, w]"""
+ boxes_t = boxes.copy()
+ default_boxes_t = default_boxes.copy()
+ boxes_t[:, :2] = boxes_t[:, :2] * config.prior_scaling[0] * default_boxes_t[:, 2:] + default_boxes_t[:, :2]
+ boxes_t[:, 2:4] = np.exp(boxes_t[:, 2:4] * config.prior_scaling[1]) * default_boxes_t[:, 2:4]
+
+ bboxes = np.zeros((len(boxes_t), 4), dtype=np.float32)
+
+ bboxes[:, [0, 1]] = boxes_t[:, [0, 1]] - boxes_t[:, [2, 3]] / 2
+ bboxes[:, [2, 3]] = boxes_t[:, [0, 1]] + boxes_t[:, [2, 3]] / 2
+
+ return np.clip(bboxes, 0, 1)
+
+
+def intersect(box_a, box_b):
+ """Compute the intersect of two sets of boxes."""
+ max_yx = np.minimum(box_a[:, 2:4], box_b[2:4])
+ min_yx = np.maximum(box_a[:, :2], box_b[:2])
+ inter = np.clip((max_yx - min_yx), a_min=0, a_max=np.inf)
+ return inter[:, 0] * inter[:, 1]
+
+
+def jaccard_numpy(box_a, box_b):
+ """Compute the jaccard overlap of two sets of boxes."""
+ inter = intersect(box_a, box_b)
+ area_a = ((box_a[:, 2] - box_a[:, 0]) *
+ (box_a[:, 3] - box_a[:, 1]))
+ area_b = ((box_b[2] - box_b[0]) *
+ (box_b[3] - box_b[1]))
+ union = area_a + area_b - inter
+ return inter / union
diff --git a/cv/detection/ssd/MindSpore/src/dataset.py b/cv/detection/ssd/MindSpore/src/dataset.py
new file mode 100755
index 0000000000000000000000000000000000000000..9e181e526e5ea26eec49629bef46b00eb5d94742
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/src/dataset.py
@@ -0,0 +1,458 @@
+# Copyright 2020-2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""SSD dataset"""
+
+from __future__ import division
+
+import os
+import json
+import multiprocessing
+import xml.etree.ElementTree as et
+import numpy as np
+import cv2
+
+import mindspore.dataset as de
+from mindspore.mindrecord import FileWriter
+from src.model_utils.config import config
+from .box_utils import jaccard_numpy, ssd_bboxes_encode
+
+
+def _rand(a=0., b=1.):
+ """Generate random."""
+ return np.random.rand() * (b - a) + a
+
+
+def get_imageId_from_fileName(filename, id_iter):
+ """Get imageID from fileName if fileName is int, else return id_iter."""
+ filename = os.path.splitext(filename)[0]
+ if filename.isdigit():
+ return int(filename)
+ return id_iter
+
+
+def random_sample_crop(image, boxes):
+ """Random Crop the image and boxes"""
+ height, width, _ = image.shape
+ min_iou = np.random.choice([None, 0.1, 0.3, 0.5, 0.7, 0.9])
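+    # SSD-style patch sampling: draw a minimum-IoU constraint at random;
+    # None means keep the original image unchanged.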
+
+ if min_iou is None:
+ return image, boxes
+
+    # max trials (50)
+ for _ in range(50):
+ image_t = image
+
+ w = _rand(0.3, 1.0) * width
+ h = _rand(0.3, 1.0) * height
+
+        # aspect ratio constraint between 0.5 and 2
+ if h / w < 0.5 or h / w > 2:
+ continue
+
+ left = _rand() * (width - w)
+ top = _rand() * (height - h)
+
+ rect = np.array([int(top), int(left), int(top + h), int(left + w)])
+ overlap = jaccard_numpy(boxes, rect)
+
+ # dropout some boxes
+ drop_mask = overlap > 0
+ if not drop_mask.any():
+ continue
+
+ if overlap[drop_mask].min() < min_iou and overlap[drop_mask].max() > (min_iou + 0.2):
+ continue
+
+ image_t = image_t[rect[0]:rect[2], rect[1]:rect[3], :]
+
+ centers = (boxes[:, :2] + boxes[:, 2:4]) / 2.0
+
+ m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
+ m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])
+
+        # mask where both m1 and m2 are true
+ mask = m1 * m2 * drop_mask
+
+ # have any valid boxes? try again if not
+ if not mask.any():
+ continue
+
+ # take only matching gt boxes
+ boxes_t = boxes[mask, :].copy()
+
+ boxes_t[:, :2] = np.maximum(boxes_t[:, :2], rect[:2])
+ boxes_t[:, :2] -= rect[:2]
+ boxes_t[:, 2:4] = np.minimum(boxes_t[:, 2:4], rect[2:4])
+ boxes_t[:, 2:4] -= rect[:2]
+
+ return image_t, boxes_t
+ return image, boxes
+
+
+def preprocess_fn(img_id, image, box, is_training):
+ """Preprocess function for dataset."""
+ cv2.setNumThreads(2)
+
+ def _infer_data(image, input_shape):
+ img_h, img_w, _ = image.shape
+ input_h, input_w = input_shape
+
+ image = cv2.resize(image, (input_w, input_h))
+
+        # When the image has a single channel, replicate it to 3 channels
+ if len(image.shape) == 2:
+ image = np.expand_dims(image, axis=-1)
+ image = np.concatenate([image, image, image], axis=-1)
+
+ return img_id, image, np.array((img_h, img_w), np.float32)
+
+ def _data_aug(image, box, is_training, image_size=(300, 300)):
+ """Data augmentation function."""
+ ih, iw, _ = image.shape
+ h, w = image_size
+
+ if not is_training:
+ return _infer_data(image, image_size)
+
+ # Random crop
+ box = box.astype(np.float32)
+ image, box = random_sample_crop(image, box)
+ ih, iw, _ = image.shape
+
+ # Resize image
+ image = cv2.resize(image, (w, h))
+
+ # Flip image or not
+ flip = _rand() < .5
+ if flip:
+ image = cv2.flip(image, 1, dst=None)
+
+        # When the image has a single channel, replicate it to 3 channels
+ if len(image.shape) == 2:
+ image = np.expand_dims(image, axis=-1)
+ image = np.concatenate([image, image, image], axis=-1)
+
+ box[:, [0, 2]] = box[:, [0, 2]] / ih
+ box[:, [1, 3]] = box[:, [1, 3]] / iw
+
+ if flip:
+ box[:, [1, 3]] = 1 - box[:, [3, 1]]
+
+ box, label, num_match = ssd_bboxes_encode(box)
+ return image, box, label, num_match
+
+ return _data_aug(image, box, is_training, image_size=config.img_shape)
+
+
+def create_voc_label(is_training):
+ """Get image path and annotation from VOC."""
+ voc_root = config.voc_root
+ cls_map = {name: i for i, name in enumerate(config.classes)}
+ sub_dir = 'train' if is_training else 'eval'
+ voc_dir = os.path.join(voc_root, sub_dir)
+ if not os.path.isdir(voc_dir):
+ raise ValueError(f'Cannot find {sub_dir} dataset path.')
+
+ image_dir = anno_dir = voc_dir
+ if os.path.isdir(os.path.join(voc_dir, 'Images')):
+ image_dir = os.path.join(voc_dir, 'Images')
+ if os.path.isdir(os.path.join(voc_dir, 'Annotations')):
+ anno_dir = os.path.join(voc_dir, 'Annotations')
+
+ if not is_training:
+ json_file = os.path.join(config.voc_root, config.voc_json)
+ file_dir = os.path.split(json_file)[0]
+ if not os.path.isdir(file_dir):
+ os.makedirs(file_dir)
+ json_dict = {"images": [], "type": "instances", "annotations": [],
+ "categories": []}
+ bnd_id = 1
+
+ image_files_dict = {}
+ image_anno_dict = {}
+ images = []
+ id_iter = 0
+ for anno_file in os.listdir(anno_dir):
+ print(anno_file)
+ if not anno_file.endswith('xml'):
+ continue
+ tree = et.parse(os.path.join(anno_dir, anno_file))
+ root_node = tree.getroot()
+ file_name = root_node.find('filename').text
+ img_id = get_imageId_from_fileName(file_name, id_iter)
+ id_iter += 1
+ image_path = os.path.join(image_dir, file_name)
+ print(image_path)
+ if not os.path.isfile(image_path):
+ print(f'Cannot find image {file_name} according to annotations.')
+ continue
+
+ labels = []
+ for obj in root_node.iter('object'):
+ cls_name = obj.find('name').text
+ if cls_name not in cls_map:
+ print(f'Label "{cls_name}" not in "{config.classes}"')
+ continue
+ bnd_box = obj.find('bndbox')
+ x_min = int(float(bnd_box.find('xmin').text)) - 1
+ y_min = int(float(bnd_box.find('ymin').text)) - 1
+ x_max = int(float(bnd_box.find('xmax').text)) - 1
+ y_max = int(float(bnd_box.find('ymax').text)) - 1
+ labels.append([y_min, x_min, y_max, x_max, cls_map[cls_name]])
+
+ if not is_training:
+ o_width = abs(x_max - x_min)
+ o_height = abs(y_max - y_min)
+                ann = {'area': o_width * o_height, 'iscrowd': 0,
+                       'image_id': img_id,
+                       'bbox': [x_min, y_min, o_width, o_height],
+                       'category_id': cls_map[cls_name], 'id': bnd_id,
+                       'ignore': 0,
+                       'segmentation': []}
+ json_dict['annotations'].append(ann)
+ bnd_id = bnd_id + 1
+
+ if labels:
+ images.append(img_id)
+ image_files_dict[img_id] = image_path
+ image_anno_dict[img_id] = np.array(labels)
+
+ if not is_training:
+ size = root_node.find("size")
+ width = int(size.find('width').text)
+ height = int(size.find('height').text)
+ image = {'file_name': file_name, 'height': height, 'width': width,
+ 'id': img_id}
+ json_dict['images'].append(image)
+
+ if not is_training:
+ for cls_name, cid in cls_map.items():
+ cat = {'supercategory': 'none', 'id': cid, 'name': cls_name}
+ json_dict['categories'].append(cat)
+        with open(json_file, 'w') as json_fp:
+            json_fp.write(json.dumps(json_dict))
+
+ return images, image_files_dict, image_anno_dict
+
+
+def create_coco_label(is_training):
+ """Get image path and annotation from COCO."""
+ from pycocotools.coco import COCO
+
+ coco_root = os.path.join(config.data_path, config.coco_root)
+ data_type = config.val_data_type
+ if is_training:
+ data_type = config.train_data_type
+
+    # Classes needed for training or testing.
+ train_cls = config.classes
+ train_cls_dict = {}
+ for i, cls in enumerate(train_cls):
+ train_cls_dict[cls] = i
+
+ anno_json = os.path.join(coco_root, config.instances_set.format(data_type))
+
+ coco = COCO(anno_json)
+    class_dict = {}
+ cat_ids = coco.loadCats(coco.getCatIds())
+ for cat in cat_ids:
+        class_dict[cat["id"]] = cat["name"]
+
+ image_ids = coco.getImgIds()
+ images = []
+ image_path_dict = {}
+ image_anno_dict = {}
+
+ for img_id in image_ids:
+ image_info = coco.loadImgs(img_id)
+ file_name = image_info[0]["file_name"]
+ anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=None)
+ anno = coco.loadAnns(anno_ids)
+ image_path = os.path.join(coco_root, data_type, file_name)
+ annos = []
+ iscrowd = False
+ for label in anno:
+ bbox = label["bbox"]
+            class_name = class_dict[label["category_id"]]
+ iscrowd = iscrowd or label["iscrowd"]
+ if class_name in train_cls:
+ x_min, x_max = bbox[0], bbox[0] + bbox[2]
+ y_min, y_max = bbox[1], bbox[1] + bbox[3]
+ annos.append(list(map(round, [y_min, x_min, y_max, x_max])) + [train_cls_dict[class_name]])
+
+ if not is_training and iscrowd:
+ continue
+ if len(annos) >= 1:
+ images.append(img_id)
+ image_path_dict[img_id] = image_path
+ image_anno_dict[img_id] = np.array(annos)
+
+ return images, image_path_dict, image_anno_dict
+
+
+def anno_parser(annos_str):
+ """Parse annotation from string to list."""
+ annos = []
+ for anno_str in annos_str:
+ anno = list(map(int, anno_str.strip().split(',')))
+ annos.append(anno)
+ return annos
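+
+# For instance, anno_parser(["0,259,401,459,7", "35,28,324,201,2"]) returns
+# [[0, 259, 401, 459, 7], [35, 28, 324, 201, 2]].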
+
+
+def filter_valid_data(image_dir, anno_path):
+ """Filter valid image file, which both in image_dir and anno_path."""
+ images = []
+ image_path_dict = {}
+ image_anno_dict = {}
+ if not os.path.isdir(image_dir):
+ raise RuntimeError("Path given is not valid.")
+ if not os.path.isfile(anno_path):
+ raise RuntimeError("Annotation file is not valid.")
+
+ with open(anno_path, "rb") as f:
+ lines = f.readlines()
+ for img_id, line in enumerate(lines):
+ line_str = line.decode("utf-8").strip()
+ line_split = str(line_str).split(' ')
+ file_name = line_split[0]
+ image_path = os.path.join(image_dir, file_name)
+ if os.path.isfile(image_path):
+ images.append(img_id)
+ image_path_dict[img_id] = image_path
+ image_anno_dict[img_id] = anno_parser(line_split[1:])
+
+ return images, image_path_dict, image_anno_dict
+
+
+def voc_data_to_mindrecord(mindrecord_dir, is_training, prefix="ssd.mindrecord", file_num=8):
+ """Create MindRecord file by image_dir and anno_path."""
+ mindrecord_path = os.path.join(mindrecord_dir, prefix)
+ writer = FileWriter(mindrecord_path, file_num)
+ images, image_path_dict, image_anno_dict = create_voc_label(is_training)
+
+ ssd_json = {
+ "img_id": {"type": "int32", "shape": [1]},
+ "image": {"type": "bytes"},
+ "annotation": {"type": "int32", "shape": [-1, 5]},
+ }
+ writer.add_schema(ssd_json, "ssd_json")
+
+ for img_id in images:
+ image_path = image_path_dict[img_id]
+ with open(image_path, 'rb') as f:
+ img = f.read()
+ annos = np.array(image_anno_dict[img_id], dtype=np.int32)
+ img_id = np.array([img_id], dtype=np.int32)
+ row = {"img_id": img_id, "image": img, "annotation": annos}
+ writer.write_raw_data([row])
+ writer.commit()
+
+
+def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="ssd.mindrecord", file_num=8):
+ """Create MindRecord file."""
+ mindrecord_path = os.path.join(config.data_path, config.mindrecord_dir, prefix)
+ writer = FileWriter(mindrecord_path, file_num)
+ if dataset == "coco":
+ images, image_path_dict, image_anno_dict = create_coco_label(is_training)
+ else:
+ images, image_path_dict, image_anno_dict = filter_valid_data(config.image_dir, config.anno_path)
+
+ ssd_json = {
+ "img_id": {"type": "int32", "shape": [1]},
+ "image": {"type": "bytes"},
+ "annotation": {"type": "int32", "shape": [-1, 5]},
+ }
+ writer.add_schema(ssd_json, "ssd_json")
+
+ for img_id in images:
+ image_path = image_path_dict[img_id]
+ with open(image_path, 'rb') as f:
+ img = f.read()
+ annos = np.array(image_anno_dict[img_id], dtype=np.int32)
+ img_id = np.array([img_id], dtype=np.int32)
+ row = {"img_id": img_id, "image": img, "annotation": annos}
+ writer.write_raw_data([row])
+ writer.commit()
+
+
+def create_ssd_dataset(mindrecord_file, batch_size=32, device_num=1, rank=0,
+ is_training=True, num_parallel_workers=6, use_multiprocessing=True):
+ """Create SSD dataset with MindDataset."""
+ cores = multiprocessing.cpu_count()
+ if cores < num_parallel_workers:
+ print("The num_parallel_workers {} is set too large, now set it {}".format(num_parallel_workers, cores))
+ num_parallel_workers = cores
+ ds = de.MindDataset(mindrecord_file, columns_list=["img_id", "image", "annotation"], num_shards=device_num,
+ shard_id=rank, num_parallel_workers=num_parallel_workers, shuffle=is_training)
+ decode = de.vision.Decode()
+ ds = ds.map(operations=decode, input_columns=["image"])
+ change_swap_op = de.vision.HWC2CHW()
+ # Computed from random subset of ImageNet training images
+ normalize_op = de.vision.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
+ std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
+ color_adjust_op = de.vision.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
+ compose_map_func = (lambda img_id, image, annotation: preprocess_fn(img_id, image, annotation, is_training))
+ if is_training:
+ output_columns = ["image", "box", "label", "num_match"]
+ trans = [color_adjust_op, normalize_op, change_swap_op]
+ else:
+ output_columns = ["img_id", "image", "image_shape"]
+ trans = [normalize_op, change_swap_op]
+ ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"],
+ output_columns=output_columns, column_order=output_columns,
+ python_multiprocessing=use_multiprocessing,
+ num_parallel_workers=num_parallel_workers)
+ ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=use_multiprocessing,
+ num_parallel_workers=num_parallel_workers)
+ ds = ds.batch(batch_size, drop_remainder=True)
+ return ds
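+
+# A minimal usage sketch (the shard path below is hypothetical): build a
+# single-device training pipeline that yields batches of 32 samples:
+#   ds = create_ssd_dataset("/path/to/ssd.mindrecord0", batch_size=32,
+#                           device_num=1, rank=0, is_training=True)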
+
+
+def create_mindrecord(dataset="coco", prefix="ssd.mindrecord", is_training=True):
+ print("Start create dataset!")
+
+    # Generates MindRecord files in config.mindrecord_dir; the file names are
+    # ssd.mindrecord0, 1, ..., file_num - 1.
+
+ mindrecord_dir = os.path.join(config.data_path, config.mindrecord_dir)
+ mindrecord_file = os.path.join(mindrecord_dir, prefix + "0")
+ if not os.path.exists(mindrecord_file):
+ if not os.path.isdir(mindrecord_dir):
+ os.makedirs(mindrecord_dir)
+ if dataset == "coco":
+ coco_root = os.path.join(config.data_path, config.coco_root)
+ if os.path.isdir(coco_root):
+ print("Create Mindrecord.")
+ data_to_mindrecord_byte_image("coco", is_training, prefix)
+ print("Create Mindrecord Done, at {}".format(mindrecord_dir))
+ else:
+ print("coco_root not exits.")
+ elif dataset == "voc":
+ if os.path.isdir(config.voc_root):
+ print("Create Mindrecord.")
+ voc_data_to_mindrecord(mindrecord_dir, is_training, prefix)
+ print("Create Mindrecord Done, at {}".format(mindrecord_dir))
+ else:
+ print("voc_root not exits.")
+ else:
+ if os.path.isdir(config.image_dir) and os.path.exists(config.anno_path):
+ print("Create Mindrecord.")
+ data_to_mindrecord_byte_image("other", is_training, prefix)
+ print("Create Mindrecord Done, at {}".format(mindrecord_dir))
+ else:
+ print("image_dir or anno_path not exits.")
+ return mindrecord_file
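+
+# A minimal usage sketch: prepare (or reuse) the COCO MindRecord shards and get
+# back the path of the first shard for create_ssd_dataset:
+#   mindrecord_file = create_mindrecord(dataset="coco", prefix="ssd.mindrecord", is_training=True)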
diff --git a/cv/detection/ssd/MindSpore/src/eval_callback.py b/cv/detection/ssd/MindSpore/src/eval_callback.py
new file mode 100755
index 0000000000000000000000000000000000000000..205fce0eaf9b4c07ed96170c8523a281f22524bc
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/src/eval_callback.py
@@ -0,0 +1,90 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Evaluation callback when training"""
+
+import os
+import stat
+from mindspore import save_checkpoint
+from mindspore import log as logger
+from mindspore.train.callback import Callback
+
+class EvalCallBack(Callback):
+ """
+ Evaluation callback when training.
+
+ Args:
+ eval_function (function): evaluation function.
+ eval_param_dict (dict): evaluation parameters' configure dict.
+        interval (int): evaluation interval in epochs, default is 1.
+        eval_start_epoch (int): epoch from which evaluation starts, default is 1.
+        save_best_ckpt (bool): Whether to save the best checkpoint, default is True.
+        besk_ckpt_name (str): best checkpoint file name, default is `best.ckpt`.
+ metrics_name (str): evaluation metrics name, default is `acc`.
+
+ Returns:
+ None
+
+ Examples:
+ >>> EvalCallBack(eval_function, eval_param_dict)
+ """
+
+ def __init__(self, eval_function, eval_param_dict, interval=1, eval_start_epoch=1, save_best_ckpt=True,
+ ckpt_directory="./", besk_ckpt_name="best.ckpt", metrics_name="acc"):
+ super(EvalCallBack, self).__init__()
+ self.eval_param_dict = eval_param_dict
+ self.eval_function = eval_function
+ self.eval_start_epoch = eval_start_epoch
+ if interval < 1:
+            raise ValueError("interval should be >= 1.")
+ self.interval = interval
+ self.save_best_ckpt = save_best_ckpt
+ self.best_res = 0
+ self.best_epoch = 0
+ if not os.path.isdir(ckpt_directory):
+ os.makedirs(ckpt_directory)
+        self.best_ckpt_path = os.path.join(ckpt_directory, besk_ckpt_name)
+ self.metrics_name = metrics_name
+
+ def remove_ckpoint_file(self, file_name):
+ """Remove the specified checkpoint file from this checkpoint manager and also from the directory."""
+ try:
+ os.chmod(file_name, stat.S_IWRITE)
+ os.remove(file_name)
+ except OSError:
+ logger.warning("OSError, failed to remove the older ckpt file %s.", file_name)
+ except ValueError:
+ logger.warning("ValueError, failed to remove the older ckpt file %s.", file_name)
+
+ def epoch_end(self, run_context):
+ """Callback when epoch end."""
+ cb_params = run_context.original_args()
+ cur_epoch = cb_params.cur_epoch_num
+ if cur_epoch >= self.eval_start_epoch and (cur_epoch - self.eval_start_epoch) % self.interval == 0:
+ res = self.eval_function(self.eval_param_dict)
+ print("epoch: {}, {}: {}".format(cur_epoch, self.metrics_name, res), flush=True)
+ if res >= self.best_res:
+ self.best_res = res
+ self.best_epoch = cur_epoch
+ print("update best result: {}".format(res), flush=True)
+ if self.save_best_ckpt:
+                    if os.path.exists(self.best_ckpt_path):
+                        self.remove_ckpoint_file(self.best_ckpt_path)
+                    save_checkpoint(cb_params.train_network, self.best_ckpt_path)
+                    print("update best checkpoint at: {}".format(self.best_ckpt_path), flush=True)
+
+ def end(self, run_context):
+ print("End training, the best {0} is: {1}, the best {0} epoch is {2}".format(self.metrics_name,
+ self.best_res,
+ self.best_epoch), flush=True)
diff --git a/cv/detection/ssd/MindSpore/src/eval_utils.py b/cv/detection/ssd/MindSpore/src/eval_utils.py
new file mode 100755
index 0000000000000000000000000000000000000000..11ad00bdc096b7aed830306eb921881039452d08
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/src/eval_utils.py
@@ -0,0 +1,153 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Coco metrics utils"""
+
+import json
+import numpy as np
+from mindspore import Tensor
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+
+from src.model_utils.config import config
+
+
+def apply_eval(eval_param_dict):
+ net = eval_param_dict["net"]
+ net.set_train(False)
+ ds = eval_param_dict["dataset"]
+ anno_json = eval_param_dict["anno_json"]
+ coco_metrics = COCOMetrics(anno_json=anno_json,
+ classes=config.classes,
+ num_classes=config.num_classes,
+ max_boxes=config.max_boxes,
+ nms_threshold=config.nms_threshold,
+ min_score=config.min_score)
+ for data in ds.create_dict_iterator(output_numpy=True, num_epochs=1):
+ img_id = data['img_id']
+ img_np = data['image']
+ image_shape = data['image_shape']
+
+ output = net(Tensor(img_np))
+
+ for batch_idx in range(img_np.shape[0]):
+ pred_batch = {
+ "boxes": output[0].asnumpy()[batch_idx],
+ "box_scores": output[1].asnumpy()[batch_idx],
+ "img_id": int(np.squeeze(img_id[batch_idx])),
+ "image_shape": image_shape[batch_idx]
+ }
+ coco_metrics.update(pred_batch)
+ eval_metrics = coco_metrics.get_metrics()
+ return eval_metrics
+
+
+def apply_nms(all_boxes, all_scores, thres, max_boxes):
+ """Apply NMS to bboxes."""
+ y1 = all_boxes[:, 0]
+ x1 = all_boxes[:, 1]
+ y2 = all_boxes[:, 2]
+ x2 = all_boxes[:, 3]
+ areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+
+ order = all_scores.argsort()[::-1]
+ keep = []
+
+ while order.size > 0:
+ i = order[0]
+ keep.append(i)
+
+ if len(keep) >= max_boxes:
+ break
+
+ xx1 = np.maximum(x1[i], x1[order[1:]])
+ yy1 = np.maximum(y1[i], y1[order[1:]])
+ xx2 = np.minimum(x2[i], x2[order[1:]])
+ yy2 = np.minimum(y2[i], y2[order[1:]])
+
+ w = np.maximum(0.0, xx2 - xx1 + 1)
+ h = np.maximum(0.0, yy2 - yy1 + 1)
+ inter = w * h
+
+ ovr = inter / (areas[i] + areas[order[1:]] - inter)
+
+ inds = np.where(ovr <= thres)[0]
+
+ order = order[inds + 1]
+ return keep
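+
+# A small worked example (illustrative numbers, boxes as [y1, x1, y2, x2]):
+# two heavily overlapping boxes and one disjoint box; with thres=0.5 the
+# lower-scoring overlapping box is suppressed:
+#   boxes = np.array([[0, 0, 10, 10], [1, 1, 11, 11], [20, 20, 30, 30]], np.float32)
+#   scores = np.array([0.9, 0.8, 0.7], np.float32)
+#   apply_nms(boxes, scores, thres=0.5, max_boxes=100)  # -> [0, 2]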
+
+
+class COCOMetrics:
+ """Calculate mAP of predicted bboxes."""
+
+ def __init__(self, anno_json, classes, num_classes, min_score, nms_threshold, max_boxes):
+ self.num_classes = num_classes
+ self.classes = classes
+ self.min_score = min_score
+ self.nms_threshold = nms_threshold
+ self.max_boxes = max_boxes
+
+ self.val_cls_dict = {i: cls for i, cls in enumerate(classes)}
+ self.coco_gt = COCO(anno_json)
+ cat_ids = self.coco_gt.loadCats(self.coco_gt.getCatIds())
+ self.class_dict = {cat['name']: cat['id'] for cat in cat_ids}
+
+ self.predictions = []
+ self.img_ids = []
+
+ def update(self, batch):
+ pred_boxes = batch['boxes']
+ box_scores = batch['box_scores']
+ img_id = batch['img_id']
+ h, w = batch['image_shape']
+
+ final_boxes = []
+ final_label = []
+ final_score = []
+ self.img_ids.append(img_id)
+
+ for c in range(1, self.num_classes):
+ class_box_scores = box_scores[:, c]
+ score_mask = class_box_scores > self.min_score
+ class_box_scores = class_box_scores[score_mask]
+ class_boxes = pred_boxes[score_mask] * [h, w, h, w]
+
+ if score_mask.any():
+ nms_index = apply_nms(class_boxes, class_box_scores, self.nms_threshold, self.max_boxes)
+ class_boxes = class_boxes[nms_index]
+ class_box_scores = class_box_scores[nms_index]
+
+ final_boxes += class_boxes.tolist()
+ final_score += class_box_scores.tolist()
+ final_label += [self.class_dict[self.val_cls_dict[c]]] * len(class_box_scores)
+
+ for loc, label, score in zip(final_boxes, final_label, final_score):
+ res = {}
+ res['image_id'] = img_id
+ res['bbox'] = [loc[1], loc[0], loc[3] - loc[1], loc[2] - loc[0]]
+ res['score'] = score
+ res['category_id'] = label
+ self.predictions.append(res)
+
+ def get_metrics(self):
+ with open('predictions.json', 'w') as f:
+ json.dump(self.predictions, f)
+
+ coco_dt = self.coco_gt.loadRes('predictions.json')
+ E = COCOeval(self.coco_gt, coco_dt, iouType='bbox')
+ E.params.imgIds = self.img_ids
+ E.evaluate()
+ E.accumulate()
+ E.summarize()
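+        # E.stats[0] is the COCO primary metric: AP averaged over IoU 0.50:0.95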
+ return E.stats[0]
diff --git a/cv/detection/ssd/MindSpore/src/fpn.py b/cv/detection/ssd/MindSpore/src/fpn.py
new file mode 100755
index 0000000000000000000000000000000000000000..83beebd70d949035d24856524f6ce004a1a71eaa
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/src/fpn.py
@@ -0,0 +1,138 @@
+# Copyright 2020-2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import mindspore.ops as ops
+import mindspore.nn as nn
+from .mobilenet_v1 import conv_bn_relu, MobileNetV1
+from .resnet import resnet50
+
+
+class FpnTopDown(nn.Cell):
+ """
+    FPN top-down pathway that fuses multi-scale features.
+ """
+ def __init__(self, in_channel_list, out_channels):
+ super(FpnTopDown, self).__init__()
+ self.lateral_convs_list_ = []
+ self.fpn_convs_ = []
+ for channel in in_channel_list:
+ l_conv = nn.Conv2d(channel, out_channels, kernel_size=1, stride=1,
+ has_bias=True, padding=0, pad_mode='same')
+ fpn_conv = conv_bn_relu(out_channels, out_channels, kernel_size=3, stride=1, depthwise=False)
+ self.lateral_convs_list_.append(l_conv)
+ self.fpn_convs_.append(fpn_conv)
+ self.lateral_convs_list = nn.layer.CellList(self.lateral_convs_list_)
+ self.fpn_convs_list = nn.layer.CellList(self.fpn_convs_)
+ self.num_layers = len(in_channel_list)
+
+ def construct(self, inputs):
+ image_features = ()
+ for i, feature in enumerate(inputs):
+ image_features = image_features + (self.lateral_convs_list[i](feature),)
+
+ features = (image_features[-1],)
+ for i in range(len(inputs) - 1):
+ top = len(inputs) - i - 1
+ down = top - 1
+ size = ops.shape(inputs[down])
+ top_down = ops.ResizeBilinear((size[2], size[3]))(features[-1])
+ top_down = top_down + image_features[down]
+ features = features + (top_down,)
+
+ extract_features = ()
+ num_features = len(features)
+ for i in range(num_features):
+ extract_features = extract_features + (self.fpn_convs_list[i](features[num_features - i - 1]),)
+
+ return extract_features
+
+
+class BottomUp(nn.Cell):
+ """
+ Bottom Up feature extractor
+ """
+ def __init__(self, levels, channels, kernel_size, stride):
+ super(BottomUp, self).__init__()
+ self.levels = levels
+ bottom_up_cells = [
+ conv_bn_relu(channels, channels, kernel_size, stride, False) for x in range(self.levels)
+ ]
+ self.blocks = nn.CellList(bottom_up_cells)
+
+ def construct(self, features):
+ for block in self.blocks:
+ features = features + (block(features[-1]),)
+ return features
+
+
+class FeatureSelector(nn.Cell):
+ """
+ Select specific layers from an entire feature list
+ """
+ def __init__(self, feature_idxes):
+ super(FeatureSelector, self).__init__()
+ self.feature_idxes = feature_idxes
+
+ def construct(self, feature_list):
+ selected = ()
+ for i in self.feature_idxes:
+ selected = selected + (feature_list[i],)
+ return selected
+
+
+class MobileNetV1Fpn(nn.Cell):
+ """
+ MobileNetV1 with FPN as SSD backbone.
+ """
+ def __init__(self, config):
+ super(MobileNetV1Fpn, self).__init__()
+ self.mobilenet_v1 = MobileNetV1(features_only=True)
+
+ self.selector = FeatureSelector([10, 22, 26])
+
+ self.layer_indexs = [10, 22, 26]
+ self.fpn = FpnTopDown([256, 512, 1024], 256)
+ self.bottom_up = BottomUp(2, 256, 3, 2)
+
+ def construct(self, x):
+ features = self.mobilenet_v1(x)
+ features = self.selector(features)
+ features = self.fpn(features)
+ features = self.bottom_up(features)
+ return features
+
+class ResNetV1Fpn(nn.Cell):
+ """
+ ResNet with FPN as SSD backbone.
+ """
+ def __init__(self, resnet):
+ super(ResNetV1Fpn, self).__init__()
+ self.resnet = resnet
+ self.fpn = FpnTopDown([512, 1024, 2048], 256)
+ self.bottom_up = BottomUp(2, 256, 3, 2)
+
+ def construct(self, x):
+ _, _, c3, c4, c5 = self.resnet(x)
+ features = self.fpn((c3, c4, c5))
+ features = self.bottom_up(features)
+ return features
+
+
+def mobilenet_v1_fpn(config):
+ return MobileNetV1Fpn(config)
+
+def resnet50_fpn():
+ resnet = resnet50()
+ return ResNetV1Fpn(resnet)
diff --git a/cv/detection/ssd/MindSpore/src/init_params.py b/cv/detection/ssd/MindSpore/src/init_params.py
new file mode 100755
index 0000000000000000000000000000000000000000..8ecd0f41bb06c7c2b893b22d0360360a14d46c9f
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/src/init_params.py
@@ -0,0 +1,50 @@
+# Copyright 2020-2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Parameters utils"""
+
+from mindspore.common.initializer import initializer, TruncatedNormal
+
+def init_net_param(network, initialize_mode='TruncatedNormal'):
+ """Init the parameters in net."""
+ params = network.trainable_params()
+ for p in params:
+ if 'beta' not in p.name and 'gamma' not in p.name and 'bias' not in p.name:
+ if initialize_mode == 'TruncatedNormal':
+ p.set_data(initializer(TruncatedNormal(0.02), p.data.shape, p.data.dtype))
+ else:
+                p.set_data(initializer(initialize_mode, p.data.shape, p.data.dtype))
+
+
+def load_backbone_params(network, param_dict):
+ """Init the parameters from pre-train model, default is mobilenetv2."""
+ for _, param in network.parameters_and_names():
+ param_name = param.name.replace('network.backbone.', '')
+ name_split = param_name.split('.')
+ if 'features_1' in param_name:
+ param_name = param_name.replace('features_1', 'features')
+ if 'features_2' in param_name:
+ param_name = '.'.join(['features', str(int(name_split[1]) + 14)] + name_split[2:])
+ if param_name in param_dict:
+ param.set_data(param_dict[param_name].data)
+
+
+def filter_checkpoint_parameter_by_list(param_dict, filter_list):
+ """remove useless parameters according to filter_list"""
+ for key in list(param_dict.keys()):
+ for name in filter_list:
+ if name in key:
+ print("Delete parameter from checkpoint: ", key)
+ del param_dict[key]
+ break
diff --git a/cv/detection/ssd/MindSpore/src/lr_schedule.py b/cv/detection/ssd/MindSpore/src/lr_schedule.py
new file mode 100755
index 0000000000000000000000000000000000000000..4df26b39056c24e498d8ebb664e868674e4804ab
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/src/lr_schedule.py
@@ -0,0 +1,56 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Learning rate schedule"""
+
+import math
+import numpy as np
+
+
+def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch):
+ """
+    Generate the learning rate array.
+
+    Args:
+        global_step(int): current global step; the returned array starts from this step
+ lr_init(float): init learning rate
+ lr_end(float): end learning rate
+ lr_max(float): max learning rate
+ warmup_epochs(float): number of warmup epochs
+ total_epochs(int): total epoch of training
+ steps_per_epoch(int): steps of one epoch
+
+ Returns:
+ np.array, learning rate array
+ """
+ lr_each_step = []
+ total_steps = steps_per_epoch * total_epochs
+ warmup_steps = steps_per_epoch * warmup_epochs
+ for i in range(total_steps):
+ if i < warmup_steps:
+ lr = lr_init + (lr_max - lr_init) * i / warmup_steps
+ else:
+ lr = lr_end + \
+ (lr_max - lr_end) * \
+ (1. + math.cos(math.pi * (i - warmup_steps) / (total_steps - warmup_steps))) / 2.
+ if lr < 0.0:
+ lr = 0.0
+ lr_each_step.append(lr)
+
+ current_step = global_step
+ lr_each_step = np.array(lr_each_step).astype(np.float32)
+ learning_rate = lr_each_step[current_step:]
+
+ return learning_rate
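+
+# A quick sketch with illustrative numbers: 10 epochs of 100 steps with 2 warmup
+# epochs produces a per-step schedule starting at global_step:
+#   lr = get_lr(global_step=0, lr_init=0.001, lr_end=0.0, lr_max=0.05,
+#               warmup_epochs=2, total_epochs=10, steps_per_epoch=100)
+#   assert lr.shape == (1000,)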
diff --git a/cv/detection/ssd/MindSpore/src/mobilenet_v1.py b/cv/detection/ssd/MindSpore/src/mobilenet_v1.py
new file mode 100755
index 0000000000000000000000000000000000000000..da64db3001518da0a4343064e30a61d539f56f62
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/src/mobilenet_v1.py
@@ -0,0 +1,125 @@
+# Copyright 2021-2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import mindspore.nn as nn
+import mindspore.ops as ops
+
+def conv_bn_relu(in_channel, out_channel, kernel_size, stride, depthwise, activation='relu6'):
+ output = []
+ output.append(nn.Conv2d(in_channel, out_channel, kernel_size, stride, pad_mode="same",
+ group=1 if not depthwise else in_channel))
+ output.append(nn.BatchNorm2d(out_channel))
+ if activation:
+ output.append(nn.get_activation(activation))
+ return nn.SequentialCell(output)
+
+
+class MobileNetV1(nn.Cell):
+ """
+ MobileNet V1 backbone
+ """
+ def __init__(self, class_num=1001, features_only=False):
+ super(MobileNetV1, self).__init__()
+ self.features_only = features_only
+ cnn = [
+ conv_bn_relu(3, 32, 3, 2, False), # Conv0
+
+ conv_bn_relu(32, 32, 3, 1, True), # Conv1_depthwise
+ conv_bn_relu(32, 64, 1, 1, False), # Conv1_pointwise
+ conv_bn_relu(64, 64, 3, 2, True), # Conv2_depthwise
+ conv_bn_relu(64, 128, 1, 1, False), # Conv2_pointwise
+
+ conv_bn_relu(128, 128, 3, 1, True), # Conv3_depthwise
+ conv_bn_relu(128, 128, 1, 1, False), # Conv3_pointwise
+ conv_bn_relu(128, 128, 3, 2, True), # Conv4_depthwise
+ conv_bn_relu(128, 256, 1, 1, False), # Conv4_pointwise
+
+ conv_bn_relu(256, 256, 3, 1, True), # Conv5_depthwise
+ conv_bn_relu(256, 256, 1, 1, False), # Conv5_pointwise
+ conv_bn_relu(256, 256, 3, 2, True), # Conv6_depthwise
+ conv_bn_relu(256, 512, 1, 1, False), # Conv6_pointwise
+
+ conv_bn_relu(512, 512, 3, 1, True), # Conv7_depthwise
+ conv_bn_relu(512, 512, 1, 1, False), # Conv7_pointwise
+ conv_bn_relu(512, 512, 3, 1, True), # Conv8_depthwise
+ conv_bn_relu(512, 512, 1, 1, False), # Conv8_pointwise
+ conv_bn_relu(512, 512, 3, 1, True), # Conv9_depthwise
+ conv_bn_relu(512, 512, 1, 1, False), # Conv9_pointwise
+ conv_bn_relu(512, 512, 3, 1, True), # Conv10_depthwise
+ conv_bn_relu(512, 512, 1, 1, False), # Conv10_pointwise
+ conv_bn_relu(512, 512, 3, 1, True), # Conv11_depthwise
+ conv_bn_relu(512, 512, 1, 1, False), # Conv11_pointwise
+
+ conv_bn_relu(512, 512, 3, 2, True), # Conv12_depthwise
+ conv_bn_relu(512, 1024, 1, 1, False), # Conv12_pointwise
+ conv_bn_relu(1024, 1024, 3, 1, True), # Conv13_depthwise
+ conv_bn_relu(1024, 1024, 1, 1, False), # Conv13_pointwise
+ ]
+
+ if self.features_only:
+ self.network = nn.CellList(cnn)
+ else:
+ self.network = nn.SequentialCell(cnn)
+ self.fc = nn.Dense(1024, class_num)
+
+ def construct(self, x):
+ output = x
+ if self.features_only:
+ features = ()
+ for block in self.network:
+ output = block(output)
+ features = features + (output,)
+ return features
+ output = self.network(x)
+ output = ops.ReduceMean()(output, (2, 3))
+ output = self.fc(output)
+ return output
+
+class FeatureSelector(nn.Cell):
+ """
+ Select specific layers from an entire feature list
+ """
+ def __init__(self, feature_idxes):
+ super(FeatureSelector, self).__init__()
+ self.feature_idxes = feature_idxes
+
+ def construct(self, feature_list):
+ selected = ()
+ for i in self.feature_idxes:
+ selected = selected + (feature_list[i],)
+ return selected
+
+class MobileNetV1Feature(nn.Cell):
+ """
+    MobileNetV1 feature extractor (selected intermediate layers) as SSD backbone.
+ """
+ def __init__(self, config):
+ super(MobileNetV1Feature, self).__init__()
+ self.mobilenet_v1 = MobileNetV1(features_only=True)
+
+ self.selector = FeatureSelector([14, 26])
+
+ self.layer_indexs = [14, 26]
+
+ def construct(self, x):
+ features = self.mobilenet_v1(x)
+ features = self.selector(features)
+ return features
+
+def mobilenet_v1(class_num=1001):
+ return MobileNetV1(class_num)
+
+def mobilenet_v1_Feature(config):
+ return MobileNetV1Feature(config)
diff --git a/cv/detection/ssd/MindSpore/src/model_utils/config.py b/cv/detection/ssd/MindSpore/src/model_utils/config.py
new file mode 100755
index 0000000000000000000000000000000000000000..a00dd1174725aac3118de43d00476d52a0bf3aca
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/src/model_utils/config.py
@@ -0,0 +1,130 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Parse arguments"""
+
+import os
+import ast
+import argparse
+from pprint import pformat, pprint
+import yaml
+
+_config_path = "./ssd300_config.yaml"
+
+class Config:
+ """
+ Configuration namespace. Convert dictionary to members.
+ """
+ def __init__(self, cfg_dict):
+ for k, v in cfg_dict.items():
+ if isinstance(v, (list, tuple)):
+ setattr(self, k, [Config(x) if isinstance(x, dict) else x for x in v])
+ else:
+ setattr(self, k, Config(v) if isinstance(v, dict) else v)
+
+ def __str__(self):
+ return pformat(self.__dict__)
+
+ def __repr__(self):
+ return self.__str__()
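+
+# For instance, nested dictionaries become nested attribute access:
+#   cfg = Config({"lr": 0.05, "backbone": {"depth": 50}})
+#   cfg.backbone.depth  # -> 50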
+
+
+def parse_cli_to_yaml(parser, cfg, helper=None, choices=None, cfg_path="ssd300_config.yaml"):
+ """
+ Parse command line arguments to the configuration according to the default yaml.
+
+ Args:
+ parser: Parent parser.
+ cfg: Base configuration.
+ helper: Helper description.
+ cfg_path: Path to the default yaml config.
+ """
+ parser = argparse.ArgumentParser(description="[REPLACE THIS at config.py]",
+ parents=[parser])
+ helper = {} if helper is None else helper
+ choices = {} if choices is None else choices
+ for item in cfg:
+ if not isinstance(cfg[item], list) and not isinstance(cfg[item], dict):
+ help_description = helper[item] if item in helper else "Please reference to {}".format(cfg_path)
+ choice = choices[item] if item in choices else None
+ if isinstance(cfg[item], bool):
+ parser.add_argument("--" + item, type=ast.literal_eval, default=cfg[item], choices=choice,
+ help=help_description)
+ else:
+ parser.add_argument("--" + item, type=type(cfg[item]), default=cfg[item], choices=choice,
+ help=help_description)
+ args = parser.parse_args()
+ return args
+
+
+def parse_yaml(yaml_path):
+ """
+ Parse the yaml config file.
+
+ Args:
+ yaml_path: Path to the yaml config.
+ """
+ with open(yaml_path, 'r') as fin:
+ try:
+ cfgs = yaml.load_all(fin.read(), Loader=yaml.FullLoader)
+ cfgs = [x for x in cfgs]
+ if len(cfgs) == 1:
+ cfg_helper = {}
+ cfg = cfgs[0]
+ cfg_choices = {}
+ elif len(cfgs) == 2:
+ cfg, cfg_helper = cfgs
+ cfg_choices = {}
+ elif len(cfgs) == 3:
+ cfg, cfg_helper, cfg_choices = cfgs
+ else:
+ raise ValueError("At most 3 docs (config description for help, choices) are supported in config yaml")
+ print(cfg_helper)
+        except yaml.YAMLError as e:
+            raise ValueError("Failed to parse yaml") from e
+ return cfg, cfg_helper, cfg_choices
+
+
+def merge(args, cfg):
+ """
+ Merge the base config from yaml file and command line arguments.
+
+ Args:
+ args: Command line arguments.
+ cfg: Base configuration.
+ """
+ args_var = vars(args)
+ for item in args_var:
+ cfg[item] = args_var[item]
+ return cfg
+
+
+def get_config():
+ """
+ Get Config according to the yaml file and cli arguments.
+ """
+ parser = argparse.ArgumentParser(description="default name", add_help=False)
+ current_dir = os.path.dirname(os.path.abspath(__file__))
+ parser.add_argument("--config_path", type=str, default=os.path.join(current_dir, \
+ "../../config/ssd300_config.yaml"), help="Config file path")
+ path_args, _ = parser.parse_known_args()
+ default, helper, choices = parse_yaml(path_args.config_path)
+ args = parse_cli_to_yaml(parser=parser, cfg=default, helper=helper, choices=choices, cfg_path=path_args.config_path)
+ final_config = merge(args, default)
+ pprint(final_config)
+ print("Please check the above information for the configurations", flush=True)
+ return Config(final_config)
+
+config = get_config()
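+
+# Any scalar key from the yaml can then be overridden on the command line, e.g.
+# (illustrative): python3 train.py --config_path=./config/ssd300_config.yaml --lr=0.05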
diff --git a/cv/detection/ssd/MindSpore/src/model_utils/device_adapter.py b/cv/detection/ssd/MindSpore/src/model_utils/device_adapter.py
new file mode 100755
index 0000000000000000000000000000000000000000..9c3d21d5e47c22617170887df9da97beff668495
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/src/model_utils/device_adapter.py
@@ -0,0 +1,27 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Device adapter for ModelArts"""
+
+from src.model_utils.config import config
+
+if config.enable_modelarts:
+ from src.model_utils.moxing_adapter import get_device_id, get_device_num, get_rank_id, get_job_id
+else:
+ from src.model_utils.local_adapter import get_device_id, get_device_num, get_rank_id, get_job_id
+
+__all__ = [
+ "get_device_id", "get_device_num", "get_rank_id", "get_job_id"
+]
diff --git a/cv/detection/ssd/MindSpore/src/model_utils/local_adapter.py b/cv/detection/ssd/MindSpore/src/model_utils/local_adapter.py
new file mode 100755
index 0000000000000000000000000000000000000000..769fa6dc78e59eb66dbc8e6773accdc1d08b649e
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/src/model_utils/local_adapter.py
@@ -0,0 +1,36 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Local adapter"""
+
+import os
+
+def get_device_id():
+ device_id = os.getenv('DEVICE_ID', '0')
+ return int(device_id)
+
+
+def get_device_num():
+ device_num = os.getenv('RANK_SIZE', '1')
+ return int(device_num)
+
+
+def get_rank_id():
+ global_rank_id = os.getenv('RANK_ID', '0')
+ return int(global_rank_id)
+
+
+def get_job_id():
+ return "Local Job"
diff --git a/cv/detection/ssd/MindSpore/src/model_utils/moxing_adapter.py b/cv/detection/ssd/MindSpore/src/model_utils/moxing_adapter.py
new file mode 100755
index 0000000000000000000000000000000000000000..72b124bd07b46f04de7575b604bcaa10a6588184
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/src/model_utils/moxing_adapter.py
@@ -0,0 +1,115 @@
+# Copyright 2021-2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Moxing adapter for ModelArts"""
+
+import os
+import functools
+import mindspore as ms
+from src.model_utils.config import config
+
+_global_sync_count = 0
+
+def get_device_id():
+ device_id = os.getenv('DEVICE_ID', '0')
+ return int(device_id)
+
+
+def get_device_num():
+ device_num = os.getenv('RANK_SIZE', '1')
+ return int(device_num)
+
+
+def get_rank_id():
+ global_rank_id = os.getenv('RANK_ID', '0')
+ return int(global_rank_id)
+
+
+def get_job_id():
+    # os.getenv returns None when JOB_ID is unset, so check for falsy values
+    job_id = os.getenv('JOB_ID')
+    return job_id if job_id else "default"
+
+def sync_data(from_path, to_path):
+ """
+    Download data from remote OBS to a local directory when from_path is a remote url
+    and to_path is a local path; upload from local to remote OBS in the opposite case.
+ """
+ import moxing as mox
+ import time
+ global _global_sync_count
+ sync_lock = "/tmp/copy_sync.lock" + str(_global_sync_count)
+ _global_sync_count += 1
+
+    # Each server contains at most 8 devices.
+ if get_device_id() % min(get_device_num(), 8) == 0 and not os.path.exists(sync_lock):
+ print("from path: ", from_path)
+ print("to path: ", to_path)
+ mox.file.copy_parallel(from_path, to_path)
+ print("===finish data synchronization===")
+ try:
+ os.mknod(sync_lock)
+ except IOError:
+ pass
+ print("===save flag===")
+
+ while True:
+ if os.path.exists(sync_lock):
+ break
+ time.sleep(1)
+
+ print("Finish sync data from {} to {}.".format(from_path, to_path))
+
+
+def moxing_wrapper(pre_process=None, post_process=None):
+ """
+ Moxing wrapper to download dataset and upload outputs.
+ """
+ def wrapper(run_func):
+ @functools.wraps(run_func)
+ def wrapped_func(*args, **kwargs):
+ # Download data from data_url
+ if config.enable_modelarts:
+ if config.data_url:
+ sync_data(config.data_url, config.data_path)
+ print("Dataset downloaded: ", os.listdir(config.data_path))
+ if config.checkpoint_url:
+ sync_data(config.checkpoint_url, config.load_path)
+ print("Preload downloaded: ", os.listdir(config.load_path))
+ if config.train_url:
+ sync_data(config.train_url, config.output_path)
+ print("Workspace downloaded: ", os.listdir(config.output_path))
+
+ ms.set_context(save_graphs_path=os.path.join(config.output_path, str(get_rank_id())))
+ config.device_num = get_device_num()
+ config.device_id = get_device_id()
+ if not os.path.exists(config.output_path):
+ os.makedirs(config.output_path)
+
+ if pre_process:
+ pre_process()
+
+ run_func(*args, **kwargs)
+
+ # Upload data to train_url
+ if config.enable_modelarts:
+ if post_process:
+ post_process()
+
+ if config.train_url:
+ print("Start to copy output directory")
+ sync_data(config.output_path, config.train_url)
+ return wrapped_func
+ return wrapper
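+
+# A minimal usage sketch: decorate the training entry point so that, when
+# config.enable_modelarts is set, inputs are downloaded before the run and
+# outputs are uploaded afterwards:
+#   @moxing_wrapper()
+#   def run_train():
+#       ...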
diff --git a/cv/detection/ssd/MindSpore/src/resnet.py b/cv/detection/ssd/MindSpore/src/resnet.py
new file mode 100755
index 0000000000000000000000000000000000000000..489cbff036165bf90b2fa5611d4bf8b9f2cf8957
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/src/resnet.py
@@ -0,0 +1,216 @@
+# Copyright 2021-2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""ResNet."""
+import mindspore.nn as nn
+import mindspore.ops as ops
+
+
+def _conv3x3(in_channel, out_channel, stride=1):
+ return nn.Conv2d(in_channel, out_channel,
+ kernel_size=3, stride=stride, padding=0, pad_mode='same')
+
+
+def _conv1x1(in_channel, out_channel, stride=1):
+ return nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride, padding=0, pad_mode='same')
+
+
+def _conv7x7(in_channel, out_channel, stride=1):
+ return nn.Conv2d(in_channel, out_channel, kernel_size=7, stride=stride, padding=0, pad_mode='same')
+
+
+def _bn(channel):
+ return nn.BatchNorm2d(channel, eps=1e-3, momentum=0.997,
+ gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)
+
+
+def _bn_last(channel):
+ return nn.BatchNorm2d(channel, eps=1e-3, momentum=0.997,
+ gamma_init=0, beta_init=0, moving_mean_init=0, moving_var_init=1)
+
+class ResidualBlock(nn.Cell):
+ """
+ ResNet V1 residual block definition.
+
+ Args:
+ in_channel (int): Input channel.
+ out_channel (int): Output channel.
+ stride (int): Stride size for the first convolutional layer. Default: 1.
+
+ Returns:
+ Tensor, output tensor.
+
+ Examples:
+ >>> ResidualBlock(3, 256, stride=2)
+ """
+ expansion = 4
+
+ def __init__(self,
+ in_channel,
+ out_channel,
+ stride=1):
+ super(ResidualBlock, self).__init__()
+ self.stride = stride
+ channel = out_channel // self.expansion
+ self.conv1 = _conv1x1(in_channel, channel, stride=1)
+ self.bn1 = _bn(channel)
+ self.conv2 = _conv3x3(channel, channel, stride=stride)
+ self.bn2 = _bn(channel)
+
+ self.conv3 = _conv1x1(channel, out_channel, stride=1)
+ self.bn3 = _bn_last(out_channel)
+ self.relu = nn.ReLU()
+
+ self.down_sample = False
+
+ if stride != 1 or in_channel != out_channel:
+ self.down_sample = True
+ self.down_sample_layer = None
+
+ if self.down_sample:
+ self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride), _bn(out_channel)])
+ self.add = ops.Add()
+
+ def construct(self, x):
+ identity = x
+
+ out = self.conv1(x)
+ out = self.bn1(out)
+ out = self.relu(out)
+ out = self.conv2(out)
+ out = self.bn2(out)
+ out = self.relu(out)
+ out = self.conv3(out)
+ out = self.bn3(out)
+
+ if self.down_sample:
+ identity = self.down_sample_layer(identity)
+
+ out = self.add(out, identity)
+ out = self.relu(out)
+
+ return out
+
+
+class ResNet(nn.Cell):
+ """
+ ResNet architecture.
+
+ Args:
+ block (Cell): Block for network.
+ layer_nums (list): Numbers of block in different layers.
+ in_channels (list): Input channel in each layer.
+ out_channels (list): Output channel in each layer.
+ strides (list): Stride size in each layer.
+ Returns:
+ Tensor, output tensor.
+
+ Examples:
+ >>> ResNet(ResidualBlock,
+ >>> [3, 4, 6, 3],
+ >>> [64, 256, 512, 1024],
+ >>> [256, 512, 1024, 2048],
+    >>> [1, 2, 2, 2])
+ """
+
+ def __init__(self,
+ block,
+ layer_nums,
+ in_channels,
+ out_channels,
+ strides):
+ super(ResNet, self).__init__()
+
+ if not len(layer_nums) == len(in_channels) == len(out_channels) == 4:
+ raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!")
+ self.conv1 = _conv7x7(3, 64, stride=2)
+ self.bn1 = _bn(64)
+ self.relu = ops.ReLU()
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
+ self.layer1 = self._make_layer(block,
+ layer_nums[0],
+ in_channel=in_channels[0],
+ out_channel=out_channels[0],
+ stride=strides[0])
+ self.layer2 = self._make_layer(block,
+ layer_nums[1],
+ in_channel=in_channels[1],
+ out_channel=out_channels[1],
+ stride=strides[1])
+ self.layer3 = self._make_layer(block,
+ layer_nums[2],
+ in_channel=in_channels[2],
+ out_channel=out_channels[2],
+ stride=strides[2])
+ self.layer4 = self._make_layer(block,
+ layer_nums[3],
+ in_channel=in_channels[3],
+ out_channel=out_channels[3],
+ stride=strides[3])
+
+ def _make_layer(self, block, layer_num, in_channel, out_channel, stride):
+ """
+ Make stage network of ResNet.
+
+ Args:
+ block (Cell): Resnet block.
+ layer_num (int): Layer number.
+ in_channel (int): Input channel.
+ out_channel (int): Output channel.
+ stride (int): Stride size for the first convolutional layer.
+ Returns:
+ SequentialCell, the output layer.
+
+ Examples:
+ >>> _make_layer(ResidualBlock, 3, 128, 256, 2)
+ """
+ layers = []
+
+ resnet_block = block(in_channel, out_channel, stride=stride)
+ layers.append(resnet_block)
+ for _ in range(1, layer_num):
+ resnet_block = block(out_channel, out_channel, stride=1)
+ layers.append(resnet_block)
+ return nn.SequentialCell(layers)
+
+ def construct(self, x):
+ x = self.conv1(x)
+ x = self.bn1(x)
+ x = self.relu(x)
+ c1 = self.maxpool(x)
+
+ c2 = self.layer1(c1)
+ c3 = self.layer2(c2)
+ c4 = self.layer3(c3)
+ c5 = self.layer4(c4)
+ return c1, c2, c3, c4, c5
+
+
+def resnet50():
+ """
+ Get ResNet50 neural network.
+
+ Returns:
+ Cell, cell instance of ResNet50 neural network.
+
+ Examples:
+ >>> net = resnet50()
+ """
+ return ResNet(ResidualBlock,
+ [3, 4, 6, 3],
+ [64, 256, 512, 1024],
+ [256, 512, 1024, 2048],
+ [1, 2, 2, 2])
diff --git a/cv/detection/ssd/MindSpore/src/ssd.py b/cv/detection/ssd/MindSpore/src/ssd.py
new file mode 100755
index 0000000000000000000000000000000000000000..763eab96bada24181ee6376e63101b4ca8de6f9f
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/src/ssd.py
@@ -0,0 +1,769 @@
+# Copyright 2020-2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""SSD net based MobilenetV2."""
+
+import mindspore as ms
+import mindspore.nn as nn
+from mindspore import Tensor
+from mindspore.context import ParallelMode
+from mindspore.parallel._auto_parallel_context import auto_parallel_context
+from mindspore.communication.management import get_group_size
+import mindspore.ops as ops
+from .fpn import mobilenet_v1_fpn, resnet50_fpn
+from .vgg16 import vgg16
+from .mobilenet_v1 import mobilenet_v1_Feature
+
+
+def _make_divisible(v, divisor, min_value=None):
+ """nsures that all layers have a channel number that is divisible by 8."""
+ if min_value is None:
+ min_value = divisor
+ new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+    # Make sure that rounding down does not reduce the value by more than 10%.
+ if new_v < 0.9 * v:
+ new_v += divisor
+ return new_v
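+
+# For instance, _make_divisible(30, 8) rounds 30 to 32: the nearest multiple of 8,
+# and 32 is not below 90% of 30, so no correction is needed.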
+
+
+def _conv2d(in_channel, out_channel, kernel_size=3, stride=1, pad_mod='same'):
+ return nn.Conv2d(in_channel, out_channel, kernel_size=kernel_size, stride=stride,
+ padding=0, pad_mode=pad_mod, has_bias=True)
+
+
+def _bn(channel):
+ return nn.BatchNorm2d(channel, eps=1e-3, momentum=0.97,
+ gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)
+
+
+def _last_conv2d(in_channel, out_channel, kernel_size=3, stride=1, pad_mod='same', pad=0):
+ in_channels = in_channel
+ out_channels = in_channel
+ depthwise_conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, pad_mode='same',
+ padding=pad, group=in_channels)
+ conv = _conv2d(in_channel, out_channel, kernel_size=1)
+ return nn.SequentialCell([depthwise_conv, _bn(in_channel), nn.ReLU6(), conv])
+
+
+class ConvBNReLU(nn.Cell):
+ """
+ Convolution/Depthwise fused with Batchnorm and ReLU block definition.
+
+ Args:
+ in_planes (int): Input channel.
+ out_planes (int): Output channel.
+ kernel_size (int): Input kernel size.
+ stride (int): Stride size for the first convolutional layer. Default: 1.
+        groups (int): channel group; 1 for regular convolution, the input channel count for depthwise. Default: 1.
+        shared_conv (Cell): weight-shared conv cell to reuse, default: None.
+
+ Returns:
+ Tensor, output tensor.
+
+ Examples:
+ >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
+ """
+ def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, shared_conv=None):
+ super(ConvBNReLU, self).__init__()
+ padding = 0
+ in_channels = in_planes
+ out_channels = out_planes
+ if shared_conv is None:
+ if groups == 1:
+ conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, pad_mode='same', padding=padding)
+ else:
+ out_channels = in_planes
+ conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, pad_mode='same',
+ padding=padding, group=in_channels)
+ layers = [conv, _bn(out_planes), nn.ReLU6()]
+ else:
+ layers = [shared_conv, _bn(out_planes), nn.ReLU6()]
+ self.features = nn.SequentialCell(layers)
+
+ def construct(self, x):
+ output = self.features(x)
+ return output
+
+
+class InvertedResidual(nn.Cell):
+ """
+ Residual block definition.
+
+ Args:
+ inp (int): Input channel.
+ oup (int): Output channel.
+ stride (int): Stride size for the first convolutional layer. Default: 1.
+        expand_ratio (int): expansion ratio of the input channel
+
+ Returns:
+ Tensor, output tensor.
+
+ Examples:
+        >>> InvertedResidual(3, 256, 1, 1)
+ """
+ def __init__(self, inp, oup, stride, expand_ratio, last_relu=False):
+ super(InvertedResidual, self).__init__()
+ assert stride in [1, 2]
+
+ hidden_dim = int(round(inp * expand_ratio))
+ self.use_res_connect = stride == 1 and inp == oup
+
+ layers = []
+ if expand_ratio != 1:
+ layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
+ layers.extend([
+ # dw
+ ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
+ # pw-linear
+ nn.Conv2d(hidden_dim, oup, kernel_size=1, stride=1, has_bias=False),
+ _bn(oup),
+ ])
+ self.conv = nn.SequentialCell(layers)
+ self.cast = ops.Cast()
+ self.last_relu = last_relu
+ self.relu = nn.ReLU6()
+
+ def construct(self, x):
+ identity = x
+ x = self.conv(x)
+ if self.use_res_connect:
+ x = identity + x
+ if self.last_relu:
+ x = self.relu(x)
+ return x
+
+
+class FlattenConcat(nn.Cell):
+ """
+ Concatenate predictions into a single tensor.
+
+ Args:
+ config (dict): The default config of SSD.
+
+ Returns:
+ Tensor, flatten predictions.
+ """
+ def __init__(self, config):
+ super(FlattenConcat, self).__init__()
+ self.num_ssd_boxes = config.num_ssd_boxes
+ self.concat = ops.Concat(axis=1)
+ self.transpose = ops.Transpose()
+ def construct(self, inputs):
+ output = ()
+ batch_size = ops.shape(inputs[0])[0]
+ for x in inputs:
+ x = self.transpose(x, (0, 2, 3, 1))
+ output += (ops.reshape(x, (batch_size, -1)),)
+ res = self.concat(output)
+ return ops.reshape(res, (batch_size, self.num_ssd_boxes, -1))
+
+
+class MultiBox(nn.Cell):
+ """
+ Multibox conv layers. Each multibox layer contains class conf scores and localization predictions.
+
+ Args:
+ config (dict): The default config of SSD.
+
+ Returns:
+ Tensor, localization predictions.
+ Tensor, class conf scores.
+ """
+ def __init__(self, config):
+ super(MultiBox, self).__init__()
+ num_classes = config.num_classes
+ out_channels = config.extras_out_channels
+ num_default = config.num_default
+
+ loc_layers = []
+ cls_layers = []
+ for k, out_channel in enumerate(out_channels):
+ loc_layers += [_last_conv2d(out_channel, 4 * num_default[k],
+ kernel_size=3, stride=1, pad_mod='same', pad=0)]
+ cls_layers += [_last_conv2d(out_channel, num_classes * num_default[k],
+ kernel_size=3, stride=1, pad_mod='same', pad=0)]
+
+ self.multi_loc_layers = nn.layer.CellList(loc_layers)
+ self.multi_cls_layers = nn.layer.CellList(cls_layers)
+ self.flatten_concat = FlattenConcat(config)
+
+ def construct(self, inputs):
+ loc_outputs = ()
+ cls_outputs = ()
+ for i in range(len(self.multi_loc_layers)):
+ loc_outputs += (self.multi_loc_layers[i](inputs[i]),)
+ cls_outputs += (self.multi_cls_layers[i](inputs[i]),)
+ return self.flatten_concat(loc_outputs), self.flatten_concat(cls_outputs)
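+
+# Output sketch: for a batch of B images MultiBox returns
+# loc: (B, num_ssd_boxes, 4) box regression offsets
+# cls: (B, num_ssd_boxes, num_classes) raw class logits (sigmoid is applied
+# later, in the eval path of the SSD networks below)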
+
+
+class WeightSharedMultiBox(nn.Cell):
+ """
+ Weight shared Multi-box conv layers. Each multi-box layer contains class conf scores and localization predictions.
+ All box predictors share the same conv weights across the different feature levels.
+
+ Args:
+ config (dict): The default config of SSD.
+ loc_cls_shared_addition (bool): Whether the location predictor and the classifier
+ share the same addition layers.
+ Returns:
+ Tensor, localization predictions.
+ Tensor, class conf scores.
+ """
+ def __init__(self, config, loc_cls_shared_addition=False):
+ super(WeightSharedMultiBox, self).__init__()
+ num_classes = config.num_classes
+ out_channels = config.extras_out_channels[0]
+ num_default = config.num_default[0]
+ num_features = len(config.feature_size)
+ num_addition_layers = config.num_addition_layers
+ self.loc_cls_shared_addition = loc_cls_shared_addition
+
+ if not loc_cls_shared_addition:
+ loc_convs = [
+ _conv2d(out_channels, out_channels, 3, 1) for x in range(num_addition_layers)
+ ]
+ cls_convs = [
+ _conv2d(out_channels, out_channels, 3, 1) for x in range(num_addition_layers)
+ ]
+ addition_loc_layer_list = []
+ addition_cls_layer_list = []
+ for _ in range(num_features):
+ addition_loc_layer = [
+ ConvBNReLU(out_channels, out_channels, 3, 1, 1, loc_convs[x]) for x in range(num_addition_layers)
+ ]
+ addition_cls_layer = [
+ ConvBNReLU(out_channels, out_channels, 3, 1, 1, cls_convs[x]) for x in range(num_addition_layers)
+ ]
+ addition_loc_layer_list.append(nn.SequentialCell(addition_loc_layer))
+ addition_cls_layer_list.append(nn.SequentialCell(addition_cls_layer))
+ self.addition_layer_loc = nn.CellList(addition_loc_layer_list)
+ self.addition_layer_cls = nn.CellList(addition_cls_layer_list)
+ else:
+ convs = [
+ _conv2d(out_channels, out_channels, 3, 1) for x in range(num_addition_layers)
+ ]
+ addition_layer_list = []
+ for _ in range(num_features):
+ addition_layers = [
+ ConvBNReLU(out_channels, out_channels, 3, 1, 1, convs[x]) for x in range(num_addition_layers)
+ ]
+ addition_layer_list.append(nn.SequentialCell(addition_layers))
+ self.addition_layer = nn.SequentialCell(addition_layer_list)
+
+ loc_layers = [_conv2d(out_channels, 4 * num_default,
+ kernel_size=3, stride=1, pad_mod='same')]
+ cls_layers = [_conv2d(out_channels, num_classes * num_default,
+ kernel_size=3, stride=1, pad_mod='same')]
+
+ self.loc_layers = nn.SequentialCell(loc_layers)
+ self.cls_layers = nn.SequentialCell(cls_layers)
+ self.flatten_concat = FlattenConcat(config)
+
+ def construct(self, inputs):
+ loc_outputs = ()
+ cls_outputs = ()
+ num_heads = len(inputs)
+ for i in range(num_heads):
+ if self.loc_cls_shared_addition:
+ features = self.addition_layer[i](inputs[i])
+ loc_outputs += (self.loc_layers(features),)
+ cls_outputs += (self.cls_layers(features),)
+ else:
+ features = self.addition_layer_loc[i](inputs[i])
+ loc_outputs += (self.loc_layers(features),)
+ features = self.addition_layer_cls[i](inputs[i])
+ cls_outputs += (self.cls_layers(features),)
+ return self.flatten_concat(loc_outputs), self.flatten_concat(cls_outputs)
+
+
+class SSD300(nn.Cell):
+ """
+ SSD300 network. The default backbone is MobileNetV2.
+
+ Args:
+ backbone (Cell): Backbone Network.
+ config (dict): The default config of SSD.
+
+ Returns:
+ Tensor, localization predictions.
+ Tensor, class conf scores.
+
+ Examples:
+ SSD300(backbone=ssd_mobilenet_v2(), config=config)
+ """
+ def __init__(self, backbone, config, is_training=True):
+ super(SSD300, self).__init__()
+
+ self.backbone = backbone
+ in_channels = config.extras_in_channels
+ out_channels = config.extras_out_channels
+ ratios = config.extras_ratio
+ strides = config.extras_strides
+ residual_list = []
+ for i in range(2, len(in_channels)):
+ residual = InvertedResidual(in_channels[i], out_channels[i], stride=strides[i],
+ expand_ratio=ratios[i], last_relu=True)
+ residual_list.append(residual)
+ self.multi_residual = nn.layer.CellList(residual_list)
+ self.multi_box = MultiBox(config)
+ self.is_training = is_training
+ if not is_training:
+ self.activation = ops.Sigmoid()
+
+ def construct(self, x):
+ layer_out_13, output = self.backbone(x)
+ multi_feature = (layer_out_13, output)
+ feature = output
+ for residual in self.multi_residual:
+ feature = residual(feature)
+ multi_feature += (feature,)
+ pred_loc, pred_label = self.multi_box(multi_feature)
+ if not self.is_training:
+ pred_label = self.activation(pred_label)
+ pred_loc = ops.cast(pred_loc, ms.float32)
+ pred_label = ops.cast(pred_label, ms.float32)
+ return pred_loc, pred_label
+
+
+class SsdMobilenetV1Fpn(nn.Cell):
+ """
+ SSD Network using mobilenetV1 with fpn to extract features
+
+ Args:
+ config (dict): The default config of SSD.
+
+ Returns:
+ Tensor, localization predictions.
+ Tensor, class conf scores.
+
+ Examples:
+ SsdMobilenetV1Fpn(config)
+ """
+ def __init__(self, config):
+ super(SsdMobilenetV1Fpn, self).__init__()
+ self.multi_box = WeightSharedMultiBox(config)
+ self.activation = ops.Sigmoid()
+ self.feature_extractor = mobilenet_v1_fpn(config)
+
+ def construct(self, x):
+ features = self.feature_extractor(x)
+ pred_loc, pred_label = self.multi_box(features)
+ if not self.training:
+ pred_label = self.activation(pred_label)
+ pred_loc = ops.cast(pred_loc, ms.float32)
+ pred_label = ops.cast(pred_label, ms.float32)
+ return pred_loc, pred_label
+
+
+class SsdMobilenetV1Feature(nn.Cell):
+ """
+ SSD Network using mobilenetV1 with fpn to extract features
+
+ Args:
+ config (dict): The default config of SSD.
+ is_training (bool): Used for training, default is True.
+
+ Returns:
+ Tensor, localization predictions.
+ Tensor, class conf scores.
+
+ Examples:
+ SsdMobilenetV1Feature(config, True)
+ """
+ def __init__(self, config, is_training=True):
+ super(SsdMobilenetV1Feature, self).__init__()
+ self.multi_box = MultiBox(config)
+ self.activation = ops.Sigmoid()
+ self.feature_extractor = mobilenet_v1_Feature(config)
+ in_channels = config.extras_in_channels
+ out_channels = config.extras_out_channels
+ strides = config.extras_strides
+ residual_list = []
+ for i in range(2, len(in_channels)):
+ residual = ConvBNReLU(in_channels[i], out_channels[i], stride=strides[i])
+ residual_list.append(residual)
+ self.multi_residual = nn.layer.CellList(residual_list)
+ # multi_box and activation are already created above; is_training is kept
+ # for API compatibility (construct uses the Cell's own training flag)
+ self.is_training = is_training
+
+ def construct(self, x):
+ feature, output = self.feature_extractor(x)
+ multi_feature = (feature, output)
+ feature = output
+ for residual in self.multi_residual:
+ feature = residual(feature)
+ multi_feature += (feature,)
+ pred_loc, pred_label = self.multi_box(multi_feature)
+ if not self.training:
+ pred_label = self.activation(pred_label)
+ pred_loc = ops.cast(pred_loc, ms.float32)
+ pred_label = ops.cast(pred_label, ms.float32)
+ return pred_loc, pred_label
+
+
+class SsdResNet50Fpn(nn.Cell):
+ """
+ SSD Network using ResNet50 with fpn to extract features
+
+ Args:
+ config (dict): The default config of SSD.
+
+ Returns:
+ Tensor, localization predictions.
+ Tensor, class conf scores.
+
+ Examples:
+ SsdResNet50Fpn(config)
+ """
+ def __init__(self, config):
+ super(SsdResNet50Fpn, self).__init__()
+ self.multi_box = WeightSharedMultiBox(config)
+ self.activation = ops.Sigmoid()
+ self.feature_extractor = resnet50_fpn()
+
+ def construct(self, x):
+ features = self.feature_extractor(x)
+ pred_loc, pred_label = self.multi_box(features)
+ if not self.training:
+ pred_label = self.activation(pred_label)
+ pred_loc = ops.cast(pred_loc, ms.float32)
+ pred_label = ops.cast(pred_label, ms.float32)
+ return pred_loc, pred_label
+
+
+class SigmoidFocalClassificationLoss(nn.Cell):
+ """"
+ Sigmoid focal-loss for classification.
+
+ Args:
+ gamma (float): Hyper-parameter to balance the easy and hard examples. Default: 2.0
+ alpha (float): Hyper-parameter to balance the positive and negative example. Default: 0.25
+
+ Returns:
+ Tensor, the focal loss.
+ """
+ def __init__(self, gamma=2.0, alpha=0.25):
+ super(SigmoidFocalClassificationLoss, self).__init__()
+ self.sigmoid_cross_entropy = ops.SigmoidCrossEntropyWithLogits()
+ self.sigmoid = ops.Sigmoid()
+ self.pow = ops.Pow()
+ self.onehot = ops.OneHot()
+ self.on_value = Tensor(1.0, ms.float32)
+ self.off_value = Tensor(0.0, ms.float32)
+ self.gamma = gamma
+ self.alpha = alpha
+
+ def construct(self, logits, label):
+ label = self.onehot(label, ops.shape(logits)[-1], self.on_value, self.off_value)
+ sigmoid_cross_entropy = self.sigmoid_cross_entropy(logits, label)
+ sigmoid = self.sigmoid(logits)
+ label = ops.cast(label, ms.float32)
+ p_t = label * sigmoid + (1 - label) * (1 - sigmoid)
+ modulating_factor = self.pow(1 - p_t, self.gamma)
+ alpha_weight_factor = label * self.alpha + (1 - label) * (1 - self.alpha)
+ focal_loss = modulating_factor * alpha_weight_factor * sigmoid_cross_entropy
+ return focal_loss
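+
+# Numeric intuition for the modulating factor (gamma = 2.0): an easy example
+# with p_t = 0.9 is scaled by (1 - 0.9)**2 = 0.01, while a hard example with
+# p_t = 0.1 is scaled by (1 - 0.1)**2 = 0.81, i.e. easy examples contribute
+# roughly 80x less to the total loss.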
+
+
+class SSDWithLossCell(nn.Cell):
+ """"
+ Provide SSD training loss through network.
+
+ Args:
+ network (Cell): The training network.
+ config (dict): SSD config.
+
+ Returns:
+ Tensor, the loss of the network.
+ """
+ def __init__(self, network, config):
+ super(SSDWithLossCell, self).__init__()
+ self.network = network
+ self.less = ops.Less()
+ self.tile = ops.Tile()
+ self.reduce_sum = ops.ReduceSum()
+ self.expand_dims = ops.ExpandDims()
+ self.class_loss = SigmoidFocalClassificationLoss(config.gamma, config.alpha)
+ self.loc_loss = nn.SmoothL1Loss()
+
+ def construct(self, x, gt_loc, gt_label, num_matched_boxes):
+ pred_loc, pred_label = self.network(x)
+ mask = ops.cast(self.less(0, gt_label), ms.float32)
+ num_matched_boxes = self.reduce_sum(ops.cast(num_matched_boxes, ms.float32))
+
+ # Localization Loss
+ mask_loc = self.tile(self.expand_dims(mask, -1), (1, 1, 4))
+ smooth_l1 = self.loc_loss(pred_loc, gt_loc) * mask_loc
+ loss_loc = self.reduce_sum(self.reduce_sum(smooth_l1, -1), -1)
+
+ # Classification Loss
+ loss_cls = self.class_loss(pred_label, gt_label)
+ loss_cls = self.reduce_sum(loss_cls, (1, 2))
+
+ return self.reduce_sum((loss_cls + loss_loc) / num_matched_boxes)
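+
+# Usage sketch (mirrors train.py): wrap a built SSD network to obtain a scalar
+# training loss from one batch.
+# net = SSDWithLossCell(ssd, config)
+# loss = net(images, gt_loc, gt_label, num_matched_boxes)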
+
+
+grad_scale = ops.MultitypeFuncGraph("grad_scale")
+@grad_scale.register("Tensor", "Tensor")
+def tensor_grad_scale(scale, grad):
+ return grad * ops.Reciprocal()(scale)
+
+
+class TrainingWrapper(nn.Cell):
+ """
+ Encapsulation class of SSD network training.
+
+ Appends an optimizer to the training network. After that, the construct
+ function can be called to create the backward graph.
+
+ Args:
+ network (Cell): The training network. Note that loss function should have been added.
+ optimizer (Optimizer): Optimizer for updating the weights.
+ sens (Number): The adjust parameter. Default: 1.0.
+ use_global_norm (bool): Whether to apply global norm before the optimizer. Default: False.
+ """
+ def __init__(self, network, optimizer, sens=1.0, use_global_norm=False):
+ super(TrainingWrapper, self).__init__(auto_prefix=False)
+ self.network = network
+ self.network.set_grad()
+ self.weights = ms.ParameterTuple(network.trainable_params())
+ self.optimizer = optimizer
+ self.grad = ops.GradOperation(get_by_list=True, sens_param=True)
+ self.sens = sens
+ self.reducer_flag = False
+ self.grad_reducer = None
+ self.use_global_norm = use_global_norm
+ self.parallel_mode = ms.get_auto_parallel_context("parallel_mode")
+ if self.parallel_mode in [ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL]:
+ self.reducer_flag = True
+ if self.reducer_flag:
+ mean = ms.get_auto_parallel_context("gradients_mean")
+ if auto_parallel_context().get_device_num_is_set():
+ degree = ms.get_auto_parallel_context("device_num")
+ else:
+ degree = get_group_size()
+ self.grad_reducer = nn.DistributedGradReducer(optimizer.parameters, mean, degree)
+ self.hyper_map = ops.HyperMap()
+
+ def construct(self, *args):
+ weights = self.weights
+ loss = self.network(*args)
+ sens = ops.Fill()(ops.DType()(loss), ops.Shape()(loss), self.sens)
+ grads = self.grad(self.network, weights)(*args, sens)
+ if self.reducer_flag:
+ # apply grad reducer on grads
+ grads = self.grad_reducer(grads)
+ if self.use_global_norm:
+ grads = self.hyper_map(ops.partial(grad_scale, ops.scalar_to_array(self.sens)), grads)
+ grads = ops.clip_by_global_norm(grads)
+ self.optimizer(grads)
+ return loss
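+
+# Usage sketch (as in train.py): the wrapper owns the optimizer, so the model
+# can be trained with a bare Model(net) and no extra loss/optimizer arguments.
+# opt = nn.Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
+# lr, config.momentum, config.weight_decay, loss_scale)
+# net = TrainingWrapper(net, opt, loss_scale)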
+
+
+class SSDWithMobileNetV2(nn.Cell):
+ """
+ MobileNetV2 architecture for SSD backbone.
+
+ Args:
+ width_mult (float): Channel width multiplier. Default is 1.0.
+ inverted_residual_setting (list): Inverted residual settings. Default is None.
+ round_nearest (int): Round channel numbers to a multiple of this value. Default is 8.
+ Returns:
+ Tensor, the 13th feature after ConvBNReLU in MobileNetV2.
+ Tensor, the last feature in MobileNetV2.
+
+ Examples:
+ >>> SSDWithMobileNetV2()
+ """
+ def __init__(self, width_mult=1.0, inverted_residual_setting=None, round_nearest=8):
+ super(SSDWithMobileNetV2, self).__init__()
+ block = InvertedResidual
+ input_channel = 32
+ last_channel = 1280
+
+ if inverted_residual_setting is None:
+ inverted_residual_setting = [
+ # t, c, n, s
+ [1, 16, 1, 1],
+ [6, 24, 2, 2],
+ [6, 32, 3, 2],
+ [6, 64, 4, 2],
+ [6, 96, 3, 1],
+ [6, 160, 3, 2],
+ [6, 320, 1, 1],
+ ]
+ if len(inverted_residual_setting[0]) != 4:
+ raise ValueError("inverted_residual_setting should be non-empty "
+ "or a 4-element list, got {}".format(inverted_residual_setting))
+
+ # building first layer
+ input_channel = _make_divisible(input_channel * width_mult, round_nearest)
+ self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
+ features = [ConvBNReLU(3, input_channel, stride=2)]
+ # building inverted residual blocks
+ layer_index = 0
+ for t, c, n, s in inverted_residual_setting:
+ output_channel = _make_divisible(c * width_mult, round_nearest)
+ for i in range(n):
+ if layer_index == 13:
+ hidden_dim = int(round(input_channel * t))
+ self.expand_layer_conv_13 = ConvBNReLU(input_channel, hidden_dim, kernel_size=1)
+ stride = s if i == 0 else 1
+ features.append(block(input_channel, output_channel, stride, expand_ratio=t))
+ input_channel = output_channel
+ layer_index += 1
+ # building last several layers
+ features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
+
+ self.features_1 = nn.SequentialCell(features[:14])
+ self.features_2 = nn.SequentialCell(features[14:])
+
+ def construct(self, x):
+ out = self.features_1(x)
+ expand_layer_conv_13 = self.expand_layer_conv_13(out)
+ out = self.features_2(out)
+ return expand_layer_conv_13, out
+
+ def get_out_channels(self):
+ return self.last_channel
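+
+# Feature-shape sketch for a 300x300 input (standard SSD-MobileNetV2 sizes,
+# assuming width_mult=1.0, not read from this repo's config): the expanded
+# layer-13 feature is (B, 576, 19, 19) and the final feature is (B, 1280, 10, 10).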
+
+
+class SsdInferWithDecoder(nn.Cell):
+ """
+ SSD Infer wrapper to decode the bbox locations.
+
+ Args:
+ network (Cell): the origin ssd infer network without bbox decoder.
+ default_boxes (Tensor): the default_boxes from anchor generator
+ config (dict): ssd config
+ Returns:
+ Tensor, the locations for bbox after decoder representing (y0,x0,y1,x1)
+ Tensor, the prediction labels.
+
+ """
+ def __init__(self, network, default_boxes, config):
+ super(SsdInferWithDecoder, self).__init__()
+ self.network = network
+ self.default_boxes = default_boxes
+ self.prior_scaling_xy = config.prior_scaling[0]
+ self.prior_scaling_wh = config.prior_scaling[1]
+
+ def construct(self, x):
+ pred_loc, pred_label = self.network(x)
+
+ default_bbox_xy = self.default_boxes[..., :2]
+ default_bbox_wh = self.default_boxes[..., 2:]
+ pred_xy = pred_loc[..., :2] * self.prior_scaling_xy * default_bbox_wh + default_bbox_xy
+ pred_wh = ops.Exp()(pred_loc[..., 2:] * self.prior_scaling_wh) * default_bbox_wh
+
+ pred_xy_0 = pred_xy - pred_wh / 2.0
+ pred_xy_1 = pred_xy + pred_wh / 2.0
+ pred_xy = ops.Concat(-1)((pred_xy_0, pred_xy_1))
+ pred_xy = ops.Maximum()(pred_xy, 0)
+ pred_xy = ops.Minimum()(pred_xy, 1)
+ return pred_xy, pred_label
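+
+# Usage sketch (mirrors the eval path in train.py): decode raw predictions
+# into normalized corner boxes clipped to [0, 1].
+# eval_net = SsdInferWithDecoder(ssd, Tensor(default_boxes), config)
+# boxes, scores = eval_net(images)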
+
+
+def ssd_mobilenet_v1_fpn(**kwargs):
+ return SsdMobilenetV1Fpn(**kwargs)
+
+def ssd_mobilenet_v1(**kwargs):
+ return SsdMobilenetV1Feature(**kwargs)
+
+def ssd_resnet50_fpn(**kwargs):
+ return SsdResNet50Fpn(**kwargs)
+
+def ssd_mobilenet_v2(**kwargs):
+ return SSDWithMobileNetV2(**kwargs)
+
+
+class SSD300VGG16(nn.Cell):
+ def __init__(self, config):
+ super(SSD300VGG16, self).__init__()
+
+ # VGG16 backbone: block1~5
+ self.backbone = vgg16()
+
+ # SSD blocks: block6~7
+ self.b6_1 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, padding=6, dilation=6, pad_mode='pad')
+ self.b6_2 = nn.Dropout(0.5)
+
+ self.b7_1 = nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=1)
+ self.b7_2 = nn.Dropout(0.5)
+
+ # Extra Feature Layers: block8~11
+ self.b8_1 = nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1, padding=1, pad_mode='pad')
+ self.b8_2 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2, pad_mode='valid')
+
+ self.b9_1 = nn.Conv2d(in_channels=512, out_channels=128, kernel_size=1, padding=1, pad_mode='pad')
+ self.b9_2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, pad_mode='valid')
+
+ self.b10_1 = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1)
+ self.b10_2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, pad_mode='valid')
+
+ self.b11_1 = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1)
+ self.b11_2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, pad_mode='valid')
+
+ # boxes
+ self.multi_box = MultiBox(config)
+ # create the sigmoid unconditionally so the eval branch in construct can use it
+ self.activation = ops.Sigmoid()
+
+ def construct(self, x):
+ # VGG16 backbone: block1~5
+ block4, x = self.backbone(x)
+
+ # SSD blocks: block6~7
+ x = self.b6_1(x) # 1024
+ x = self.b6_2(x)
+
+ x = self.b7_1(x) # 1024
+ x = self.b7_2(x)
+ block7 = x
+
+ # Extra Feature Layers: block8~11
+ x = self.b8_1(x) # 256
+ x = self.b8_2(x) # 512
+ block8 = x
+
+ x = self.b9_1(x) # 128
+ x = self.b9_2(x) # 256
+ block9 = x
+
+ x = self.b10_1(x) # 128
+ x = self.b10_2(x) # 256
+ block10 = x
+
+ x = self.b11_1(x) # 128
+ x = self.b11_2(x) # 256
+ block11 = x
+
+ # boxes
+ multi_feature = (block4, block7, block8, block9, block10, block11)
+ pred_loc, pred_label = self.multi_box(multi_feature)
+ if not self.training:
+ pred_label = self.activation(pred_label)
+ pred_loc = ops.cast(pred_loc, ms.float32)
+ pred_label = ops.cast(pred_label, ms.float32)
+ return pred_loc, pred_label
+
+
+def ssd_vgg16(**kwargs):
+ return SSD300VGG16(**kwargs)
diff --git a/cv/detection/ssd/MindSpore/src/vgg16.py b/cv/detection/ssd/MindSpore/src/vgg16.py
new file mode 100755
index 0000000000000000000000000000000000000000..af02a823a839829bd08d5a331760d815f5eb839b
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/src/vgg16.py
@@ -0,0 +1,99 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""VGG16 backbone for SSD"""
+
+from mindspore import nn
+from src.model_utils.config import config
+
+pretrain_vgg_bn = config.pretrain_vgg_bn
+ssd_vgg_bn = config.ssd_vgg_bn
+
+
+def _get_key_mapper():
+ vgg_key_num = [1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5]
+ size = len(vgg_key_num)
+
+ pretrain_vgg_bn_false = [0, 2, 5, 7, 10, 12, 14, 17, 19, 21, 24, 26, 28]
+ pretrain_vgg_bn_true = [0, 3, 7, 10, 14, 17, 20, 24, 27, 30, 34, 37, 40]
+ ssd_vgg_bn_false = [0, 2, 0, 2, 0, 2, 4, 0, 2, 4, 0, 2, 4]
+ ssd_vgg_bn_true = [0, 3, 0, 3, 0, 3, 6, 0, 3, 6, 0, 3, 6]
+
+ pretrain_vgg_keys = pretrain_vgg_bn_true if pretrain_vgg_bn else pretrain_vgg_bn_false
+ ssd_vgg_keys = ssd_vgg_bn_true if ssd_vgg_bn else ssd_vgg_bn_false
+
+ pretrain_vgg_keys = ['layers.' + str(pretrain_vgg_keys[i]) for i in range(size)]
+ ssd_vgg_keys = ['b' + str(vgg_key_num[i]) + '.' + str(ssd_vgg_keys[i]) for i in range(size)]
+
+ return {pretrain_vgg_keys[i]: ssd_vgg_keys[i] for i in range(size)}
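+
+# Example of the mapping produced above (with both bn flags False):
+# 'layers.0' -> 'b1.0' and 'layers.5' -> 'b2.0', i.e. pretrained VGG conv
+# weights are routed to the matching block/index in the SSD backbone below.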
+
+
+ssd_vgg_key_mapper = _get_key_mapper()
+
+
+def _make_layer(channels):
+ in_channels = channels[0]
+ layers = []
+ for out_channels in channels[1:]:
+ layers.append(nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3))
+ if ssd_vgg_bn:
+ layers.append(nn.BatchNorm2d(out_channels))
+ layers.append(nn.ReLU())
+ in_channels = out_channels
+ return nn.SequentialCell(layers)
+
+
+class VGG16(nn.Cell):
+ def __init__(self):
+ super(VGG16, self).__init__()
+ self.b1 = _make_layer([3, 64, 64])
+ self.b2 = _make_layer([64, 128, 128])
+ self.b3 = _make_layer([128, 256, 256, 256])
+ self.b4 = _make_layer([256, 512, 512, 512])
+ self.b5 = _make_layer([512, 512, 512, 512])
+
+ self.m1 = nn.MaxPool2d(kernel_size=2, stride=2, pad_mode='SAME')
+ self.m2 = nn.MaxPool2d(kernel_size=2, stride=2, pad_mode='SAME')
+ self.m3 = nn.MaxPool2d(kernel_size=2, stride=2, pad_mode='SAME')
+ self.m4 = nn.MaxPool2d(kernel_size=2, stride=2, pad_mode='SAME')
+ self.m5 = nn.MaxPool2d(kernel_size=3, stride=1, pad_mode='SAME')
+
+ def construct(self, x):
+ # block1
+ x = self.b1(x)
+ x = self.m1(x)
+
+ # block2
+ x = self.b2(x)
+ x = self.m2(x)
+
+ # block3
+ x = self.b3(x)
+ x = self.m3(x)
+
+ # block4
+ x = self.b4(x)
+ block4 = x
+ x = self.m4(x)
+
+ # block5
+ x = self.b5(x)
+ x = self.m5(x)
+
+ return block4, x
+
+
+def vgg16():
+ return VGG16()
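+
+# Usage sketch: vgg16() returns (block4, x); for a 300x300 SSD input these are
+# (B, 512, 38, 38) and (B, 512, 19, 19) respectively (standard SSD-VGG16
+# sizes, stated as an assumption rather than taken from this repo's config).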
diff --git a/cv/detection/ssd/MindSpore/train.py b/cv/detection/ssd/MindSpore/train.py
new file mode 100755
index 0000000000000000000000000000000000000000..e18062d5f409ce7f16fa2ef35bd4e9465b110988
--- /dev/null
+++ b/cv/detection/ssd/MindSpore/train.py
@@ -0,0 +1,199 @@
+# Copyright 2020-2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Train SSD and get checkpoint files."""
+
+import os
+import mindspore as ms
+import mindspore.nn as nn
+from mindspore import Tensor
+from mindspore.communication.management import init, get_rank
+from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, LossMonitor, TimeMonitor
+from mindspore.train import Model
+from mindspore.context import ParallelMode
+from mindspore.common import set_seed, dtype
+from src.ssd import SSD300, SsdInferWithDecoder, SSDWithLossCell, TrainingWrapper, ssd_mobilenet_v2,\
+ ssd_mobilenet_v1_fpn, ssd_mobilenet_v1, ssd_resnet50_fpn, ssd_vgg16
+from src.dataset import create_ssd_dataset, create_mindrecord
+from src.lr_schedule import get_lr
+from src.init_params import init_net_param, filter_checkpoint_parameter_by_list
+from src.eval_callback import EvalCallBack
+from src.eval_utils import apply_eval
+from src.box_utils import default_boxes
+from src.model_utils.config import config
+from src.model_utils.moxing_adapter import moxing_wrapper
+
+set_seed(1)
+
+def ssd_model_build():
+ if config.model_name == "ssd300":
+ backbone = ssd_mobilenet_v2()
+ ssd = SSD300(backbone=backbone, config=config)
+ init_net_param(ssd)
+ if config.freeze_layer == "backbone":
+ for param in backbone.features_1.trainable_params():
+ param.requires_grad = False
+ elif config.model_name == "ssd_mobilenet_v1_fpn":
+ ssd = ssd_mobilenet_v1_fpn(config=config)
+ init_net_param(ssd)
+ if config.feature_extractor_base_param != "":
+ param_dict = ms.load_checkpoint(config.feature_extractor_base_param)
+ for x in list(param_dict.keys()):
+ param_dict["network.feature_extractor.mobilenet_v1." + x] = param_dict[x]
+ del param_dict[x]
+ ms.load_param_into_net(ssd.feature_extractor.mobilenet_v1.network, param_dict)
+ elif config.model_name == "ssd_mobilenet_v1":
+ ssd = ssd_mobilenet_v1(config=config)
+ init_net_param(ssd)
+ if config.feature_extractor_base_param != "":
+ param_dict = ms.load_checkpoint(config.feature_extractor_base_param)
+ for x in list(param_dict.keys()):
+ param_dict["network.feature_extractor.mobilenet_v1." + x] = param_dict[x]
+ del param_dict[x]
+ ms.load_param_into_net(ssd.feature_extractor.mobilenet_v1.network, param_dict)
+ elif config.model_name == "ssd_resnet50_fpn":
+ ssd = ssd_resnet50_fpn(config=config)
+ init_net_param(ssd)
+ if config.feature_extractor_base_param != "":
+ param_dict = ms.load_checkpoint(config.feature_extractor_base_param)
+ for x in list(param_dict.keys()):
+ param_dict["network.feature_extractor.resnet." + x] = param_dict[x]
+ del param_dict[x]
+ ms.load_param_into_net(ssd.feature_extractor.resnet, param_dict)
+ elif config.model_name == "ssd_vgg16":
+ ssd = ssd_vgg16(config=config)
+ init_net_param(ssd)
+ if config.feature_extractor_base_param != "":
+ param_dict = ms.load_checkpoint(config.feature_extractor_base_param)
+ from src.vgg16 import ssd_vgg_key_mapper
+ for k in ssd_vgg_key_mapper:
+ v = ssd_vgg_key_mapper[k]
+ param_dict["network.backbone." + v + ".weight"] = param_dict[k + ".weight"]
+ del param_dict[k + ".weight"]
+ ms.load_param_into_net(ssd.backbone, param_dict)
+ else:
+ raise ValueError(f'config.model_name: {config.model_name} is not supported')
+ return ssd
+
+def set_graph_kernel_context(device_target, model):
+ if device_target == "GPU" and model == "ssd300":
+ # Enable graph kernel for default model ssd300 on GPU back-end.
+ ms.set_context(enable_graph_kernel=True,
+ graph_kernel_flags="--enable_parallel_fusion --enable_expand_ops=Conv2D")
+ if device_target == "GPU" and model == "ssd_mobilenet_v1":
+ # Enable graph kernel for default model ssd300 on GPU back-end.
+ ms.context.set_context(enable_graph_kernel=True,
+ graph_kernel_flags="--enable_parallel_fusion --enable_expand_ops=Conv2D")
+
+@moxing_wrapper()
+def train_net():
+ if hasattr(config, 'num_ssd_boxes') and config.num_ssd_boxes == -1:
+ num = 0
+ h, w = config.img_shape
+ for i in range(len(config.steps)):
+ num += (h // config.steps[i]) * (w // config.steps[i]) * config.num_default[i]
+ config.num_ssd_boxes = num
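+ # Worked example (hypothetical values, not this repo's config): with
+ # img_shape=(640, 640), steps=[8, 16, 32, 64, 128] and num_default=[6]*5,
+ # num_ssd_boxes = 6 * (80*80 + 40*40 + 20*20 + 10*10 + 5*5) = 51150.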
+
+ rank = 0
+ device_num = 1
+ loss_scale = float(config.loss_scale)
+ if config.device_target == "CPU":
+ loss_scale = 1.0
+ ms.set_context(mode=ms.GRAPH_MODE, device_target="CPU")
+ else:
+ ms.set_context(mode=ms.GRAPH_MODE, device_target=config.device_target, device_id=config.device_id)
+ set_graph_kernel_context(config.device_target, config.model_name)
+ if config.run_distribute:
+ device_num = config.device_num
+ ms.reset_auto_parallel_context()
+ ms.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
+ device_num=device_num)
+ init()
+ if config.all_reduce_fusion_config:
+ ms.set_auto_parallel_context(all_reduce_fusion_config=config.all_reduce_fusion_config)
+ rank = get_rank()
+
+ mindrecord_file = create_mindrecord(config.dataset, "ssd.mindrecord", True)
+
+ if config.only_create_dataset:
+ return
+
+ # When creating the MindDataset, use the first mindrecord file, such as ssd.mindrecord0.
+ use_multiprocessing = (config.device_target != "CPU")
+ dataset = create_ssd_dataset(mindrecord_file, batch_size=config.batch_size,
+ device_num=device_num, rank=rank, use_multiprocessing=use_multiprocessing)
+
+ dataset_size = dataset.get_dataset_size()
+ print(f"Create dataset done! dataset size is {dataset_size}")
+ ssd = ssd_model_build()
+ if (hasattr(config, 'use_float16') and config.use_float16):
+ ssd.to_float(dtype.float16)
+ net = SSDWithLossCell(ssd, config)
+
+ # checkpoint
+ ckpt_config = CheckpointConfig(save_checkpoint_steps=dataset_size * config.save_checkpoint_epochs)
+ ckpt_save_dir = config.output_path + '/ckpt_{}/'.format(rank)
+ ckpoint_cb = ModelCheckpoint(prefix="ssd", directory=ckpt_save_dir, config=ckpt_config)
+
+ if config.pre_trained:
+ param_dict = ms.load_checkpoint(config.pre_trained)
+ if config.filter_weight:
+ filter_checkpoint_parameter_by_list(param_dict, config.checkpoint_filter_list)
+ ms.load_param_into_net(net, param_dict, True)
+
+ lr = Tensor(get_lr(global_step=config.pre_trained_epoch_size * dataset_size,
+ lr_init=config.lr_init, lr_end=config.lr_end_rate * config.lr, lr_max=config.lr,
+ warmup_epochs=config.warmup_epochs,
+ total_epochs=config.epoch_size,
+ steps_per_epoch=dataset_size))
+
+ if hasattr(config, 'use_global_norm') and config.use_global_norm:
+ opt = nn.Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr,
+ config.momentum, config.weight_decay, 1.0)
+ net = TrainingWrapper(net, opt, loss_scale, True)
+ else:
+ opt = nn.Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr,
+ config.momentum, config.weight_decay, loss_scale)
+ net = TrainingWrapper(net, opt, loss_scale)
+
+ callback = [TimeMonitor(data_size=dataset_size), LossMonitor(), ckpoint_cb]
+ if config.run_eval:
+ eval_net = SsdInferWithDecoder(ssd, Tensor(default_boxes), config)
+ eval_net.set_train(False)
+ mindrecord_file = create_mindrecord(config.dataset, "ssd_eval.mindrecord", False)
+ eval_dataset = create_ssd_dataset(mindrecord_file, batch_size=config.batch_size,
+ is_training=False, use_multiprocessing=False)
+ if config.dataset == "coco":
+ anno_json = os.path.join(config.coco_root, config.instances_set.format(config.val_data_type))
+ elif config.dataset == "voc":
+ anno_json = os.path.join(config.voc_root, config.voc_json)
+ else:
+ raise ValueError('SSD eval only supports dataset mode coco or voc!')
+ eval_param_dict = {"net": eval_net, "dataset": eval_dataset, "anno_json": anno_json}
+ eval_cb = EvalCallBack(apply_eval, eval_param_dict, interval=config.eval_interval,
+ eval_start_epoch=config.eval_start_epoch, save_best_ckpt=True,
+ ckpt_directory=ckpt_save_dir, besk_ckpt_name="best_map.ckpt",
+ metrics_name="mAP")
+ callback.append(eval_cb)
+ model = Model(net)
+ dataset_sink_mode = False
+ if config.mode_sink == "sink" and config.device_target != "CPU":
+ print("In sink mode, one epoch return a loss.")
+ dataset_sink_mode = True
+ print("Start train SSD, the first epoch will be slower because of the graph compilation.")
+ model.train(config.epoch_size, dataset, callbacks=callback, dataset_sink_mode=dataset_sink_mode)
+
+if __name__ == '__main__':
+ train_net()
diff --git a/cv/image_generation/dcgan/MindSpore/README.md b/cv/image_generation/dcgan/MindSpore/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a12eb335f1e114481c765d2c66edf3b46d522ba6
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/README.md
@@ -0,0 +1,51 @@
+
+# DCGAN
+## Model description
+
+The deep convolutional generative adversarial network (DCGAN) first introduced CNNs into the GAN structure, using the strong feature-extraction ability of convolutional layers to improve the generation quality of GANs.
+
+[Paper](https://arxiv.org/pdf/1511.06434.pdf): Radford A, Metz L, Chintala S. Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks[J]. Computer Science, 2015.
+## Step 1: Installing
+```
+pip3 install -r requirements.txt
+```
+## Step 2: Prepare Datasets
+
+Dataset used to train DCGAN: [ImageNet-1k]()
+
+- Dataset size: about 125 GB, 224×224 color images in 1000 classes
+ - Train: 120G, 1281167 images
+ - Test: 5G, 50000 images
+- Data format: RGB images.
+ - Note: Data will be processed in src/dataset.py
+
+```path
+
+└─imagenet_original
+ └─train
+```
+## Step 3: Training
+### On single GPU
+```bash
+python3 train.py --device_id=2 --data_url=/home/datasets/cv/imagenet/train --train_url=./ --device_target=GPU
+```
+### [Evaluation]
+
+```bash
+python3 -u eval.py --device_id=$DEVICE_ID --img_url=$PATH1 --ckpt_url=$PATH2 --device_target=GPU
+```
+
+### [Evaluation result]
+### Single-device performance: BI-V100
+
+
+### Single-device performance: NV-V100S
+
+
diff --git a/cv/image_generation/dcgan/MindSpore/ascend310_infer/CMakeLists.txt b/cv/image_generation/dcgan/MindSpore/ascend310_infer/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d8c92f9b1d031943418145713e41fdda83bd73f0
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/ascend310_infer/CMakeLists.txt
@@ -0,0 +1,16 @@
+cmake_minimum_required(VERSION 3.14.1)
+project(Ascend310Infer)
+add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -std=c++17 -Werror -Wall -fPIE -Wl,--allow-shlib-undefined")
+set(PROJECT_SRC_ROOT ${CMAKE_CURRENT_LIST_DIR}/)
+option(MINDSPORE_PATH "mindspore install path" "")
+include_directories(${MINDSPORE_PATH})
+include_directories(${MINDSPORE_PATH}/include)
+include_directories(${PROJECT_SRC_ROOT})
+find_library(MS_LIB libmindspore.so ${MINDSPORE_PATH}/lib)
+file(GLOB_RECURSE MD_LIB ${MINDSPORE_PATH}/_c_dataengine*)
+
+add_executable(main src/main.cc src/utils.cc)
+find_package(gflags REQUIRED)
+target_link_libraries(main ${MS_LIB} ${MD_LIB} gflags)
+
diff --git a/cv/image_generation/dcgan/MindSpore/ascend310_infer/build.sh b/cv/image_generation/dcgan/MindSpore/ascend310_infer/build.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d803d4a1683240601d3028de2aba2ef4640af460
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/ascend310_infer/build.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+if [ ! -d out ]; then
+ mkdir out
+fi
+cd out || exit
+cmake .. \
+ -DMINDSPORE_PATH="`pip show mindspore-ascend | grep Location | awk '{print $2"/mindspore"}' | xargs realpath`"
+make
\ No newline at end of file
diff --git a/cv/image_generation/dcgan/MindSpore/ascend310_infer/inc/utils.h b/cv/image_generation/dcgan/MindSpore/ascend310_infer/inc/utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..52542080ea932266a8dc25d6080e4b564cce0881
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/ascend310_infer/inc/utils.h
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_INFERENCE_UTILS_H_
+#define MINDSPORE_INFERENCE_UTILS_H_
+
+#include <sys/stat.h>
+#include <dirent.h>
+#include <vector>
+#include <string>
+#include <memory>
+#include "include/api/types.h"
+
+std::vector<std::string> GetAllFiles(std::string_view dirName);
+DIR *OpenDir(std::string_view dirName);
+std::string RealPath(std::string_view path);
+mindspore::MSTensor ReadFileToTensor(const std::string &file);
+int WriteResult(const std::string& imageFile, const std::vector<mindspore::MSTensor> &outputs, const std::string& mode);
+std::vector<std::string> GetAllFiles(std::string dir_name);
+std::vector<std::vector<std::string>> GetAllInputData(std::string dir_name);
+
+#endif
+
diff --git a/cv/image_generation/dcgan/MindSpore/ascend310_infer/src/main.cc b/cv/image_generation/dcgan/MindSpore/ascend310_infer/src/main.cc
new file mode 100644
index 0000000000000000000000000000000000000000..336d9632d9f6f114b9a7daa007adf6047c63652b
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/ascend310_infer/src/main.cc
@@ -0,0 +1,118 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sys/time.h>
+#include <gflags/gflags.h>
+#include <dirent.h>
+#include <iostream>
+#include <string>
+#include <algorithm>
+#include <iosfwd>
+#include <vector>
+#include <fstream>
+
+#include "../inc/utils.h"
+#include "include/dataset/execute.h"
+#include "include/dataset/transforms.h"
+#include "include/dataset/vision.h"
+#include "include/dataset/vision_ascend.h"
+#include "include/api/types.h"
+#include "include/api/model.h"
+#include "include/api/serialization.h"
+#include "include/api/context.h"
+
+using mindspore::Serialization;
+using mindspore::Model;
+using mindspore::Context;
+using mindspore::Status;
+using mindspore::ModelType;
+using mindspore::Graph;
+using mindspore::GraphCell;
+using mindspore::kSuccess;
+using mindspore::MSTensor;
+using mindspore::DataType;
+using mindspore::dataset::Execute;
+using mindspore::dataset::TensorTransform;
+using mindspore::dataset::vision::Decode;
+using mindspore::dataset::vision::Resize;
+using mindspore::dataset::vision::Normalize;
+using mindspore::dataset::vision::HWC2CHW;
+
+
+DEFINE_string(mindir_path, "", "model path");
+DEFINE_string(dataset_path, "", "dataset path");
+DEFINE_int32(device_id, 0, "device id");
+DEFINE_string(mode, "", "train or test");
+
+int main(int argc, char **argv) {
+ gflags::ParseCommandLineFlags(&argc, &argv, true);
+
+ Model model;
+
+ std::vector<MSTensor> model_inputs;
+ if (RealPath(FLAGS_mindir_path).empty()) {
+ std::cout << "Invalid mindir" << std::endl;
+ return -1;
+ }
+
+ auto context = std::make_shared<Context>();
+ auto ascend310 = std::make_shared<mindspore::Ascend310DeviceInfo>();
+ ascend310->SetDeviceID(FLAGS_device_id);
+ context->MutableDeviceInfo().push_back(ascend310);
+ mindspore::Graph graph;
+ Serialization::Load(FLAGS_mindir_path, ModelType::kMindIR, &graph);
+
+ Status ret_build = model.Build(GraphCell(graph), context);
+ if (ret_build != kSuccess) {
+ std::cout << "ERROR: Build failed." << std::endl;
+ return -1;
+ }
+
+ model_inputs = model.GetInputs();
+ if (model_inputs.empty()) {
+ std::cout << "Invalid model, inputs is empty." << std::endl;
+ return -1;
+ }
+
+ auto input0_files = GetAllFiles(FLAGS_dataset_path);
+ if (input0_files.empty()) {
+ std::cout << "ERROR: no input data." << std::endl;
+ return 1;
+ }
+ size_t size = input0_files.size();
+ for (size_t i = 0; i < size; ++i) {
+ std::vector<MSTensor> inputs;
+ std::vector<MSTensor> outputs;
+ std::cout << "Start predict input files:" << input0_files[i] << std::endl;
+ // build the input tensor from the file content and run inference
+ inputs.emplace_back(model_inputs[0].Name(), model_inputs[0].DataType(), model_inputs[0].Shape(),
+ ReadFileToTensor(input0_files[i]).Data().get(), model_inputs[0].DataSize());
+ Status ret_predict = model.Predict(inputs, &outputs);
+ if (ret_predict != kSuccess) {
+ std::cout << "Predict " << input0_files[i] << " failed." << std::endl;
+ return 1;
+ }
+ WriteResult(input0_files[i], outputs, FLAGS_mode);
+ }
+ return 0;
+}
diff --git a/cv/image_generation/dcgan/MindSpore/ascend310_infer/src/utils.cc b/cv/image_generation/dcgan/MindSpore/ascend310_infer/src/utils.cc
new file mode 100644
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/ascend310_infer/src/utils.cc
+#include <fstream>
+#include <algorithm>
+#include <iostream>
+#include "inc/utils.h"
+
+using mindspore::MSTensor;
+using mindspore::DataType;
+
+std::vector<std::vector<std::string>> GetAllInputData(std::string dir_name) {
+ std::vector<std::vector<std::string>> ret;
+
+ DIR *dir = OpenDir(dir_name);
+ if (dir == nullptr) {
+ return {};
+ }
+ struct dirent *filename;
+ /* read all the files in the dir ~ */
+ std::vector<std::string> sub_dirs;
+ while ((filename = readdir(dir)) != nullptr) {
+ std::string d_name = std::string(filename->d_name);
+ // get rid of "." and ".."
+ if (d_name == "." || d_name == ".." || d_name.empty()) {
+ continue;
+ }
+ std::string dir_path = RealPath(std::string(dir_name) + "/" + filename->d_name);
+ struct stat s;
+ lstat(dir_path.c_str(), &s);
+ if (!S_ISDIR(s.st_mode)) {
+ continue;
+ }
+
+ sub_dirs.emplace_back(dir_path);
+ }
+ std::sort(sub_dirs.begin(), sub_dirs.end());
+
+ (void)std::transform(sub_dirs.begin(), sub_dirs.end(), std::back_inserter(ret),
+ [](const std::string &d) { return GetAllFiles(d); });
+
+ return ret;
+}
+
+std::vector<std::string> GetAllFiles(std::string dir_name) {
+ struct dirent *filename;
+ DIR *dir = OpenDir(dir_name);
+ if (dir == nullptr) {
+ return {};
+ }
+ std::vector<std::string> res;
+ while ((filename = readdir(dir)) != nullptr) {
+ std::string d_name = std::string(filename->d_name);
+ if (d_name == "." || d_name == ".." || d_name.size() <= 3) {
+ continue;
+ }
+ res.emplace_back(std::string(dir_name) + "/" + filename->d_name);
+ }
+ std::sort(res.begin(), res.end());
+ return res;
+}
+
+std::vector<std::string> GetAllFiles(std::string_view dirName) {
+ std::cout << "string_view" << std::endl;
+ struct dirent *filename;
+ DIR *dir = OpenDir(dirName);
+ if (dir == nullptr) {
+ return {};
+ }
+ std::vector<std::string> res;
+ while ((filename = readdir(dir)) != nullptr) {
+ std::string dName = std::string(filename->d_name);
+ if (dName == "." || dName == ".." || filename->d_type != DT_REG) {
+ continue;
+ }
+ res.emplace_back(std::string(dirName) + "/" + filename->d_name);
+ }
+ std::sort(res.begin(), res.end());
+ for (auto &f : res) {
+ std::cout << "image file: " << f << std::endl;
+ }
+ return res;
+}
+
+int WriteResult(const std::string& imageFile, const std::vector &outputs, const std::string& mode) {
+ std::string homePath = "./result_Files_"+mode;
+ const int INVALID_POINTER = -1;
+ const int ERROR = -2;
+ for (size_t i = 0; i < outputs.size(); ++i) {
+ size_t outputSize;
+ std::shared_ptr<const void> netOutput;
+ netOutput = outputs[i].Data();
+ outputSize = outputs[i].DataSize();
+ int pos = imageFile.rfind('/');
+ std::string fileName(imageFile, pos + 1);
+ fileName.replace(fileName.find('.'), fileName.size() - fileName.find('.'), "_"+mode+"_" + std::to_string(i)
+ + ".bin");
+ std::string outFileName = homePath + "/" + fileName;
+ FILE *outputFile = fopen(outFileName.c_str(), "wb");
+ if (outputFile == nullptr) {
+ std::cout << "open result file " << outFileName << " failed" << std::endl;
+ return INVALID_POINTER;
+ }
+ size_t size = fwrite(netOutput.get(), sizeof(char), outputSize, outputFile);
+ if (size != outputSize) {
+ fclose(outputFile);
+ outputFile = nullptr;
+ std::cout << "write result file " << outFileName << " failed, write size[" << size <<
+ "] is smaller than output size[" << outputSize << "], maybe the disk is full." << std::endl;
+ return ERROR;
+ }
+ fclose(outputFile);
+ std::cout << "save result file " << outFileName << " success" << std::endl;
+ outputFile = nullptr;
+ }
+ return 0;
+}
+
+mindspore::MSTensor ReadFileToTensor(const std::string &file) {
+ if (file.empty()) {
+ std::cout << "Pointer file is nullptr" << std::endl;
+ return mindspore::MSTensor();
+ }
+
+ std::ifstream ifs(file);
+ if (!ifs.good()) {
+ std::cout << "File: " << file << " is not exist" << std::endl;
+ return mindspore::MSTensor();
+ }
+
+ if (!ifs.is_open()) {
+ std::cout << "File: " << file << "open failed" << std::endl;
+ return mindspore::MSTensor();
+ }
+
+ ifs.seekg(0, std::ios::end);
+ size_t size = ifs.tellg();
+ mindspore::MSTensor buffer(file, mindspore::DataType::kNumberTypeUInt8,
+ {static_cast<int64_t>(size)}, nullptr, size);
+
+ ifs.seekg(0, std::ios::beg);
+ ifs.read(reinterpret_cast<char *>(buffer.MutableData()), size);
+ ifs.close();
+
+ return buffer;
+}
+
+DIR *OpenDir(std::string_view dirName) {
+ if (dirName.empty()) {
+ std::cout << " dirName is null ! " << std::endl;
+ return nullptr;
+ }
+ std::string realPath = RealPath(dirName);
+ struct stat s;
+ lstat(realPath.c_str(), &s);
+ if (!S_ISDIR(s.st_mode)) {
+ std::cout << "dirName is not a valid directory !" << std::endl;
+ return nullptr;
+ }
+ DIR *dir;
+ dir = opendir(realPath.c_str());
+ if (dir == nullptr) {
+ std::cout << "Can not open dir " << dirName << std::endl;
+ return nullptr;
+ }
+ std::cout << "Successfully opened the dir " << dirName << std::endl;
+ return dir;
+}
+
+std::string RealPath(std::string_view path) {
+ char realPathMem[PATH_MAX] = {0};
+ char *realPathRet = nullptr;
+ realPathRet = realpath(path.data(), realPathMem);
+ if (realPathRet == nullptr) {
+ std::cout << "File: " << path << " is not exist.";
+ return "";
+ }
+
+ std::string realPath(realPathMem);
+ std::cout << path << " realpath is: " << realPath << std::endl;
+ return realPath;
+}
diff --git a/cv/image_generation/dcgan/MindSpore/docker_start.sh b/cv/image_generation/dcgan/MindSpore/docker_start.sh
new file mode 100644
index 0000000000000000000000000000000000000000..ff4ec545577096e72691d0cdbaa4403e2aacadca
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/docker_start.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+# Copyright(C) 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+docker_image=$1
+data_dir=$2
+model_dir=$3
+
+docker run -it --ipc=host \
+ --device=/dev/davinci0 \
+ --device=/dev/davinci1 \
+ --device=/dev/davinci2 \
+ --device=/dev/davinci3 \
+ --device=/dev/davinci4 \
+ --device=/dev/davinci5 \
+ --device=/dev/davinci6 \
+ --device=/dev/davinci7 \
+ --device=/dev/davinci_manager \
+ --device=/dev/devmm_svm --device=/dev/hisi_hdc \
+ -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
+ -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \
+ -v ${model_dir}:${model_dir} \
+ -v ${data_dir}:${data_dir} \
+ -v ~/ascend/log/npu/conf/slog/slog.conf:/var/log/npu/conf/slog/slog.conf \
+ -v ~/ascend/log/npu/slog/:/var/log/npu/slog -v ~/ascend/log/npu/profiling/:/var/log/npu/profiling \
+ -v ~/ascend/log/npu/dump/:/var/log/npu/dump -v ~/ascend/log/npu/:/usr/slog ${docker_image} \
+ /bin/bash
diff --git a/cv/image_generation/dcgan/MindSpore/eval.py b/cv/image_generation/dcgan/MindSpore/eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..ba2e93a90f1285926f09246f0d8d9068166596c1
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/eval.py
@@ -0,0 +1,82 @@
+# Copyright 2021-2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""dcgan eval"""
+import argparse
+import numpy as np
+from mindspore import context, Tensor, nn, load_checkpoint
+
+from src.config import dcgan_imagenet_cfg as cfg
+from src.generator import Generator
+from src.discriminator import Discriminator
+from src.cell import WithLossCellD, WithLossCellG
+from src.dcgan import DCGAN
+
+
+def save_imgs(gen_imgs, img_url):
+ """save_imgs function"""
+ import matplotlib
+ matplotlib.use('Agg')
+ import matplotlib.pyplot as plt
+ for i in range(gen_imgs.shape[0]):
+ plt.subplot(4, 4, i + 1)
+ # map generator outputs from [-1, 1] back to [0, 255] pixel values
+ gen_imgs[i] = gen_imgs[i] * 127.5 + 127.5
+ perm = (1, 2, 0)
+ show_imgs = np.transpose(gen_imgs[i], perm)
+ img = show_imgs.astype(int)
+ plt.imshow(img)
+ plt.axis("off")
+ plt.savefig(img_url + "/generate.png")
+
+
+def load_dcgan(ckpt_url):
+ """load_dcgan function"""
+ netD = Discriminator()
+ netG = Generator()
+
+ criterion = nn.BCELoss(reduction='mean')
+
+ netD_with_criterion = WithLossCellD(netD, netG, criterion)
+ netG_with_criterion = WithLossCellG(netD, netG, criterion)
+
+ optimizerD = nn.Adam(netD.trainable_params(), learning_rate=cfg.learning_rate, beta1=cfg.beta1)
+ optimizerG = nn.Adam(netG.trainable_params(), learning_rate=cfg.learning_rate, beta1=cfg.beta1)
+
+ myTrainOneStepCellForD = nn.TrainOneStepCell(netD_with_criterion, optimizerD)
+ myTrainOneStepCellForG = nn.TrainOneStepCell(netG_with_criterion, optimizerG)
+
+ dcgan = DCGAN(myTrainOneStepCellForD, myTrainOneStepCellForG)
+ load_checkpoint(ckpt_url, dcgan)
+ netG_trained = dcgan.myTrainOneStepCellForG.network.netG
+ return netG_trained
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='MindSpore dcgan training')
+ parser.add_argument('--device_target', type=str, default='Ascend', help='Ascend or GPU')
+ parser.add_argument('--device_id', type=int, default=0, help='device id of Ascend or GPU (Default: 0)')
+ parser.add_argument('--img_url', type=str, default=None, help='img save path')
+ parser.add_argument('--ckpt_url', type=str, default=None, help='checkpoint load path')
+ args = parser.parse_args()
+
+ context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
+ context.set_context(device_id=args.device_id)
+
+ fixed_noise = Tensor(np.random.normal(size=(16, cfg.latent_size, 1, 1)).astype("float32"))
+
+ net_G = load_dcgan(args.ckpt_url)
+ fake = net_G(fixed_noise)
+ print("================saving images================")
+ save_imgs(fake.asnumpy(), args.img_url)
+ print("================success================")
diff --git a/cv/image_generation/dcgan/MindSpore/export.py b/cv/image_generation/dcgan/MindSpore/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..617ac0883523961988a30e1efb3d36db0273d90f
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/export.py
@@ -0,0 +1,172 @@
+# Copyright 2021-2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""export checkpoint file into air, onnx, mindir models"""
+import argparse
+import ast
+import os
+
+import numpy as np
+
+from mindspore import Tensor, nn, ops, context, load_checkpoint, export
+import mindspore.common.dtype as mstype
+
+from src.cell import WithLossCellD, WithLossCellG
+from src.dcgan import DCGAN
+from src.discriminator import Discriminator
+from src.generator import Generator
+from src.config import dcgan_imagenet_cfg as cfg
+
+
+def load_dcgan(ckpt_url):
+ """
+ load dcgan from checkpoint file
+ """
+ netD = Discriminator()
+ netG = Generator()
+
+ criterion = nn.BCELoss(reduction='mean')
+
+ netD_with_criterion = WithLossCellD(netD, netG, criterion)
+ netG_with_criterion = WithLossCellG(netD, netG, criterion)
+
+ optimizerD = nn.Adam(netD.trainable_params(), learning_rate=cfg.learning_rate, beta1=cfg.beta1)
+ optimizerG = nn.Adam(netG.trainable_params(), learning_rate=cfg.learning_rate, beta1=cfg.beta1)
+
+ myTrainOneStepCellForD = nn.TrainOneStepCell(netD_with_criterion, optimizerD)
+ myTrainOneStepCellForG = nn.TrainOneStepCell(netG_with_criterion, optimizerG)
+
+ net = DCGAN(myTrainOneStepCellForD, myTrainOneStepCellForG)
+ load_checkpoint(ckpt_url, net)
+ net.set_train(False)
+ return net
+
+
+def load_discriminator(dcgan_net):
+ """
+ load discriminator layers from dcgan model
+ """
+ netD_trained = dcgan_net.myTrainOneStepCellForD.network.netD
+ for m in netD_trained.discriminator.cells_and_names():
+ if m[0] == '0':
+ print(m[0], m[1])
+ conv_1 = m[1]
+ elif m[0] == '1':
+ print(m[0], m[1])
+ leakyReLU_1 = m[1]
+ elif m[0] == '2':
+ print(m[0], m[1])
+ conv_2 = m[1]
+ elif m[0] == '3':
+ print(m[0], m[1])
+ bm_1 = m[1]
+ elif m[0] == '4':
+ print(m[0], m[1])
+ leakyReLU_2 = m[1]
+ elif m[0] == '5':
+ print(m[0], m[1])
+ conv_3 = m[1]
+ return conv_1, leakyReLU_1, conv_2, bm_1, leakyReLU_2, conv_3
+
+
+class DiscriminatorConvert(nn.Cell):
+ """
+ Discriminator_convert
+ """
+
+ def __init__(self, conv1, leakyReLU1, conv2, bm1, leakyReLU2, conv3):
+ super(DiscriminatorConvert, self).__init__()
+ self.conv1 = conv1
+ self.leakyReLU1 = leakyReLU1
+ self.conv2 = conv2
+ self.bm1 = bm1
+ self.leakyReLU2 = leakyReLU2
+ self.conv3 = conv3
+ self.maxpool1 = nn.MaxPool2d(kernel_size=4, stride=4)
+ self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
+ self.concat = ops.Concat(1)
+ self.reshape = ops.Reshape()
+
+ def construct(self, x):
+ x = self.conv1(x)
+ output1 = self.maxpool1(x)
+ x = self.conv2(self.leakyReLU1(x))
+ output2 = self.maxpool2(x)
+ x = self.conv3(self.leakyReLU2(self.bm1(x)))
+ output3 = x
+ result = self.concat((output1, output2, output3))
+ result = self.reshape(result, (1, -1))
+ return result
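+
+# Data-flow sketch for the cell above (shapes assumed for a 32x32 input with
+# stride-2 discriminator convs, not verified against src/discriminator.py):
+# conv1's 16x16 map is max-pooled 4x, conv2's 8x8 map 2x, and conv3's 4x4 map
+# is used as-is, so all three branches share a spatial size and can be
+# concatenated on the channel axis, then flattened to one feature row.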
+
+
+parser = argparse.ArgumentParser(description='dcgan export')
+parser.add_argument("--run_modelart", type=ast.literal_eval, default=False, help="Run on modelArt, default is false.")
+parser.add_argument('--device_target', type=str, default='Ascend', choices=('Ascend', 'GPU'),
+ help='device where the code will be implemented (default: Ascend)')
+parser.add_argument("--device_id", type=int, default=0, help="Device id")
+parser.add_argument("--batch_size", type=int, default=100, help="batch size")
+parser.add_argument("--ckpt_url", default=None, help="Checkpoint file url.")
+parser.add_argument("--ckpt_file", default=None, help="Checkpoint file name.")
+parser.add_argument('--data_url', default=None, help='Directory contains dataset.')
+parser.add_argument('--train_url', default=None, help='Directory contains checkpoint file')
+parser.add_argument("--file_name", type=str, default="dcgan", help="output file name.")
+parser.add_argument("--file_format", type=str, default="MINDIR", help="file format")
+parser.add_argument("--load_netG", type=str, default=False, help="export netG, default is false.")
+parser.add_argument("--load_netD", type=str, default=True, help="export netD for infer, default is True.")
+parser.add_argument("--load_G_and_D", type=str, default=False, help="export netG and netD, default is false.")
+args = parser.parse_args()
+
+if args.run_modelart:
+ local_ckpt_url = '/cache/train_outputs'
+ device_id = int(os.getenv('DEVICE_ID'))
+ context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
+ save_graphs=False)
+ context.set_context(device_id=device_id)
+ import moxing as mox
+ mox.file.copy_parallel(src_url=args.ckpt_url, dst_url=local_ckpt_url)
+ local_ckpt_url = local_ckpt_url + args.ckpt_file
+else:
+ local_ckpt_url = args.ckpt_file
+ device_target = args.device_target
+ device_id = args.device_id
+ context.set_context(mode=context.GRAPH_MODE, device_target=device_target, save_graphs=False, device_id=device_id)
+
+
+if __name__ == '__main__':
+ if args.load_netD:
+ dcgan = load_dcgan(local_ckpt_url)
+ d_conv1, d_leakyReLU1, d_conv2, d_bm1, d_leakyReLU2, d_conv3 = load_discriminator(dcgan)
+ discriminator_convert = DiscriminatorConvert(conv1=d_conv1, leakyReLU1=d_leakyReLU1, conv2=d_conv2, bm1=d_bm1,
+ leakyReLU2=d_leakyReLU2, conv3=d_conv3)
+ discriminator_convert.set_train(False)
+
+ inputs = Tensor(np.random.rand(args.batch_size, 3, 32, 32), mstype.float32)
+ export(discriminator_convert, inputs, file_name=args.file_name, file_format=args.file_format)
+ elif args.load_netG:
+ dcgan = load_dcgan(local_ckpt_url)
+ netG_trained = dcgan.myTrainOneStepCellForG.network.netG
+ netG_trained.set_train(False)
+ latent_code = Tensor(np.random.rand(args.batch_size, 100, 1, 1), mstype.float32)
+ export(netG_trained, latent_code, file_name=args.file_name, file_format=args.file_format)
+ else:
+ dcgan = load_dcgan(local_ckpt_url)
+ # inputs = Tensor(np.random.rand(args.batch_size, 3, 448, 448), mstype.float32)
+ real_data = Tensor(np.random.rand(args.batch_size, 3, 32, 32), mstype.float32)
+ latent_code = Tensor(np.random.rand(args.batch_size, 100, 1, 1), mstype.float32)
+ inputs = [real_data, latent_code]
+ export(dcgan, *inputs, file_name=args.file_name, file_format=args.file_format)
+ if args.run_modelart:
+ file_name = args.file_name + "." + args.file_format.lower()
+ mox.file.copy_parallel(src_url=file_name,
+ dst_url=os.path.join(args.ckpt_url, file_name))
diff --git a/cv/image_generation/dcgan/MindSpore/gpu_infer/CMakeLists.txt b/cv/image_generation/dcgan/MindSpore/gpu_infer/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fe0084e53564dbbb9cf5281c86601a941a5a8324
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/gpu_infer/CMakeLists.txt
@@ -0,0 +1,16 @@
+cmake_minimum_required(VERSION 3.14.1)
+project(GpuInfer)
+add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -std=c++17 -Werror -Wall -fPIE -Wl,--allow-shlib-undefined")
+set(PROJECT_SRC_ROOT ${CMAKE_CURRENT_LIST_DIR}/)
+option(MINDSPORE_PATH "mindspore install path" "")
+include_directories(${MINDSPORE_PATH})
+include_directories(${MINDSPORE_PATH}/include)
+include_directories(${PROJECT_SRC_ROOT})
+find_library(MS_LIB libmindspore.so ${MINDSPORE_PATH}/lib)
+file(GLOB_RECURSE MD_LIB ${MINDSPORE_PATH}/_c_dataengine*)
+
+add_executable(main src/main.cc src/utils.cc)
+find_package(gflags REQUIRED)
+target_link_libraries(main ${MS_LIB} ${MD_LIB} gflags)
+
diff --git a/cv/image_generation/dcgan/MindSpore/gpu_infer/build.sh b/cv/image_generation/dcgan/MindSpore/gpu_infer/build.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cad23b6251e7cbe6bc7635b740626dd920c3963a
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/gpu_infer/build.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+if [ -d out ]; then
+ rm -rf out
+fi
+
+mkdir out
+cd out || exit
+
+if [ -f "Makefile" ]; then
+ make clean
+fi
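+
+# Locate the installed mindspore-gpu package via pip; the resolved MINDSPORE_PATH
+# is typically <site-packages>/mindspore (exact path depends on the Python env).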
+cmake .. \
+ -DMINDSPORE_PATH="`pip show mindspore-gpu | grep Location | awk '{print $2"/mindspore"}' | xargs realpath`"
+make
\ No newline at end of file
diff --git a/cv/image_generation/dcgan/MindSpore/gpu_infer/inc/utils.h b/cv/image_generation/dcgan/MindSpore/gpu_infer/inc/utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..492ce809487ce5c6e85e20b3cf071094988b1708
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/gpu_infer/inc/utils.h
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_INFERENCE_UTILS_H_
+#define MINDSPORE_INFERENCE_UTILS_H_
+
+#include <sys/stat.h>
+#include <dirent.h>
+#include <vector>
+#include <string>
+#include <string_view>
+#include <memory>
+#include "include/api/types.h"
+
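+// Helpers shared by the GPU inference entry point: directory traversal,
+// raw-file-to-MSTensor loading, and dumping output tensors as .bin files.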
+std::vector<std::string> GetAllFiles(std::string_view dirName);
+DIR *OpenDir(std::string_view dirName);
+std::string RealPath(std::string_view path);
+mindspore::MSTensor ReadFileToTensor(const std::string &file);
+int WriteResult(const std::string& imageFile, const std::vector &outputs, const std::string& mode);
+std::vector GetAllFiles(std::string dir_name);
+std::vector> GetAllInputData(std::string dir_name);
+
+#endif
+
diff --git a/cv/image_generation/dcgan/MindSpore/gpu_infer/src/main.cc b/cv/image_generation/dcgan/MindSpore/gpu_infer/src/main.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f083b0a5964e2bbc8266b6a554d2a8554e2502d9
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/gpu_infer/src/main.cc
@@ -0,0 +1,116 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sys/time.h>
+#include <gflags/gflags.h>
+#include <dirent.h>
+#include <iostream>
+#include <string>
+#include <algorithm>
+#include <iosfwd>
+#include <vector>
+#include <fstream>
+
+#include "../inc/utils.h"
+#include "include/dataset/execute.h"
+#include "include/dataset/transforms.h"
+#include "include/dataset/vision.h"
+#include "include/api/types.h"
+#include "include/api/model.h"
+#include "include/api/serialization.h"
+#include "include/api/context.h"
+
+using mindspore::Serialization;
+using mindspore::Model;
+using mindspore::Context;
+using mindspore::Status;
+using mindspore::ModelType;
+using mindspore::Graph;
+using mindspore::GraphCell;
+using mindspore::kSuccess;
+using mindspore::MSTensor;
+using mindspore::DataType;
+using mindspore::dataset::Execute;
+using mindspore::dataset::TensorTransform;
+using mindspore::dataset::vision::Decode;
+using mindspore::dataset::vision::Resize;
+using mindspore::dataset::vision::Normalize;
+using mindspore::dataset::vision::HWC2CHW;
+
+DEFINE_string(mindir_path, "", "model path");
+DEFINE_string(dataset_path, "", "dataset path");
+DEFINE_int32(device_id, 0, "device id");
+DEFINE_string(mode, "", "train or test");
+
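+// Typical invocation (flag values are illustrative):
+//   ./main --mindir_path=./dcgan.mindir --dataset_path=./data --device_id=0 --mode=test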
+int main(int argc, char **argv) {
+ gflags::ParseCommandLineFlags(&argc, &argv, true);
+
+ Model model;
+
+  std::vector<MSTensor> model_inputs;
+ if (RealPath(FLAGS_mindir_path).empty()) {
+ std::cout << "Invalid mindir" << std::endl;
+ return -1;
+ }
+
+  auto context = std::make_shared<Context>();
+  auto gpu_device_info = std::make_shared<mindspore::GPUDeviceInfo>();
+ gpu_device_info->SetDeviceID(FLAGS_device_id);
+ context->MutableDeviceInfo().push_back(gpu_device_info);
+ mindspore::Graph graph;
+ Serialization::Load(FLAGS_mindir_path, ModelType::kMindIR, &graph);
+
+ Status ret_build = model.Build(GraphCell(graph), context);
+ if (ret_build != kSuccess) {
+ std::cout << "ERROR: Build failed." << std::endl;
+ return -1;
+ }
+
+ model_inputs = model.GetInputs();
+ if (model_inputs.empty()) {
+ std::cout << "Invalid model, inputs is empty." << std::endl;
+ return -1;
+ }
+
+ auto input0_files = GetAllFiles(FLAGS_dataset_path);
+ if (input0_files.empty()) {
+ std::cout << "ERROR: no input data." << std::endl;
+ return 1;
+ }
+ size_t size = input0_files.size();
+ for (size_t i = 0; i < size; ++i) {
+    std::vector<MSTensor> inputs;
+    std::vector<MSTensor> outputs;
+    std::cout << "Start predict input files:" << input0_files[i] << std::endl;
+
+    auto input0 = ReadFileToTensor(input0_files[i]);
+    inputs.emplace_back(model_inputs[0].Name(), model_inputs[0].DataType(), model_inputs[0].Shape(),
+                        input0.Data().get(), input0.DataSize());
+
+    Status ret_predict = model.Predict(inputs, &outputs);
+    if (ret_predict != kSuccess) {
+      std::cout << "Predict " << input0_files[i] << " failed." << std::endl;
+      return 1;
+    }
+    WriteResult(input0_files[i], outputs, FLAGS_mode);
+  }
+  return 0;
+}
diff --git a/cv/image_generation/dcgan/MindSpore/gpu_infer/src/utils.cc b/cv/image_generation/dcgan/MindSpore/gpu_infer/src/utils.cc
new file mode 100644
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/gpu_infer/src/utils.cc
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fstream>
+#include <algorithm>
+#include <iostream>
+#include "inc/utils.h"
+
+using mindspore::MSTensor;
+using mindspore::DataType;
+
+std::vector<std::vector<std::string>> GetAllInputData(std::string dir_name) {
+  std::vector<std::vector<std::string>> ret;
+
+ DIR *dir = OpenDir(dir_name);
+ if (dir == nullptr) {
+ return {};
+ }
+ struct dirent *filename;
+ /* read all the files in the dir ~ */
+  std::vector<std::string> sub_dirs;
+ while ((filename = readdir(dir)) != nullptr) {
+ std::string d_name = std::string(filename->d_name);
+ // get rid of "." and ".."
+ if (d_name == "." || d_name == ".." || d_name.empty()) {
+ continue;
+ }
+ std::string dir_path = RealPath(std::string(dir_name) + "/" + filename->d_name);
+ struct stat s;
+ lstat(dir_path.c_str(), &s);
+ if (!S_ISDIR(s.st_mode)) {
+ continue;
+ }
+
+ sub_dirs.emplace_back(dir_path);
+ }
+ std::sort(sub_dirs.begin(), sub_dirs.end());
+
+ (void)std::transform(sub_dirs.begin(), sub_dirs.end(), std::back_inserter(ret),
+ [](const std::string &d) { return GetAllFiles(d); });
+
+ return ret;
+}
+
+std::vector<std::string> GetAllFiles(std::string dir_name) {
+ struct dirent *filename;
+ DIR *dir = OpenDir(dir_name);
+ if (dir == nullptr) {
+ return {};
+ }
+  std::vector<std::string> res;
+ while ((filename = readdir(dir)) != nullptr) {
+ std::string d_name = std::string(filename->d_name);
+ if (d_name == "." || d_name == ".." || d_name.size() <= 3) {
+ continue;
+ }
+ res.emplace_back(std::string(dir_name) + "/" + filename->d_name);
+ }
+ std::sort(res.begin(), res.end());
+ return res;
+}
+
+std::vector<std::string> GetAllFiles(std::string_view dirName) {
+ std::cout << "string_view" << std::endl;
+ struct dirent *filename;
+ DIR *dir = OpenDir(dirName);
+ if (dir == nullptr) {
+ return {};
+ }
+  std::vector<std::string> res;
+ while ((filename = readdir(dir)) != nullptr) {
+ std::string dName = std::string(filename->d_name);
+ if (dName == "." || dName == ".." || filename->d_type != DT_REG) {
+ continue;
+ }
+ res.emplace_back(std::string(dirName) + "/" + filename->d_name);
+ }
+ std::sort(res.begin(), res.end());
+ for (auto &f : res) {
+ std::cout << "image file: " << f << std::endl;
+ }
+ return res;
+}
+
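+// WriteResult dumps each output tensor of one input file to
+// ./result_Files_<mode>/<name>_<mode>_<i>.bin as raw bytes.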
+int WriteResult(const std::string& imageFile, const std::vector &outputs, const std::string& mode) {
+ std::string homePath = "./result_Files_"+mode;
+ const int INVALID_POINTER = -1;
+ const int ERROR = -2;
+ for (size_t i = 0; i < outputs.size(); ++i) {
+ size_t outputSize;
+    std::shared_ptr<const void> netOutput;
+ netOutput = outputs[i].Data();
+ outputSize = outputs[i].DataSize();
+ int pos = imageFile.rfind('/');
+ std::string fileName(imageFile, pos + 1);
+ fileName.replace(fileName.find('.'), fileName.size() - fileName.find('.'), "_"+mode+"_" + std::to_string(i)
+ + ".bin");
+ std::string outFileName = homePath + "/" + fileName;
+ FILE *outputFile = fopen(outFileName.c_str(), "wb");
+ if (outputFile == nullptr) {
+ std::cout << "open result file " << outFileName << " failed" << std::endl;
+ return INVALID_POINTER;
+ }
+ size_t size = fwrite(netOutput.get(), sizeof(char), outputSize, outputFile);
+ if (size != outputSize) {
+ fclose(outputFile);
+ outputFile = nullptr;
+ std::cout << "write result file " << outFileName << " failed, write size[" << size <<
+ "] is smaller than output size[" << outputSize << "], maybe the disk is full." << std::endl;
+ return ERROR;
+ }
+ fclose(outputFile);
+ std::cout << "save result file " << outFileName << " success" << std::endl;
+ outputFile = nullptr;
+ }
+ return 0;
+}
+
+mindspore::MSTensor ReadFileToTensor(const std::string &file) {
+ if (file.empty()) {
+ std::cout << "Pointer file is nullptr" << std::endl;
+ return mindspore::MSTensor();
+ }
+
+ std::ifstream ifs(file);
+ if (!ifs.good()) {
+ std::cout << "File: " << file << " is not exist" << std::endl;
+ return mindspore::MSTensor();
+ }
+
+ if (!ifs.is_open()) {
+ std::cout << "File: " << file << "open failed" << std::endl;
+ return mindspore::MSTensor();
+ }
+
+ ifs.seekg(0, std::ios::end);
+ size_t size = ifs.tellg();
+ mindspore::MSTensor buffer(file, mindspore::DataType::kNumberTypeUInt8,
+                             {static_cast<int64_t>(size)}, nullptr, size);
+
+ ifs.seekg(0, std::ios::beg);
+  ifs.read(reinterpret_cast<char *>(buffer.MutableData()), size);
+ ifs.close();
+
+ return buffer;
+}
+
+DIR *OpenDir(std::string_view dirName) {
+ if (dirName.empty()) {
+ std::cout << " dirName is null ! " << std::endl;
+ return nullptr;
+ }
+ std::string realPath = RealPath(dirName);
+ struct stat s;
+ lstat(realPath.c_str(), &s);
+ if (!S_ISDIR(s.st_mode)) {
+ std::cout << "dirName is not a valid directory !" << std::endl;
+ return nullptr;
+ }
+ DIR *dir;
+ dir = opendir(realPath.c_str());
+ if (dir == nullptr) {
+ std::cout << "Can not open dir " << dirName << std::endl;
+ return nullptr;
+ }
+ std::cout << "Successfully opened the dir " << dirName << std::endl;
+ return dir;
+}
+
+std::string RealPath(std::string_view path) {
+ char realPathMem[PATH_MAX] = {0};
+ char *realPathRet = nullptr;
+ realPathRet = realpath(path.data(), realPathMem);
+ if (realPathRet == nullptr) {
+ std::cout << "File: " << path << " is not exist.";
+ return "";
+ }
+
+ std::string realPath(realPathMem);
+ std::cout << path << " realpath is: " << realPath << std::endl;
+ return realPath;
+}
diff --git a/cv/image_generation/dcgan/MindSpore/image2022-9-13_13-12-42.png b/cv/image_generation/dcgan/MindSpore/image2022-9-13_13-12-42.png
new file mode 100644
index 0000000000000000000000000000000000000000..36901977820dc2db4c1500b8e9eaf13349867479
Binary files /dev/null and b/cv/image_generation/dcgan/MindSpore/image2022-9-13_13-12-42.png differ
diff --git a/cv/image_generation/dcgan/MindSpore/image2022-9-13_13-5-52.png b/cv/image_generation/dcgan/MindSpore/image2022-9-13_13-5-52.png
new file mode 100644
index 0000000000000000000000000000000000000000..881479905ec808855d4064e0eb95384c3953fd28
Binary files /dev/null and b/cv/image_generation/dcgan/MindSpore/image2022-9-13_13-5-52.png differ
diff --git a/cv/image_generation/dcgan/MindSpore/image2022-9-14_10-39-29.png b/cv/image_generation/dcgan/MindSpore/image2022-9-14_10-39-29.png
new file mode 100644
index 0000000000000000000000000000000000000000..dd7e306ba3886020cdca2ba05a5dd5d78d382f3d
Binary files /dev/null and b/cv/image_generation/dcgan/MindSpore/image2022-9-14_10-39-29.png differ
diff --git a/cv/image_generation/dcgan/MindSpore/image2022-9-14_10-41-12.png b/cv/image_generation/dcgan/MindSpore/image2022-9-14_10-41-12.png
new file mode 100644
index 0000000000000000000000000000000000000000..62cd6b7bf307ed47eb936da01c4d1bb548e9fb77
Binary files /dev/null and b/cv/image_generation/dcgan/MindSpore/image2022-9-14_10-41-12.png differ
diff --git a/cv/image_generation/dcgan/MindSpore/infer/convert/convert_om.sh b/cv/image_generation/dcgan/MindSpore/infer/convert/convert_om.sh
new file mode 100644
index 0000000000000000000000000000000000000000..dca25ecec209d651f37ccda5bab31eb4cb550dd0
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/infer/convert/convert_om.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+# Parameter format
+if [ $# -ne 2 ]
+then
+ echo "Wrong parameter format."
+ echo "Usage:"
+ echo " bash $0 INPUT_AIR_PATH OUTPUT_OM_PATH_NAME"
+ echo "Example:"
+ echo " bash $0 ./dcgan_16_20220106.air ../models/DCGAN"
+
+ exit 255
+fi
+
+# DCGAN model from .air to .om
+AIR_PATH=$1
+OM_PATH=$2
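+# atc converts the exported .air graph into an offline .om model; --framework=1
+# selects the MindSpore frontend, and --soc_version must match the target chip.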
+atc --input_format=NCHW \
+--framework=1 \
+--model="${AIR_PATH}" \
+--output="${OM_PATH}" \
+--soc_version=Ascend310
+
+# Delete unnecessary files
+rm fusion_result.json
+rm -r kernel_meta/
+
+# Modify file permissions
+chmod +r+w "${OM_PATH}.om"
diff --git a/cv/image_generation/dcgan/MindSpore/infer/data/config/DCGAN.pipeline b/cv/image_generation/dcgan/MindSpore/infer/data/config/DCGAN.pipeline
new file mode 100644
index 0000000000000000000000000000000000000000..73dbf49550164c89be32da3e3d936769eac57e83
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/infer/data/config/DCGAN.pipeline
@@ -0,0 +1,29 @@
+{
+ "DCGAN":{
+ "stream_config": {
+ "deviceId": "0"
+ },
+ "appsrc0":{
+ "props": {
+ "blocksize": "409600"
+ },
+ "factory": "appsrc",
+ "next": "mxpi_tensorinfer0"
+ },
+ "mxpi_tensorinfer0":{
+ "props": {
+ "dataSource":"appsrc0",
+ "singleBatchInfer":"1",
+ "modelPath":"../data/models/DCGAN.om"
+ },
+ "factory": "mxpi_tensorinfer",
+ "next": "appsink0"
+ },
+ "appsink0":{
+ "props": {
+ "blocksize": "4096000"
+ },
+ "factory": "appsink"
+ }
+ }
+}
\ No newline at end of file
diff --git a/cv/image_generation/dcgan/MindSpore/infer/docker_start_infer.sh b/cv/image_generation/dcgan/MindSpore/infer/docker_start_infer.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f7d4ba9ef6dd1529085436081e579a5998f9f3e4
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/infer/docker_start_infer.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+docker_image=$1
+data_dir=$2
+
+function show_help() {
+ echo "Usage: docker_start_infer.sh docker_image data_dir"
+}
+
+function param_check() {
+ if [ -z "${docker_image}" ]; then
+ echo "please input docker_image"
+ show_help
+ exit 1
+ fi
+
+ if [ -z "${data_dir}" ]; then
+ echo "please input data_dir"
+ show_help
+ exit 1
+ fi
+}
+
+param_check
+
+docker run -it \
+ --device=/dev/davinci0 \
+ --device=/dev/davinci_manager \
+ --device=/dev/devmm_svm \
+ --device=/dev/hisi_hdc \
+ -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
+ -v ${data_dir}:${data_dir} \
+ ${docker_image} \
+ /bin/bash
diff --git a/cv/image_generation/dcgan/MindSpore/infer/mxbase/CMakeLists.txt b/cv/image_generation/dcgan/MindSpore/infer/mxbase/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..94210cd63bd508793e53adaf7e620d32cfde3437
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/infer/mxbase/CMakeLists.txt
@@ -0,0 +1,68 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+cmake_minimum_required(VERSION 3.10.0)
+project(DCGAN)
+set(TARGET DCGAN)
+
+add_definitions(-DENABLE_DVPP_INTERFACE)
+add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
+add_definitions(-Dgoogle=mindxsdk_private)
+add_compile_options(-std=c++11 -fPIE -fstack-protector-all -fPIC -Wall)
+add_link_options(-Wl,-z,relro,-z,now,-z,noexecstack -s -pie)
+
+# Check environment variable
+if(NOT DEFINED ENV{ASCEND_HOME})
+ message(FATAL_ERROR "please define environment variable:ASCEND_HOME")
+endif()
+if(NOT DEFINED ENV{ASCEND_VERSION})
+ message(WARNING "please define environment variable:ASCEND_VERSION")
+endif()
+if(NOT DEFINED ENV{ARCH_PATTERN})
+ message(WARNING "please define environment variable:ARCH_PATTERN")
+endif()
+
+set(ACL_INC_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/include)
+set(ACL_LIB_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/lib64)
+
+set(MXBASE_ROOT_DIR $ENV{MX_SDK_HOME})
+set(MXBASE_INC ${MXBASE_ROOT_DIR}/include)
+set(MXBASE_LIB_DIR ${MXBASE_ROOT_DIR}/lib)
+set(MXBASE_POST_LIB_DIR ${MXBASE_ROOT_DIR}/lib/modelpostprocessors)
+set(MXBASE_POST_PROCESS_DIR ${MXBASE_ROOT_DIR}/include/MxBase/postprocess/include)
+
+if(DEFINED ENV{MXSDK_OPENSOURCE_DIR})
+ set(OPENSOURCE_DIR $ENV{MXSDK_OPENSOURCE_DIR})
+else()
+ set(OPENSOURCE_DIR ${MXBASE_ROOT_DIR}/opensource)
+endif()
+include_directories(${ACL_INC_DIR})
+include_directories(${OPENSOURCE_DIR}/include)
+include_directories(${OPENSOURCE_DIR}/include/opencv4)
+
+include_directories(${MXBASE_INC})
+include_directories(${MXBASE_POST_PROCESS_DIR})
+link_directories(${ACL_LIB_DIR})
+link_directories(${OPENSOURCE_DIR}/lib)
+link_directories(${MXBASE_LIB_DIR})
+link_directories(${MXBASE_POST_LIB_DIR})
+
+add_executable(${TARGET} src/main.cpp src/DCGAN.cpp)
+
+target_link_libraries(${TARGET} glog cpprest mxbase opencv_world stdc++fs)
+
+install(TARGETS ${TARGET} RUNTIME DESTINATION ${PROJECT_SOURCE_DIR}/)
\ No newline at end of file
diff --git a/cv/image_generation/dcgan/MindSpore/infer/mxbase/build.sh b/cv/image_generation/dcgan/MindSpore/infer/mxbase/build.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6f28aa3b3894fd62ea9084b54321d2078b865c93
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/infer/mxbase/build.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+# Parameter format
+if [ $# -ne 3 ]
+then
+ echo "Wrong parameter format."
+ echo "Usage:"
+ echo " bash $0 OM_PATH RESULT_PATH GEN_NUM"
+ echo "Example:"
+ echo " bash $0 ../data/models/DCGAN.om ./results 10"
+
+ exit 255
+fi
+
+
+# Rebuild build folder
+rm -f core
+rm -rf build
+mkdir -p build
+# Enter build folder
+cd build || exit
+
+# Cmake & make
+if ! cmake ..;
+then
+ echo "[ERROR] Cmake failed."
+ exit
+fi
+if ! (make);
+then
+ echo "[ERROR] Make failed."
+ exit
+fi
+echo "[INFO] Build successfully."
+
+# Enter previous folder
+cd - || exit
+# Rebuild results folder
+rm -rf results
+mkdir -p results
+
+# run
+OM_PATH=$1
+RESULT_PATH=$2
+GEN_NUM=$3
+./build/DCGAN "${OM_PATH}" "${RESULT_PATH}" "${GEN_NUM}"
\ No newline at end of file
diff --git a/cv/image_generation/dcgan/MindSpore/infer/mxbase/src/DCGAN.cpp b/cv/image_generation/dcgan/MindSpore/infer/mxbase/src/DCGAN.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fd0b30069cbce0ffc6073a20c5e05456f3e44e21
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/infer/mxbase/src/DCGAN.cpp
@@ -0,0 +1,226 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ============================================================================
+ */
+
+#include <chrono>
+#include <cstdlib>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "DCGAN.h"
+
+InitParam initParam_;
+
+APP_ERROR DCGAN::Init(const InitParam &initParam) {
+ // Param init
+ initParam_ = initParam;
+
+ // Device init
+ APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices();
+ if (ret != APP_ERR_OK) {
+ LogError << "Init devices failed, ret=" << ret << ".";
+ return ret;
+ }
+
+ // Context init
+ ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId);
+ if (ret != APP_ERR_OK) {
+ LogError << "Set context failed, ret=" << ret << ".";
+ return ret;
+ }
+
+ // Model init
+    model_ = std::make_shared<MxBase::ModelInferenceProcessor>();
+ ret = model_->Init(initParam.modelPath, modelDesc_);
+ if (ret != APP_ERR_OK) {
+ LogError << "ModelInferenceProcessor init failed, ret=" << ret << ".";
+ return ret;
+ }
+
+    // seed the latent-noise RNG (mean=0.0, std=1.0 normal distribution); gen_ and
+    // dis_ are assumed to be class members declared in DCGAN.h, since they are
+    // used later in CreateRandomTensorBase
+    gen_ = std::mt19937{1213};
+    dis_ = std::normal_distribution<float>{0.0, 1.0};
+
+ return ret;
+}
+
+APP_ERROR DCGAN::DeInit() {
+ model_->DeInit();
+ MxBase::DeviceManager::GetInstance()->DestroyDevices();
+
+ return APP_ERR_OK;
+}
+
+APP_ERROR DCGAN::CreateRandomTensorBase(std::vector &inputs) {
+ MxBase::TensorBase tensorBase;
+ size_t D0 = initParam_.batchSize, D1 = 100, D2 = 1, D3 = 1; // D0:batchsize
+ const uint32_t dataSize = D0 * D1 * D2 * D3 * FLOAT32_TYPE_BYTE_NUM;
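+    // latent input layout: (batch, 100, 1, 1) of standard-normal noise, matching
+    // the 100-dim latent code the generator was exported with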
+
+ float *mat_data = new float[dataSize / FLOAT32_TYPE_BYTE_NUM];
+ for (size_t d0 = 0; d0 < D0; d0++) {
+ for (size_t d1 = 0; d1 < D1; d1++) {
+ for (size_t d2 = 0; d2 < D2; d2++) {
+ for (size_t d3 = 0; d3 < D3; d3++) {
+ int i = d0 * D1 * D2 * D3 + d1 * D2 * D3 + d2 * D3 + d3;
+ mat_data[i] = dis_(gen_);
+ }
+ }
+ }
+ }
+
+ MxBase::MemoryData memoryDataDst(dataSize, MxBase::MemoryData::MEMORY_DEVICE, initParam_.deviceId);
+    MxBase::MemoryData memoryDataSrc(reinterpret_cast<void *>(mat_data),
+                                     dataSize, MxBase::MemoryData::MEMORY_HOST_MALLOC);
+
+    APP_ERROR ret = MxBase::MemoryHelper::MxbsMallocAndCopy(memoryDataDst, memoryDataSrc);
+    delete[] mat_data;  // host staging buffer has been copied to device memory
+    if (ret != APP_ERR_OK) {
+        LogError << GetError(ret) << "Memory malloc failed.";
+        return ret;
+    }
+
+    std::vector<uint32_t> shape = {static_cast<uint32_t>(D0), static_cast<uint32_t>(D1),
+                                   static_cast<uint32_t>(D2), static_cast<uint32_t>(D3)};
+ tensorBase = MxBase::TensorBase(memoryDataDst, false, shape, MxBase::TENSOR_DTYPE_FLOAT32);
+
+ inputs.push_back(tensorBase);
+
+ return APP_ERR_OK;
+}
+
+APP_ERROR DCGAN::Inference(const std::vector &inputs,
+ std::vector &outputs) {
+ // apply for output Tensor buffer
+ auto dtypes = model_->GetOutputDataType();
+ for (size_t i = 0; i < modelDesc_.outputTensors.size(); ++i) {
+ // shape
+        std::vector<uint32_t> shape = {};
+ for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); ++j) {
+ shape.push_back((uint32_t)modelDesc_.outputTensors[i].tensorDims[j]);
+ }
+ // define tensor
+ MxBase::TensorBase tensor(shape, dtypes[i], MxBase::MemoryData::MemoryType::MEMORY_DEVICE, initParam_.deviceId);
+ // request memory
+ APP_ERROR ret = MxBase::TensorBase::TensorBaseMalloc(tensor);
+ if (ret != APP_ERR_OK) {
+ LogError << "TensorBaseMalloc failed, ret=" << ret << ".";
+ return ret;
+ }
+ outputs.push_back(tensor);
+ }
+
+ // dynamic information
+ MxBase::DynamicInfo dynamicInfo = {};
+ dynamicInfo.dynamicType = MxBase::DynamicType::STATIC_BATCH;
+
+    // do inference
+    auto startTime = std::chrono::high_resolution_clock::now();
+    APP_ERROR ret = model_->ModelInference(inputs, outputs, dynamicInfo);
+    auto endTime = std::chrono::high_resolution_clock::now();
+    double costMs = std::chrono::duration<double, std::milli>(endTime - startTime).count();
+ g_inferCost.push_back(costMs);
+ if (ret != APP_ERR_OK) {
+ LogError << "ModelInference failed, ret=" << ret << ".";
+ return ret;
+ }
+ return APP_ERR_OK;
+}
+
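+// PostProcess copies the generator output back to host, de-normalizes each float
+// via NORMALIZE_STD/NORMALIZE_MEAN, and repacks NCHW planes into HWC cv::Mat
+// images ready for saving.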
+APP_ERROR DCGAN::PostProcess(std::vector<MxBase::TensorBase> outputs, std::vector<cv::Mat> &resultMats) {
+ APP_ERROR ret;
+ ret = outputs[0].ToHost();
+ if (ret != APP_ERR_OK) {
+ LogError << GetError(ret) << "tohost fail.";
+ return ret;
+ }
+
+    float *outputPtr = reinterpret_cast<float *>(outputs[0].GetBuffer());
+
+ size_t H = initParam_.imageHeight, W = initParam_.imageWidth, C = CHANNEL;
+
+ for (uint32_t b = 0; b < initParam_.batchSize; b++) {
+ cv::Mat resultMat(initParam_.imageHeight, initParam_.imageWidth, CV_8UC3);
+ for (size_t c = 0; c < C; c++) {
+ for (size_t h = 0; h < H; h++) {
+ for (size_t w = 0; w < W; w++) {
+ float *tmpLoc = outputPtr + b * C * H * W + (C - c - 1) * H * W + h * W + w;
+ // denormalize
+ float tmpNum = (*tmpLoc) * NORMALIZE_STD + NORMALIZE_MEAN;
+                    // NCHW to NHWC (the reversed channel index also flips RGB to BGR for cv::imwrite)
+                    resultMat.at<cv::Vec3b>(h, w)[c] = static_cast<uchar>(tmpNum);
+ }
+ }
+ }
+ resultMats.push_back(resultMat);
+ }
+
+ return ret;
+}
+
+APP_ERROR DCGAN::SaveResult(std::vector &resultMats, const std::string &imgName) {
+ DIR *dirPtr = opendir(initParam_.savePath.c_str());
+ if (dirPtr == nullptr) {
+ std::string path = "mkdir -p " + initParam_.savePath;
+ system(path.c_str());
+ }
+ for (uint32_t b = 0; b < initParam_.batchSize; b++) {
+ std::string file_path = initParam_.savePath + "/" + imgName + "-" + std::to_string(b) + ".jpg";
+ cv::imwrite(file_path, resultMats[b]);
+ std::cout << "[INFO] image saved path: " << file_path << std::endl;
+ }
+
+ return APP_ERR_OK;
+}
+
+APP_ERROR DCGAN::Process(uint32_t gen_id) {
+ APP_ERROR ret;
+
+ // create random tensor
+    std::vector<MxBase::TensorBase> inputs = {};
+    std::vector<MxBase::TensorBase> outputs = {};
+ ret = CreateRandomTensorBase(inputs);
+ if (ret != APP_ERR_OK) {
+ LogError << "CVMatToTensorBase failed, ret=" << ret << ".";
+ return ret;
+ }
+
+ // do inference
+ ret = Inference(inputs, outputs);
+ if (ret != APP_ERR_OK) {
+ LogError << "Inference failed, ret=" << ret << ".";
+ return ret;
+ }
+ std::cout << "[INFO] Inference finished!" << std::endl;
+
+ // do postprocess
+ std::vector resultMats = {};
+ ret = PostProcess(outputs, resultMats);
+ if (ret != APP_ERR_OK) {
+ LogError << "PostProcess failed, ret=" << ret << ".";
+ return ret;
+ }
+ std::cout << "[INFO] Postprocess finished!" << std::endl;
+
+ // save results
+ std::string imgName = std::to_string(gen_id);
+ ret = SaveResult(resultMats, imgName);
+ if (ret != APP_ERR_OK) {
+ LogError << "Save result failed, ret=" << ret << ".";
+ return ret;
+ }
+ std::cout << "[INFO] Result saved successfully!" << std::endl;
+
+ return APP_ERR_OK;
+}
diff --git a/cv/image_generation/dcgan/MindSpore/infer/mxbase/src/DCGAN.h b/cv/image_generation/dcgan/MindSpore/infer/mxbase/src/DCGAN.h
new file mode 100644
index 0000000000000000000000000000000000000000..fc9fe585821404be54be4154b2758542bfbce98f
--- /dev/null
+++ b/cv/image_generation/dcgan/MindSpore/infer/mxbase/src/DCGAN.h
@@ -0,0 +1,72 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ============================================================================
+ */
+
+#ifndef MXBASE_DCGAN_H
+#define MXBASE_DCGAN_H
+
+#include <dirent.h>
+
+#include <vector>
+#include <string>
+#include <memory>
+#include <random>