From 2f20cee41114d286548d687bd7fccab1ca432179 Mon Sep 17 00:00:00 2001
From: c00818886 <chenchuwei@huawei.com>
Date: Thu, 19 Oct 2023 17:41:01 +0800
Subject: [PATCH] YoloX torch_aie adaption & README

---
 .../built-in/cv/detection/YoloX/README.md     | 211 ++++++++++++++++++
 .../cv/detection/YoloX/requirements.txt       |  12 +
 .../built-in/cv/detection/YoloX/test.sh       |  62 +++++
 .../YoloX/yolox_coco_evaluator.patch          |  67 ++++++
 .../built-in/cv/detection/YoloX/yolox_eval.py |  96 ++++++++
 .../YoloX/yolox_export_torch_aie_ts.py        |  57 +++++
 6 files changed, 505 insertions(+)
 create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/YoloX/README.md
 create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/YoloX/requirements.txt
 create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/YoloX/test.sh
 create mode 100755 AscendIE/TorchAIE/built-in/cv/detection/YoloX/yolox_coco_evaluator.patch
 create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/YoloX/yolox_eval.py
 create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/YoloX/yolox_export_torch_aie_ts.py
diff --git a/AscendIE/TorchAIE/built-in/cv/detection/YoloX/README.md b/AscendIE/TorchAIE/built-in/cv/detection/YoloX/README.md
new file mode 100644
index 0000000000..28badefec4
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/detection/YoloX/README.md
@@ -0,0 +1,211 @@
+# YOLOX模型-AIE推理引擎部署指导
+
+- [概述](#ZH-CN_TOPIC_0000001172161501)
+
+    - [输入输出数据](#section540883920406)
+
+- [推理环境准备](#ZH-CN_TOPIC_0000001126281702)
+
+- [模型部署](#ZH-CN_TOPIC_0000001126281700)
+
+  - [安装依赖](#section4622531142816)
+  - [准备数据集](#section183221994411)
+  - [模型推理](#section741711594517)
+
+- [模型推理性能&精度](#ZH-CN_TOPIC_0000001172201573)
+
+******
+
+# 概述<a name="ZH-CN_TOPIC_0000001172161501"></a>
+
+YOLOX是基于往年对YOLO系列众多改进而产生的目标检测模型，其采用无锚方式，并应用了解耦头和领先的标签分配策略 SimOTA。其在众多数据集中均获得了最佳结果。
+
+- 参考实现：
+
+  ```
+  url=https://github.com/Megvii-BaseDetection/YOLOX
+  commit_id=6880e3999eb5cf83037e1818ee63d589384587bd
+  code_path=ACL_PyTorch/contrib/cv/detection/YOLOX
+  model_name=YOLOX
+  ```
+
+## 输入输出数据<a name="section540883920406"></a>
+
+- 输入数据
+
+  | 输入数据 | 数据类型 | 大小                      | 数据排布格式 |
+  | -------- | -------- | ------------------------- | ------------ |
+  | input    | RGB_FP32 | batchsize x 3 x 640 x 640 | NCHW         |
+
+- 输出数据
+
+  | 输出数据 | 数据类型 | 大小    |
+  | -------- | -------- | ------- |
+  | output   | FLOAT32  | batchsize x 8400 x 85 |
+
+# 推理环境准备<a name="ZH-CN_TOPIC_0000001126281702"></a>
+
+- 该模型需要以下插件与驱动
+
+  **表 1**  版本配套表
+
+  | 配套                                                            | 版本    | 环境准备指导                                                                                          |
+  | --------------------------------------------------------------- | ------- | ----------------------------------------------------------------------------------------------------- |
+  | 固件与驱动                                                      | 23.0.rc1  | [Pytorch框架推理环境准备](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/pies) |
+  | CANN                                                            | 7.0.RC1.alpha003 | -                                                                                                     |
+  | Python                                                          | 3.9.11  | -                                                                                                     |
+  | PyTorch                                                         | 2.0.1   | -                                                                                                     |
+  | torch_aie                                                       | 6.3rc2  | -                                                                                                     |
+  | 说明：Atlas 300I Duo 推理卡请以CANN版本选择实际固件与驱动版本。 | \       | \                                                                                                     |
+
+# 模型部署<a name="ZH-CN_TOPIC_0000001126281700"></a>
+
+## 安装依赖<a name="section4622531142816"></a>
+
+1. 获取源码
+
+   ```shell
+   git clone https://github.com/Megvii-BaseDetection/YOLOX
+   cd YOLOX
+   git reset 6880e3999eb5cf83037e1818ee63d589384587bd --hard
+   patch -p1 < ../yolox_coco_evaluator.patch
+   pip install -v -e .  # or  python3 setup.py develop
+   cd ..
+   ```
+
+2. 安装需要的Python Library
+
+   ```shell
+   apt-get install libprotobuf-dev protobuf-compiler
+   apt-get install libgl1-mesa-glx
+   pip install -r requirements.txt
+   ```
+
+3. 设置环境变量
+
+   执行环境中推理引擎安装路径下的环境变量设置脚本
+
+   ```shell
+   source {aie}/set_env.sh
+   ```
+
+## 准备数据集<a name="section183221994411"></a>
+
+1. 获取原始数据集。（解压命令参考tar -xvf  \*.tar与 unzip \*.zip）
+
+   请参考开源代码仓方式获得[COCO2017数据集](https://cocodataset.org/)，并根据需要置于服务器上（如 `datasets_path=/data/dataset/coco`），val2017目录存放coco数据集的验证集图片，annotations目录存放coco数据集的instances_val2017.json，文件目录结构如下：
+
+   ```
+    data
+    ├── dataset
+    │   ├── coco
+    │   │   ├── annotations
+    │   │   ├── val2017
+   ```
+
+## 模型推理<a name="section741711594517"></a>
+
+### 1. 模型转换
+
+   使用PyTorch将模型权重文件.pth转换为torchscript文件
+
+   1. 获取权重文件
+
+       我们利用官方的PTH文件进行验证，官方PTH文件可从原始开源库中获取，我们需要[yolox_x.pth](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_x.pth)文件，请将其放在与README.md文件同一目录内。
+
+   2. 导出torchscript文件
+
+      ```shell
+      cd YOLOX
+      python tools/export_torchscript.py \
+         --output-name ../yolox.torchscript.pt \
+         -n yolox-x \
+         -c ../yolox_x.pth
+      cd ..
+      ```
+
+      获得yolox.torchscript.pt文件。
+
+      + 参数说明
+         + `--output-name`：输出文件名称
+         + `-n`：模型名称
+         + `-c`：权重文件路径
+
+### 2. 开始推理验证
+
+   1. 执行命令查看芯片名称（$\{chip\_name\}）。
+
+      ```shell
+      npu-smi info
+      # 该设备芯片名为Ascend310P3（在下一步中赋值给soc_version变量）
+      # 回显如下：
+      +-------------------+-----------------+------------------------------------------------------+
+      | NPU     Name      | Health          | Power(W)     Temp(C)           Hugepages-Usage(page) |
+      | Chip    Device    | Bus-Id          | AICore(%)    Memory-Usage(MB)                        |
+      +===================+=================+======================================================+
+      | 0       310P3     | OK              | 15.8         42                0    / 0              |
+      | 0       0         | 0000:82:00.0    | 0            1074 / 21534                            |
+      +===================+=================+======================================================+
+      | 1       310P3     | OK              | 15.4         43                0    / 0              |
+      | 0       1         | 0000:89:00.0    | 0            1070 / 21534                            |
+      +===================+=================+======================================================+
+      ```
+
+   2. 对原生ts文件执行torch_aie编译，导出NPU支持的ts文件
+
+      ```shell
+      soc_version="Ascend310P3" # User-defined
+      python yolox_export_torch_aie_ts.py \
+         --torch-script-path ./yolox.torchscript.pt \
+         --batch-size 1 \
+         --save-path ./ \
+         --soc-version ${soc_version}
+      ```
+   + 参数说明
+      + `--torch-script-path`：原生ts文件路径
+      + `--batch-size`：用户自定义的batch size
+      + `--save-path`：AIE编译后的ts文件保存目录（文件名由脚本自动生成，本例为 yoloxb1_torch_aie.pt）
+      + `--soc-version`：NPU型号
+
+   3. 执行推理并验证精度与性能
+   
+      <em>COCO2017数据集需要约5分钟完成所有推理任务，请耐心等待。</em>
+
+      ```shell
+      python yolox_eval.py \
+         --dataroot /data/dataset/coco \
+         --batch 1 \
+         --ts ./yoloxb1_torch_aie.pt
+      ```
+
+      - 参数说明：
+         -   --dataroot：COCO数据集的路径，同上
+         -   --batch：用户自定义的batch size
+         -   --ts：AIE编译后的ts文件路径
+
+      运行成功后将打印该模型在NPU推理结果的精度信息与性能信息。
+
+
+# 模型推理性能&精度<a name="ZH-CN_TOPIC_0000001172201573"></a>
+
+基于推理引擎完成推理计算，精度与性能可参考下列数据：
+
+| Soc version | Batch Size | Dataset | Accuracy ||
+| ----------  | ---------- | ---------- | ---------- | ---------- |
+| 310P3精度   | 1  | coco2017 | Average Precision(IoU=0.50:0.95): 0.498 | Average Precision(IoU=0.50): 0.678 |
+| 310P3精度   | 4  | coco2017 | Average Precision(IoU=0.50:0.95): 0.498 | Average Precision(IoU=0.50): 0.678 |
+| 310P3精度   | 8  | coco2017 | Average Precision(IoU=0.50:0.95): 0.498 | Average Precision(IoU=0.50): 0.678 |
+| 310P3精度   | 20 | coco2017 | Average Precision(IoU=0.50:0.95): 0.498 | Average Precision(IoU=0.50): 0.678 |
+
+| Soc version | Batch Size | Dataset | Performance |
+| -------- | ---------- | ---------- | ---------- |
+| 310P3    | 1          | coco2017 | 35.65 ms/pic |
+| 310P3    | 4          | coco2017 | 33.09 ms/pic |
+| 310P3    | 8          | coco2017 | 33.63 ms/pic |
+| 310P3    | 20         | coco2017 | 32.67 ms/pic |
+
+# FAQ
+1. 若遇到类似报错：ImportError: /lib/aarch64-linux-gnu/libGLdispatch.so.0: cannot allocate memory in static TLS block
+
+   解决方法：
+   export LD_PRELOAD=$LD_PRELOAD:{报错信息中的路径}
\ No newline at end of file
diff --git a/AscendIE/TorchAIE/built-in/cv/detection/YoloX/requirements.txt b/AscendIE/TorchAIE/built-in/cv/detection/YoloX/requirements.txt
new file mode 100644
index 0000000000..92675b2315
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/detection/YoloX/requirements.txt
@@ -0,0 +1,12 @@
+numpy
+torch==2.0.1
+opencv_python
+loguru
+scikit-image
+tqdm
+torchvision
+Pillow
+thop
+ninja
+tabulate
+tensorboard
diff --git a/AscendIE/TorchAIE/built-in/cv/detection/YoloX/test.sh b/AscendIE/TorchAIE/built-in/cv/detection/YoloX/test.sh
new file mode 100644
index 0000000000..7f767ca7cd
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/detection/YoloX/test.sh
@@ -0,0 +1,62 @@
+# Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Description: Test script for running YoloX model with Ascend Inference Engine
+# Author: chenchuwei c00818886
+# Create: 2023/10/20
+
+soc_version="Ascend310P3"       # target NPU SoC passed to the AIE compiler
+data_root="/data/dataset/coco"  # COCO root containing annotations/ and val2017/
+batch=1                         # batch size used for both compilation and evaluation
+
+if [ ! -f "yolox_x.pth" ]; then
+    echo "[ERROR] yolox_x.pth not found in current dir, please make sure it exists."
+    exit 1
+fi
+
+if [ ! -d "YOLOX" ]; then
+    echo "[INFO] Preparing YoloX's dependencies"
+    git clone https://github.com/Megvii-BaseDetection/YOLOX || exit 1
+    cd YOLOX || exit 1  # never run reset/patch/pip outside the checkout
+    git reset 6880e3999eb5cf83037e1818ee63d589384587bd --hard
+    patch -p1 < ../yolox_coco_evaluator.patch
+    pip install -v -e .
+    cd ..
+fi
+
+echo "[INFO] Installing Python dependencies"
+apt-get install libprotobuf-dev protobuf-compiler -y
+apt-get install libgl1-mesa-glx -y
+pip install -r requirements.txt
+
+if [ ! -f "yolox.torchscript.pt" ]; then
+    echo "[INFO] Exporting torchscript module"
+    cd YOLOX || exit 1  # the export tool must run from inside the checkout
+    python tools/export_torchscript.py --output-name ../yolox.torchscript.pt -n yolox-x -c ../yolox_x.pth
+    cd ..
+fi
+
+if [ ! -f "yoloxb${batch}_torch_aie.pt" ]; then
+    echo "[INFO] AIE Compiling"
+    python yolox_export_torch_aie_ts.py \
+        --torch-script-path ./yolox.torchscript.pt \
+        --batch-size "${batch}" \
+        --soc-version "${soc_version}"
+fi
+
+echo "[INFO] Start AIE evaluation"
+python yolox_eval.py \
+   --dataroot "${data_root}" \
+   --batch "${batch}" \
+   --ts "./yoloxb${batch}_torch_aie.pt"
\ No newline at end of file
diff --git a/AscendIE/TorchAIE/built-in/cv/detection/YoloX/yolox_coco_evaluator.patch b/AscendIE/TorchAIE/built-in/cv/detection/YoloX/yolox_coco_evaluator.patch
new file mode 100755
index 0000000000..a81cb0971e
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/detection/YoloX/yolox_coco_evaluator.patch
@@ -0,0 +1,67 @@
+diff --git a/yolox/evaluators/coco_evaluator.py b/yolox/evaluators/coco_evaluator.py
+index 96eb56a..64827fc 100644
+--- a/yolox/evaluators/coco_evaluator.py
++++ b/yolox/evaluators/coco_evaluator.py
+@@ -164,33 +164,34 @@ class COCOEvaluator:
+                 data_list.append(pred_data)
+         return data_list
+ 
+-    def evaluate_prediction(self, data_dict, statistics):
++    def evaluate_prediction(self, data_dict, statistics=None):
+         if not is_main_process():
+             return 0, 0, None
+ 
+         logger.info("Evaluate in main process...")
+ 
+         annType = ["segm", "bbox", "keypoints"]
++        
++        if statistics is not None:
++            inference_time = statistics[0].item()
++            nms_time = statistics[1].item()
++            n_samples = statistics[2].item()
++
++            a_infer_time = 1000 * inference_time / (n_samples * self.dataloader.batch_size)
++            a_nms_time = 1000 * nms_time / (n_samples * self.dataloader.batch_size)
++    
++            time_info = ", ".join(
++                [
++                    "Average {} time: {:.2f} ms".format(k, v)
++                    for k, v in zip(
++                        ["forward", "NMS", "inference"],
++                        [a_infer_time, a_nms_time, (a_infer_time + a_nms_time)],
++                    )
++                ]
++            )
+ 
+-        inference_time = statistics[0].item()
+-        nms_time = statistics[1].item()
+-        n_samples = statistics[2].item()
+-
+-        a_infer_time = 1000 * inference_time / (n_samples * self.dataloader.batch_size)
+-        a_nms_time = 1000 * nms_time / (n_samples * self.dataloader.batch_size)
+-
+-        time_info = ", ".join(
+-            [
+-                "Average {} time: {:.2f} ms".format(k, v)
+-                for k, v in zip(
+-                    ["forward", "NMS", "inference"],
+-                    [a_infer_time, a_nms_time, (a_infer_time + a_nms_time)],
+-                )
+-            ]
+-        )
+-
+-        info = time_info + "\n"
+-
++            info = time_info + "\n"
++        info = "\n"
+         # Evaluate the Dt (detection) json comparing with the ground truth
+         if len(data_dict) > 0:
+             cocoGt = self.dataloader.dataset.coco
+@@ -216,6 +217,7 @@ class COCOEvaluator:
+             with contextlib.redirect_stdout(redirect_string):
+                 cocoEval.summarize()
+             info += redirect_string.getvalue()
+-            return cocoEval.stats[0], cocoEval.stats[1], info
++            return info
+         else:
+             return 0, 0, info
diff --git a/AscendIE/TorchAIE/built-in/cv/detection/YoloX/yolox_eval.py b/AscendIE/TorchAIE/built-in/cv/detection/YoloX/yolox_eval.py
new file mode 100644
index 0000000000..3dd182eca6
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/detection/YoloX/yolox_eval.py
@@ -0,0 +1,96 @@
+# Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import argparse
+import time
+import torch
+import torch_aie
+import numpy as np
+
+from tqdm import tqdm
+from ais_bench.infer.interface import InferSession
+from yolox.data import COCODataset, ValTransform
+from yolox.evaluators import COCOEvaluator
+from yolox.utils.boxes import postprocess
+from yolox.utils.demo_utils import demo_postprocess
+
+
+def main():
+    print("[INFO] YoloX AIE evaluation process start")
+
+    parser = argparse.ArgumentParser(description="YOLOX Preprocess")
+    parser.add_argument('--dataroot', dest='dataroot',
+                        help='data root dirname', default='/data/datasets/coco',
+                        type=str)
+    parser.add_argument('--batch',
+                        help='validation batch size', default=1,
+                        type=int)
+    parser.add_argument('--ts',
+                        help='root of ts module', default="./yoloxb1_torch_aie.pt",
+                        type=str)
+    opt = parser.parse_args()
+
+    valdataset = COCODataset(
+        data_dir=opt.dataroot,
+        json_file='instances_val2017.json',
+        name="val2017",
+        img_size=(640, 640),
+        preproc=ValTransform(legacy=False),
+    )
+    sampler = torch.utils.data.SequentialSampler(valdataset)
+
+    dataloader_kwargs = {
+        "num_workers": 8, "pin_memory": True, "sampler": sampler, "batch_size": opt.batch
+    }
+
+    val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)
+
+    torch_aie.set_device(0)
+
+    print("[INFO] Loading TS module")
+    aie_module = torch.jit.load(opt.ts)
+    aie_module.eval()
+
+    aie_cost = 0
+    infer_times = 0
+    data_list = []
+    coco_evaluator = COCOEvaluator(val_loader, img_size=(640, 640), confthre=0.001, nmsthre=0.65, num_classes=80)
+
+    print(f"[INFO] Start AIE inference, please be patient (batch={opt.batch})")
+    for _, datas in enumerate(tqdm(val_loader)):
+        data = datas[0]
+
+        # Inference with AIE-compiled TS module
+        start = time.time()
+        result = aie_module(data)
+        cost = time.time() - start
+
+        outputs = demo_postprocess(result, [640, 640])
+        outputs = postprocess(outputs, num_classes=80, conf_thre=0.001, nms_thre=0.65)
+
+        data_list.extend(coco_evaluator.convert_to_coco_format(outputs, datas[2], datas[3]))
+        aie_cost += cost
+        infer_times += 1
+
+    # Use COCO_Evaluator to evaluate the accuracy
+    coco_result = coco_evaluator.evaluate_prediction(data_list)
+    print(coco_result)
+    print(f'\n[INFO] PT-AIE inference avg cost: {aie_cost / infer_times * 1000 / opt.batch} ms/pic')
+    print(f'[INFO] Total sample count = {infer_times * opt.batch} pics')
+    print('[INFO] YoloX AIE evaluation process finished')
+
+if __name__ == "__main__":
+    main()
diff --git a/AscendIE/TorchAIE/built-in/cv/detection/YoloX/yolox_export_torch_aie_ts.py b/AscendIE/TorchAIE/built-in/cv/detection/YoloX/yolox_export_torch_aie_ts.py
new file mode 100644
index 0000000000..8c1ca305ce
--- /dev/null
+++ b/AscendIE/TorchAIE/built-in/cv/detection/YoloX/yolox_export_torch_aie_ts.py
@@ -0,0 +1,57 @@
+# Copyright(C) 2023. Huawei Technologies Co.,Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import os
+import argparse
+import torch
+import torch_aie
+from torch_aie import _enums
+
+
+def export_torch_aie(model_path, batch_size, soc_version, save_path="./"):
+    trace_model = torch.jit.load(model_path)
+    trace_model.eval()
+    input_info = [torch_aie.Input((batch_size, 3, 640, 640))]
+    torch_aie.set_device(0)
+    torchaie_model = torch_aie.compile(
+        trace_model,
+        inputs=input_info,
+        torch_executed_ops=[],
+        precision_policy=_enums.PrecisionPolicy.FP32,
+        soc_version=soc_version,
+        )
+    suffix = os.path.splitext(model_path)[-1]
+    saved_name = os.path.basename(model_path).split('.')[0] + f"b{batch_size}_torch_aie" + suffix
+    torchaie_model.save(os.path.join(save_path, saved_name))
+    print("[INFO] torch_aie compile for YoloX finished, model saved in: ", os.path.join(save_path, saved_name))
+
+
+def parse_opt():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--torch-script-path', type=str, default='./yolox.torchscript.pt', help='trace model path')
+    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
+    parser.add_argument('--save-path', type=str, default='./', help='compiled model path')
+    parser.add_argument('--soc-version', type=str, default='Ascend310P3', help='soc version')
+    opt_args = parser.parse_args()
+    return opt_args
+
+
+def main(opt_args):
+    print("[INFO] torch_aie compile for YoloX start")
+    export_torch_aie(opt_args.torch_script_path, opt_args.batch_size, opt_args.soc_version, opt_args.save_path)
+
+if __name__ == '__main__':
+    opt = parse_opt()
+    main(opt)
\ No newline at end of file
-- 
Gitee