diff --git a/contrib/SuperResolution/README.md b/contrib/SuperResolution/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d19f1069de8858760db4d5989ea004bf2221a398 --- /dev/null +++ b/contrib/SuperResolution/README.md @@ -0,0 +1,155 @@ +# 基于MxVision的FSRCNN图像超分辨率 + +## 介绍 + +基于MindX SDK 2.0.1 mxVision开发图像超分辨率程序。本程序采用python开发,通过预处理操作对输入的图片数据解码为YUV格式图片,并将解码后的图片缩放到模型推理要求的尺寸。然后利用图像超分辨率模型FSRCNN获取得到图片超分辨率重建结果。最后,利用python的第三方图像处理库PIL将低分辨率的输入图像和超分辨率重建的结果一同可视化。其次,针对两个图片集91-images和General-100进行PSNR(峰值信噪比)验证。 + +程序输入:任意jpg图片 +程序输出:输出得到低分辨率图片(256 x 256px)和超分辨率重建图片(768 x 768px)组合的可视化大图 + +## 目录结构 + +``` +super_resolution +| +|-------- font +| |---- SourceHanSansCN-Normal-2.otf // otf字体 +|-------- image +| |---- test.jpg // 测试图片(需自行准备) +|-------- result // 测试图片程序输出存放处 +|-------- model +| |---- YUV420SP_U8_GRAY.cfg // 模型转换配置文件(灰度图) +| |---- model_conversion.sh // 模型转换脚本 +| |---- FSRCNN_256_256.om // 转换后OM模型 +|-------- testSet +| |---- 91-images-jpg // 91-images图片验证集 +| |---- general-100-jpg // general-100图片验证集 +| |---- output // 验证集结果输出目录 +| |---- evaluate.py // 模型精度验证 +|-------- README.md // ReadMe +|-------- main.py // 图像超分辨率主程序 +|-------- util.py // 公共方法 + +``` + +## 准备工作 + +> 模型转换 + +**步骤1** 获取原始模型网络及权重, [权重下载地址](https://modelzoo-train-atc.obs.cn-north-4.myhuaweicloud.com/003_Atc_Models/AE/ATC%20Model/super_resolution/FSRCNN/FSRCNN.caffemodel)、[网络下载地址](https://modelzoo-train-atc.obs.cn-north-4.myhuaweicloud.com/003_Atc_Models/AE/ATC%20Model/super_resolution/FSRCNN/FSRCNN.prototxt) + +**步骤2** AIPP配置文件-YUV420SP_U8_GRAY.cfg + +```cfg +aipp_op { + aipp_mode: static + input_format : YUV420SP_U8 + csc_switch : true + rbuv_swap_switch : false + matrix_r0c0 : 256 + matrix_r0c1 : 0 + matrix_r0c2 : 0 + matrix_r1c0 : 0 + matrix_r1c1 : 0 + matrix_r1c2 : 0 + matrix_r2c0 : 0 + matrix_r2c1 : 0 + matrix_r2c2 : 0 + input_bias_0 : 0 + input_bias_1 : 0 + input_bias_2 : 0 +} +``` + 
+色域转换，用于将输入的图片格式，转换为模型需要的图片格式，这里将YUV420SP_U8转GRAY，详细色域转换配置说明请参考[这里](https://support.huawei.com/enterprise/zh/doc/EDOC1100191944/3e66a3c#ZH-CN_TOPIC_0000001095142890) + +**步骤3** 将下载得到模型网络及权重(`FSRCNN.prototxt`、`FSRCNN.caffemodel`)、AIPP配置文件放在 `model` 目录下 + +**步骤4** 使用ATC模型转换工具进行模型转换 + +设置环境变量(请确认install_path路径是否正确) + +``` +export install_path=/usr/local/Ascend/ascend-toolkit/latest +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:${install_path}/atc/python/site-packages/auto_tune.egg/auto_tune:${install_path}/atc/python/site-packages/schedule_search.egg +export LD_LIBRARY_PATH=${install_path}/atc/lib64:$LD_LIBRARY_PATH +export ASCEND_OPP_PATH=${install_path}/opp +``` + +参照指令执行，转换FSRCNN模型成om格式 + +``` +atc --model=./FSRCNN.prototxt --weight=./FSRCNN.caffemodel --framework=0 --input_format=NCHW --input_shape="data: 1, 1, 256, 256" --output=./FSRCNN_256_256 --soc_version=Ascend310 --output_type=FP32 --insert_op_conf=YUV420SP_U8_GRAY.cfg +``` + +执行完模型转换脚本后，会生成相应的FSRCNN_256_256.om模型文件。 + +模型转换使用了ATC工具，如需更多信息请参考 [这里]( https://support.huaweicloud.com/tg-cannApplicationDev330/atlasatc_16_0005.html) + + +## 运行 + +1) 配置环境变量 + +```bash +# 执行如下命令，打开.bashrc文件 +cd $HOME +vi .bashrc + +# 在.bashrc文件中添加以下环境变量 +MX_SDK_HOME=${SDK安装路径} +LD_LIBRARY_PATH=${MX_SDK_HOME}/lib:${MX_SDK_HOME}/opensource/lib:${MX_SDK_HOME}/opensource/lib64:/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64:/usr/local/Ascend/driver/lib64/ +GST_PLUGIN_SCANNER=${MX_SDK_HOME}/opensource/libexec/gstreamer-1.0/gst-plugin-scanner +GST_PLUGIN_PATH=${MX_SDK_HOME}/opensource/lib/gstreamer-1.0:${MX_SDK_HOME}/lib/plugins +PYTHONPATH=${MX_SDK_HOME}/python + +# 保存退出.bashrc文件 +# 执行如下命令使环境变量生效 +source ~/.bashrc + +# 查看环境变量 +env +``` + +2) 准备一张室内图片，置于 image 文件夹中(仅支持jpg格式) + +3) 进入工程目录，键入执行指令，发起推理性能测试: + +``` +python3.7 main.py ${测试图片路径} +例如: python3.7 main.py image/head.jpg +``` + 
+执行完毕后,sample会将程序输出保存在工程目录下`result`中 (文件名和输入图片一致) + + + +## PSNR验证 + +对两个图片集91-images和General-100进行PSNR(峰值信噪比)测试 + +具体命令文件参照如下目录 + +``` +|-------- testSet +| |---- 91-images-jpg // 91-images图片验证集 +| |---- general-100-jpg // general-100图片验证集 +| |---- output // 验证集结果输出目录 +| |---- evaluate.py // 模型精度验证 +``` + +1. 91-images共计91张.bmp图片,General-100共计100张.bmp图片,使用PIL转换为jpg + +2. 将91-images图片输入模型中得到每个jpg图片的PSNR(峰值信噪比)如下 + [33.64 28.86 29.10 28.22 27.60 27.53 27.47 27.43 28.58 27.75 29.77 28.10 28.42 27.47 28.20 27.68 29.88 27.99 32.31 32.64 28.06 27.63 32.66 33.02 30.46 27.62 28.72 30.91 28.53 28.06 27.58 27.38 27.50 27.55 30.04 29.70 29.80 28.23 27.56 29.73 27.48 27.59 27.95 32.77 28.92 29.88 29.60 27.59 30.77 29.94 28.18 32.10 30.69 30.27 35.63 28.35 29.41 27.06 30.12 28.46 29.52 29.01 27.51 29.46 29.70 30.51 27.43 27.53 27.51 28.82 28.06 29.58 28.79 29.93 28.83 28.17 33.54 28.89 27.84 29.64 31.87 30.58 27.54 32.66 27.78 31.56 31.25 28.09 29.24 28.76 28.17] + 计算得到91张图片的平均PSNR avg= 29.23 + +3. 将General-100图片输入模型中得到每个jpg图片的PSNR(峰值信噪比)如下 + [29.09 30.25 28.06 29.66 28.70 29.06 28.23 31.02 30.34 28.04 27.58 28.36 29.46 27.87 27.59 29.78 31.77 29.70 29.56 28.91 29.76 27.70 28.85 27.83 28.43 29.49 28.66 29.53 27.87 30.36 27.57 27.54 34.45 28.10 28.91 28.47 28.62 27.75 29.82 29.00 27.53 26.83 28.26 28.36 28.26 28.19 28.18 28.98 29.44 28.51 27.73 27.93 28.91 28.81 28.50 28.43 28.42 27.32 28.87 29.33 30.58 29.31 29.69 28.45 30.17 33.51 27.67 29.88 27.38 30.73 33.13 31.50 31.45 27.64 28.48 29.98 29.43 28.78 30.10 30.90 27.77 31.66 27.11 29.68 32.94 30.61 28.45 33.29 27.73 30.12 28.11 27.74 29.48 29.42 28.97 28.16 29.49 29.28 28.88 29.56 ] + 计算得到100张图片的平均PSNR avg= 29.16 + +4. 
可视化效果 + + ![head](./result/head.jpg) + diff --git a/contrib/SuperResolution/font/SourceHanSansCN-Normal-2.otf b/contrib/SuperResolution/font/SourceHanSansCN-Normal-2.otf new file mode 100644 index 0000000000000000000000000000000000000000..091f56d11b3bce257be0ccf7c30ebed789282667 Binary files /dev/null and b/contrib/SuperResolution/font/SourceHanSansCN-Normal-2.otf differ diff --git a/contrib/SuperResolution/image/bird.jpg b/contrib/SuperResolution/image/bird.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5258345e0f0e425a308fa8ebf9bc3889ce2c81e7 Binary files /dev/null and b/contrib/SuperResolution/image/bird.jpg differ diff --git a/contrib/SuperResolution/image/butterfly.jpg b/contrib/SuperResolution/image/butterfly.jpg new file mode 100644 index 0000000000000000000000000000000000000000..93cb2acb16caea2d577d2cb50d5878201bbc6e34 Binary files /dev/null and b/contrib/SuperResolution/image/butterfly.jpg differ diff --git a/contrib/SuperResolution/image/child.jpg b/contrib/SuperResolution/image/child.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a6820ed441cbcfa7726e834b353ace76a68785c2 Binary files /dev/null and b/contrib/SuperResolution/image/child.jpg differ diff --git a/contrib/SuperResolution/image/head.jpg b/contrib/SuperResolution/image/head.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ff6dae0389c47d571b8e565b2bca9567b949a401 Binary files /dev/null and b/contrib/SuperResolution/image/head.jpg differ diff --git a/contrib/SuperResolution/image/woman.jpg b/contrib/SuperResolution/image/woman.jpg new file mode 100644 index 0000000000000000000000000000000000000000..233a7c6daec529cb33bdc2cd332230369d9b2453 Binary files /dev/null and b/contrib/SuperResolution/image/woman.jpg differ diff --git a/contrib/SuperResolution/main.py b/contrib/SuperResolution/main.py new file mode 100644 index 0000000000000000000000000000000000000000..1cc1d65bc70786495fde5b96a87d08c56a3e43d2 --- /dev/null +++ 
b/contrib/SuperResolution/main.py @@ -0,0 +1,166 @@ +# !/usr/bin/env python +# -*- encoding: utf-8 -*- +# Copyright(C) 2021. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import os +from StreamManagerApi import StreamManagerApi, MxDataInput, StringVector +import MxpiDataType_pb2 as MxpiDataType +from utils import colorize, calc_psnr +import numpy as np +from PIL import Image +from PIL import ImageFont +from PIL import ImageDraw + + +if __name__ == '__main__': + # init stream manager + streamManagerApi = StreamManagerApi() + ret = streamManagerApi.InitManager() + if ret != 0: + print("Failed to init Stream manager, ret=%s" % str(ret)) + exit() + # create streams by pipeline config file + pipeline = { + "superResolution": { + "stream_config": { + "deviceId": "0" + }, + "appsrc0": { + "props": { + "blocksize": "409600" + }, + "factory": "appsrc", + "next": "mxpi_imagedecoder0" + }, + "mxpi_imagedecoder0": { + "factory": "mxpi_imagedecoder", + "next": "mxpi_imageresize0" + }, + "mxpi_imageresize0": { + "props": { + "dataSource": "mxpi_imagedecoder0", + "resizeHeight": "256", + "resizeWidth": "256" + }, + "factory": "mxpi_imageresize", + "next": "mxpi_tensorinfer0" + }, + "mxpi_tensorinfer0": { + "props": { + "dataSource": "mxpi_imagedecoder0", + "modelPath": "model/FSRCNN_256_256.om" + }, + "factory": "mxpi_tensorinfer", + "next": "appsink0" + }, + "appsink0": { + "props": { + "blocksize": "409600" + }, + 
"factory": "appsink" + } + } + } + pipelineStr = json.dumps(pipeline).encode() + ret = streamManagerApi.CreateMultipleStreams(pipelineStr) + if ret != 0: + print("Failed to create Stream, ret=%s" % str(ret)) + exit() + + input_image_path = "./image/head.jpg" + if os.path.exists(input_image_path) != 1: + print("The image image does not exist.") + + image = Image.open(input_image_path).convert('RGB') + # 768 x 768 high resolution image and 3x reduced image + hr = image.resize((768, 768), resample=Image.BICUBIC) + lr = hr.resize((256, 256), resample=Image.BILINEAR) + lr.save("./result/lr.jpg") + + # construct the input of the stream + dataInput = MxDataInput() + with open("./result/lr.jpg", 'rb') as f: + dataInput.data = f.read() + os.remove("./result/lr.jpg") + streamName = b'superResolution' + inPluginId = 0 + key = b"mxpi_tensorinfer0" + uniqueId = streamManagerApi.SendData(streamName, inPluginId, dataInput) + if uniqueId < 0: + print("Failed to send data to stream.") + exit() + keys = [b"mxpi_imagedecoder0", b"mxpi_tensorinfer0"] + keyVec = StringVector() + for key in keys: + keyVec.push_back(key) + + inferResult = streamManagerApi.GetProtobuf(streamName, 0, keyVec) + if inferResult.size() == 0: + print("inferResult is null") + exit() + if inferResult[0].errorCode != 0: + print("GetProtobuf error. errorCode=%d, errorMsg=%s" % ( + inferResult[0].errorCode, inferResult[0].messageName.decode())) + exit() + # get the infer result + inferList0 = MxpiDataType.MxpiTensorPackageList() + inferList0.ParseFromString(inferResult[1].messageBuf) + inferVisionData = inferList0.tensorPackageVec[0].tensorVec[0].dataStr + + inferTensorShape = inferList0.tensorPackageVec[0].tensorVec[0].tensorShape + output_pic_data = np.frombuffer(inferVisionData, dtype="