diff --git a/modelzoo/contrib/.keep b/modelzoo/contrib/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modelzoo/contrib/Research/.keep b/modelzoo/contrib/Research/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modelzoo/contrib/Research/cv/.keep b/modelzoo/contrib/Research/cv/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/LICENSE b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..56ee3c8c4cc2b4b32e0975d17258f9ba515fdbcc --- /dev/null +++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/README.md b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6f269a50a07f1a76f6d8b050189126966daeeb62 --- /dev/null +++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/README.md @@ -0,0 +1,71 @@ +# AdvancedEAST + +实现了AdvancedEAST在天池ICPR数据集上的训练。 +- 参考实现: +``` +url=https://github.com/BaoWentz/AdvancedEAST-PyTorch +branch=master +commit_id=a835c8cedce4ada1bc9580754245183d9f4aaa17 +``` + +## AdvancedEAST Detail + +- 为数据集前处理增加了多线程优化 +- 增加了混合精度训练 +- 增加了多卡分布式训练 +- 增加了CosineAnnealingLR +- 优化了loss在NPU上的计算效率 + +## Requirements + +- CANN 5.0.2及对应版本的PyTorch +- `pip install -r requirements.txt` +- 下载[天池ICPR数据集](https://pan.baidu.com/s/1NSyc-cHKV3IwDo6qojIrKA),密码: ye9y + - 下载ICPR_text_train_part2_20180313.zip和[update] ICPR_text_train_part1_20180316.zip两个压缩包,新建目录icpr和子目录icpr/image_10000、icpr/txt_10000,将压缩包中image_9000、image_1000中的图片文件解压至image_10000中,将压缩包中txt_9000、txt_1000中的标签文件解压至txt_10000中 + - `bash test/prep_dataset.sh` + +## Training + +依次训练size为256x256,384x384,512x512,640x640,736x736的图片,每个size加载上个size的训练结果,加速模型收敛。 + +```bash +# 1p train perf +bash test/train_performance_1p.sh + +# 8p train perf +bash test/train_performance_8p.sh + +# 8p train full +bash test/train_full_8p.sh +# 默认依次训练256,384,512,640,736五个size,可以指定要训练size,用于恢复中断的训练,例如 +# bash test/train_full_8p.sh 640 736 + +# eval +bash test/train_eval.sh +# 默认评估736 size,可以指定要评估的size,例如 +# bash test/train_eval.sh 640 + +# finetuning +bash test/train_finetune_1p.sh + +# online inference demo +python3.7 demo.py + +# To ONNX +python3.7 pth2onnx.py +``` + +## AdvancedEAST training result + +| Size | F1-score | FPS | Npu_nums | Epochs | AMP_Type | +| :------: | :------: | :------: | :------: | :------: | :------: | +| 256 | - | 254 | 1 | - | O1 | +| 256 | - | 1075 | 8 | 60 | O1 | +| 384 | - | 118 | 1 | - | O1 | +| 384 | - | 680 | 8 | 60 | O1 | +| 512 | - | 63 | 1 | - | O1 | +| 512 | - | 400 | 8 | 60 | O1 | +| 640 | - | 37 | 1 | - | O1 | +| 640 | - | 243 | 8 | 60 | O1 | +| 736 | - | 34 | 1 | - | O1 | +| 736 | 62.41% | 218 | 8 | 60 | O1 | diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/README.raw.md 
b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/README.raw.md
new file mode 100644
index 0000000000000000000000000000000000000000..7e0639054a616a8543dd4557f0af691f8931ea4e
--- /dev/null
+++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/README.raw.md
@@ -0,0 +1,69 @@
+# AdvancedEAST
+AdvancedEAST-PyTorch is mainly inherited from
+[AdvancedEAST](https://github.com/huoyijie/AdvancedEAST),
+with some changes made for better usage in PyTorch.
+If this project is helpful to you, please consider starring it.
+
+# New features
+* written in PyTorch, easy to read and run
+* converts the dataset into LMDB format to reduce I/O overhead
+* adds precision/recall/F1_score output, which is helpful when training the model
+* just run `train.py` to automatically start training
+
+# Project files
+* config file: `cfg.py`, controls parameters
+* pre-process data: `preprocess.py`, resizes images
+* generate LMDB dataset: `imgs2LMDB.py`
+* **[optional]** *label data: `label.py`, produces label info*
+* define network: `model_VGG.py`
+* define loss function: `losses.py`
+* execute training: `train.py`
+* read LMDB dataset: `dataset.py`
+* predict: `predict.py` and `nms.py`
+* evaluate the model: `utils.py`
+
+# Network arch
+* AdvancedEast
+
+![AdvancedEast network arch](image/AdvancedEast.network.png "AdvancedEast network arch")
+
+[Principle overview (with diagrams)](https://huoyijie.cn/blog/9a37ea00-755f-11ea-98d3-6d733527e90f/play)
+
+[Post-processing (with diagrams)](https://huoyijie.cn/blog/82c8e470-7562-11ea-98d3-6d733527e90f/play)
+
+# Setup
+* python 3.6.5
+* PyTorch-gpu 1.4.0
+* lmdb 0.98
+* numpy 1.19.0
+* tqdm 4.48.0
+* natsort 7.0.1
+* openCV 4.2.0
+* shapely 1.7.0
+* **[optional]** torchsummary
+
+# Training
+* Tianchi ICPR dataset download
+link: https://pan.baidu.com/s/1NSyc-cHKV3IwDo6qojIrKA password: ye9y
+
+* prepare training data: make a data root dir (train_1000),
+copy the images and the txt label files to the root dir;
+data format details can be found at [ICPR MTWI 2018 Challenge 2: Text Detection of Web Images](https://tianchi.aliyun.com/competition/introduction.htm?spm=5176.100066.0.0.3bcad780oQ9Ce4&raceId=231651)
+* modify config params in `cfg.py`, see default values
+* **[optional]** ```python preprocess.py```, resize images to 256x256, 384x384, 512x512, 640x640, 736x736;
+training the sizes one by one speeds up model convergence
+* **[optional]** ```python imgs2LMDB.py```, generate the LMDB dataset
+* ```python train.py```, training entry point
+* ```python predict.py -p demo/001.png```, to predict (a programmatic usage sketch is given below)
+* pretrained model download (for further training or testing)
+link: https://pan.baidu.com/s/1q473YIt2b18RqpOT8rdY6g extraction code: nkit
+
+# License
+The code is released under the MIT License.
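+
+# Usage sketch
+A minimal way to call the detector from Python, following the `__main__` block of `demo.py`
+(the checkpoint and image paths below are illustrative; point them at your own files):
+
+```python
+import torch
+
+from model_VGG import advancedEAST
+from demo import predict
+
+east = advancedEAST()
+# checkpoints saved with DistributedDataParallel carry a 'module.' prefix; strip it before loading
+state_dict = {k.replace('module.', ''): v
+              for k, v in torch.load('saved_model/3T736_latest.pth', map_location='cpu').items()}
+east.load_state_dict(state_dict)
+east.eval()  # switch BatchNorm layers to inference mode
+predict(east, 'demo/001.png', pixel_threshold=0.9)
+```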
+ +# References +* [EAST:An Efficient and Accurate Scene Text Detector](https://arxiv.org/abs/1704.03155v2) + +* [CTPN:Detecting Text in Natural Image with Connectionist Text Proposal Network](https://arxiv.org/abs/1609.03605) + +* [Deep Matching Prior Network: Toward Tighter Multi-oriented Text Detection](https://arxiv.org/abs/1703.01425) diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/cfg.py b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/cfg.py new file mode 100644 index 0000000000000000000000000000000000000000..68412afa23c503eef59ad41c8984c667ab5af260 --- /dev/null +++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/cfg.py @@ -0,0 +1,119 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument('--apex', action='store_true', help='Use apex for mixed precision training') +parser.add_argument('--device', default='npu', help='cpu npu') +parser.add_argument('--size', default=736, type=int) +parser.add_argument('--local_rank', default=-1, type=int) +parser.add_argument('--img_path', default='demo/004.jpg') +parser.add_argument('--threshold', default=0.9, type=float) +parser.add_argument('--pth_path') +parser.add_argument('--epoch_num', default=60, type=int) +parser.add_argument('--val_interval', default=3, type=int) + +args = parser.parse_args() +amp = args.apex +device = args.device +local_rank = args.local_rank +img_path = args.img_path +predict_threshold = args.threshold +pth_path = args.pth_path +distributed = False if local_rank == -1 else True +is_master_node = True if local_rank < 1 else False +world_size = int(os.environ['WORLD_SIZE']) if distributed else 1 + +train_task_id = '3T' + str(args.size) +if is_master_node: + print(train_task_id) +initial_epoch = 0 +epoch_num = args.epoch_num +lr = 5e-4 +decay = 5e-4 +# clipvalue = 0.5 # default 0.5, 0 means no clip +workers = 16 +patience = 5 +val_interval = args.val_interval +load_weights = False +lambda_inside_score_loss = 4.0 +lambda_side_vertex_code_loss = 1.0 +lambda_side_vertex_coord_loss = 1.0 + +total_img = 10000 +validation_split_ratio = 0.1 +max_train_img_size = int(train_task_id[-3:]) +max_predict_img_size = int(train_task_id[-3:]) # 2400 +assert max_train_img_size in [256, 384, 512, 640, 736], \ + 'max_train_img_size must in [256, 384, 512, 640, 736]' +if max_train_img_size == 256: + batch_size = 8 +elif max_train_img_size == 384: + batch_size = 4 +elif max_train_img_size == 512: + batch_size = 2 +else: + batch_size = 1 +batch_size = batch_size * 4 +steps_per_epoch = total_img * (1 - validation_split_ratio) // batch_size +validation_steps = total_img * validation_split_ratio // batch_size + +data_dir = '/home/lcy/AdvancedEast/AdvancedEAST/icpr/' +origin_image_dir_name = 'image_10000/' +origin_txt_dir_name = 'txt_10000/' +train_image_dir_name = 'images_%s/' % train_task_id +train_label_dir_name = 'labels_%s/' % 
train_task_id +show_gt_image_dir_name = 'show_gt_images_%s/' % train_task_id +show_act_image_dir_name = 'show_act_images_%s/' % train_task_id +lmdb_trainset_dir_name = data_dir + 'Lmdb_trainset_%s/' % train_task_id +lmdb_valset_dir_name = data_dir + 'Lmdb_valset_%s/' % train_task_id +gen_origin_img = True +draw_gt_quad = True +draw_act_quad = True +val_fname = 'val_%s.txt' % train_task_id +train_fname = 'train_%s.txt' % train_task_id +# in paper it's 0.3, maybe to large to this problem +shrink_ratio = 0.2 +# pixels between 0.2 and 0.6 are side pixels +shrink_side_ratio = 0.6 +epsilon = 1e-4 + +num_channels = 3 +feature_layers_range = range(5, 1, -1) +# feature_layers_range = range(3, 0, -1) +feature_layers_num = len(feature_layers_range) +# pixel_size = 4 +pixel_size = 2 ** feature_layers_range[-1] +locked_layers = False # 是否冻结前两层参数 + +if not os.path.exists('saved_model'): + os.makedirs('saved_model', exist_ok=True) + +saved_model = '' +model_weights_path = 'model/weights_%s.{epoch:03d}-{val_loss:.3f}.h5' \ + % train_task_id +saved_model_file_path = 'saved_model/east_model_%s.h5' % train_task_id +saved_model_weights_file_path = 'saved_model/adEAST_iter_%s.pth'\ + % str(epoch_num + 1) + +pixel_threshold = 0.9 +side_vertex_pixel_threshold = 0.9 +trunc_threshold = 0.1 +iou_threshold = 0.5 +predict_cut_text_line = False +predict_write2txt = True +model_summary = False +quiet = True diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/dataset.py b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..261035ba39b4d19a125fde82713267a2445ef87e --- /dev/null +++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/dataset.py @@ -0,0 +1,97 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import numpy as np +import time + +import cfg +from PIL import Image +import torch +from torch.utils.data import Dataset +from torchvision import transforms + + +class RawDataset(Dataset): + + def __init__(self, is_val=False): + self.img_h, self.img_w = cfg.max_train_img_size, cfg.max_train_img_size + if is_val: + with open(os.path.join(cfg.data_dir, cfg.val_fname), 'r') as f_val: + f_list = f_val.readlines() + else: + with open(os.path.join(cfg.data_dir, cfg.train_fname), 'r') as f_train: + f_list = f_train.readlines() + + self.image_path_list = [] + self.labels_path_dic = {} + self.gt_xy_list_path_dic = {} + for f_line in f_list: + img_filename = str(f_line).strip().split(',')[0] + img_path = os.path.join(cfg.data_dir, cfg.train_image_dir_name, img_filename) + self.image_path_list.append(img_path) + gt_file = os.path.join(cfg.data_dir, cfg.train_label_dir_name, img_filename[:-4] + '_gt.npy') + gt_xy_list = os.path.join(cfg.data_dir, cfg.train_label_dir_name, img_filename[:-4] + '.npy') + self.labels_path_dic[img_path] = gt_file + self.gt_xy_list_path_dic[img_path] = gt_xy_list + self.image_path_list.sort() + self.nSamples = len(self.image_path_list) + + def __len__(self): + return self.nSamples + + def __getitem__(self, index): + + img_path = self.image_path_list[index] + label = np.load(self.labels_path_dic[img_path]) + gt_xy_list = np.load(self.gt_xy_list_path_dic[img_path]) + try: + img = Image.open(img_path).convert('RGB') # for color image + + except IOError: + print(f'Corrupted image for {index}') + # make dummy image and dummy label for corrupted image. + img = Image.new('RGB', (self.img_w, self.img_h)) + img_tensor = transforms.ToTensor()(img) + label = np.transpose(label, (2, 0, 1)) + + return (img_tensor, label, gt_xy_list) + + +def data_collate(batch): + imgs = [] + labels = [] + gt_xy_list = [] # 长度为N的列表,每个值为该图片中所有矩形框的坐标 + # 例如:[(31, 4, 2), (10, 4, 2), (47, 4, 2), (28, 4, 2)] + for info in batch: + imgs.append(info[0]) + labels.append(info[1]) + gt_xy_list.append(info[2]) + return torch.stack(imgs, 0), torch.tensor(np.array(labels)), gt_xy_list + + +if __name__ == '__main__': + tick = time.time() + train_dataset = RawDataset(is_val=False) + data_loader_A = torch.utils.data.DataLoader( + train_dataset, batch_size=cfg.batch_size, + collate_fn=data_collate, + shuffle=True, + num_workers=int(cfg.workers), + pin_memory=True) + for i, (image_tensors, labels, gt_xy_list) in enumerate(data_loader_A): + print(image_tensors.shape, labels.shape) + tock = time.time() + print(tock-tick) + diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/demo.py b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/demo.py new file mode 100644 index 0000000000000000000000000000000000000000..ac5c9ec1f73d465ea34fb3549bc433756655c933 --- /dev/null +++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/demo.py @@ -0,0 +1,161 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import numpy as np +from PIL import Image, ImageDraw +import torch +from torchvision import transforms + +import cfg +from label import point_inside_of_quad +from model_VGG import advancedEAST +from preprocess import resize_image +from nms import nms + + +def sigmoid(x): + """`y = 1 / (1 + exp(-x))`""" + return 1 / (1 + np.exp(-x)) + + +def cut_text_line(geo, scale_ratio_w, scale_ratio_h, im_array, img_path, s): + geo /= [scale_ratio_w, scale_ratio_h] + p_min = np.amin(geo, axis=0) + p_max = np.amax(geo, axis=0) + min_xy = p_min.astype(int) + max_xy = p_max.astype(int) + 2 + sub_im_arr = im_array[min_xy[1]:max_xy[1], min_xy[0]:max_xy[0], :].copy() + for m in range(min_xy[1], max_xy[1]): + for n in range(min_xy[0], max_xy[0]): + if not point_inside_of_quad(n, m, geo, p_min, p_max): + sub_im_arr[m - min_xy[1], n - min_xy[0], :] = 255 + sub_im = Image.fromarray(sub_im_arr.astype('uint8')).convert('RGB') + sub_im.save(img_path + '_subim%d.jpg' % s) + + +def predict(east_detect, img_path, pixel_threshold, quiet=False): + img = Image.open(img_path) # 为PIL图像对象,默认RGB + d_wight, d_height = resize_image(img, cfg.max_predict_img_size) + img = img.resize((d_wight, d_height), Image.NEAREST).convert('RGB') + x = transforms.ToTensor()(img) + x = torch.unsqueeze(x, 0) # 增加一个维度 + y = east_detect(x) + y = torch.squeeze(y, 0) # 减少一个维度 + print(y.shape) + y = y.detach().numpy() # 7*64*64 + if y.shape[0] == 7: + y = y.transpose((1, 2, 0)) # CHW->HWC + y[:, :, :3] = sigmoid(y[:, :, :3]) + cond = np.greater_equal(y[:, :, 0], pixel_threshold) + activation_pixels = np.where(cond) + quad_scores, quad_after_nms = nms(y, activation_pixels) + with Image.open(img_path) as im: + im_array = np.array(im.convert('RGB')) # 图片转为numpy数组 + d_wight, d_height = resize_image(im, cfg.max_predict_img_size) + scale_ratio_w = d_wight / im.width + scale_ratio_h = d_height / im.height + im = im.resize((d_wight, d_height), Image.NEAREST).convert('RGB') + quad_im = im.copy() + draw = ImageDraw.Draw(im) + for i, j in zip(activation_pixels[0], activation_pixels[1]): + px = (j + 0.5) * cfg.pixel_size + py = (i + 0.5) * cfg.pixel_size + line_width, line_color = 1, 'red' + if y[i, j, 1] >= cfg.side_vertex_pixel_threshold: + if y[i, j, 2] < cfg.trunc_threshold: + line_width, line_color = 2, 'yellow' + elif y[i, j, 2] >= 1 - cfg.trunc_threshold: + line_width, line_color = 2, 'green' + draw.line([(px - 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size), + (px + 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size), + (px + 0.5 * cfg.pixel_size, py + 0.5 * cfg.pixel_size), + (px - 0.5 * cfg.pixel_size, py + 0.5 * cfg.pixel_size), + (px - 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size)], + width=line_width, fill=line_color) + im.save(img_path + '_act.jpg') + quad_draw = ImageDraw.Draw(quad_im) + txt_items = [] + for score, geo, s in zip(quad_scores, quad_after_nms, + range(len(quad_scores))): + if np.amin(score) > 0: + quad_draw.line([tuple(geo[0]), + tuple(geo[1]), + tuple(geo[2]), + tuple(geo[3]), + tuple(geo[0])], width=2, fill='red') + if cfg.predict_cut_text_line: + cut_text_line(geo, scale_ratio_w, scale_ratio_h, im_array, + img_path, s) + rescaled_geo = geo / [scale_ratio_w, scale_ratio_h] # (N, 4, 2)标签坐标 + rescaled_geo_list = np.reshape(rescaled_geo, (8,)).tolist() + txt_item = ','.join(map(str, rescaled_geo_list)) + txt_items.append(txt_item + '\n') + elif not quiet: + print('quad invalid with vertex num less then 4.') + 
quad_im.save(img_path + '_predict.jpg') + if cfg.predict_write2txt and len(txt_items) > 0: + with open(img_path[:-4] + '.txt', 'w') as f_txt: + f_txt.writelines(txt_items) + + +def predict_txt(east_detect, img_path, txt_path, pixel_threshold, quiet=False): + img = Image.open(img_path) # 为PIL图像对象,默认RGB + d_wight, d_height = resize_image(img, cfg.max_predict_img_size) + scale_ratio_w = d_wight / img.width + scale_ratio_h = d_height / img.height + transform = transforms.Compose([ + transforms.Resize((d_wight, d_height), interpolation=2), + transforms.ToTensor() + ]) + x = transform(img) + x = torch.unsqueeze(x, 0) # 增加一个维度 + y = east_detect(x) + y = torch.squeeze(y, 0) # 减少一个维度 + print(y.shape) + y = y.detach().numpy() # 7*64*64 + if y.shape[0] == 7: + y = y.transpose((1, 2, 0)) # CHW->HWC + y[:, :, :3] = sigmoid(y[:, :, :3]) + cond = np.greater_equal(y[:, :, 0], pixel_threshold) + activation_pixels = np.where(cond) + quad_scores, quad_after_nms = nms(y, activation_pixels) + + txt_items = [] + for score, geo in zip(quad_scores, quad_after_nms): + if np.amin(score) > 0: + rescaled_geo = geo / [scale_ratio_w, scale_ratio_h] + rescaled_geo_list = np.reshape(rescaled_geo, (8,)).tolist() + txt_item = ','.join(map(str, rescaled_geo_list)) + txt_items.append(txt_item + '\n') + elif not quiet: + print('quad invalid with vertex num less then 4.') + if cfg.predict_write2txt and len(txt_items) > 0: + with open(txt_path, 'w') as f_txt: + f_txt.writelines(txt_items) + + +if __name__ == '__main__': + if not os.path.exists('demo'): + os.makedirs('./demo', exist_ok=True) + img_path = cfg.img_path + threshold = float(cfg.predict_threshold) + pth_path = cfg.pth_path if cfg.pth_path else 'saved_model/3T736_latest.pth' + print(img_path, threshold) + + east = advancedEAST() + state_dict = {k.replace('module.', ''): v for k, v in torch.load(pth_path, map_location='cpu').items()} + east.load_state_dict(state_dict) + predict(east, img_path, threshold) diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/env_npu.sh b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/env_npu.sh new file mode 100644 index 0000000000000000000000000000000000000000..408016ed1975bcebbe25517da3783c56547d43e3 --- /dev/null +++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/env_npu.sh @@ -0,0 +1,69 @@ +#!/bin/bash +export install_path=/usr/local/Ascend + +if [ -d ${install_path}/toolkit ]; then + export LD_LIBRARY_PATH=/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH} + export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH + export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH + export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH + export ASCEND_OPP_PATH=${install_path}/opp +else + if [ -d ${install_path}/nnae/latest ];then + export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/nnae/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH + export 
PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/nnae/latest + else + export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest + fi +fi + + +#将Host日志输出到串口,0-关闭/1-开启 +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +#设置默认日志级别,0-debug/1-info/2-warning/3-error +export ASCEND_GLOBAL_LOG_LEVEL=3 +#设置Event日志开启标志,0-关闭/1-开启 +export ASCEND_GLOBAL_EVENT_ENABLE=0 +#设置是否开启taskque,0-关闭/1-开启 +export TASK_QUEUE_ENABLE=1 +#设置是否开启PTCopy,0-关闭/1-开启 +export PTCOPY_ENABLE=1 +#设置是否开启combined标志,0-关闭/1-开启 +export COMBINED_ENABLE=1 +#设置特殊场景是否需要重新编译,不需要修改 +export DYNAMIC_OP="ADD#MUL" +#HCCL白名单开关,1-关闭/0-开启 +export HCCL_WHITELIST_DISABLE=1 +export HCCL_IF_IP=$(hostname -I |awk '{print $1}') + +ulimit -SHn 512000 + +path_lib=$(python3.7 -c """ +import sys +import re +result='' +for index in range(len(sys.path)): + match_sit = re.search('-packages', sys.path[index]) + if match_sit is not None: + match_lib = re.search('lib', sys.path[index]) + + if match_lib is not None: + end=match_lib.span()[1] + result += sys.path[index][0:end] + ':' + + result+=sys.path[index] + '/torch/lib:' +print(result)""" +) + +echo ${path_lib} + +export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH + diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/eval.py b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/eval.py new file mode 100644 index 0000000000000000000000000000000000000000..eee11d8592927e0f43f4ff9aa909403932f2017c --- /dev/null +++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/eval.py @@ -0,0 +1,96 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# 
you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import torch +if torch.__version__>= '1.8.1': + import torch_npu +import torch.utils.data +import multiprocessing +import numpy as np + +from model_VGG import advancedEAST +from losses import quad_loss +from dataset import RawDataset, data_collate +from utils import Averager, eval_pre_rec_f1 +import cfg + +device = torch.device(cfg.device) +#device = torch.device("cpu") + +def eval_func(i, out, gt_xy_list): + eval_p_r_f = eval_pre_rec_f1() + eval_p_r_f.add(out, gt_xy_list) + mPre, mRec, mF1_score = eval_p_r_f.val() + np.save('/home/lcy/AdvancedEast/AdvancedEAST/val_temp/{}.npy'.format(str(i)), [mPre, mRec, mF1_score]) + eval_p_r_f.reset() + + +def eval(): + """ dataset preparation """ + val_dataset = RawDataset(is_val=True) + + valid_loader = torch.utils.data.DataLoader( + val_dataset, + batch_size=4, + collate_fn=data_collate, + shuffle=False, + num_workers=4, + pin_memory=True) + + model = advancedEAST() + state_dict = {k.replace('module.', ''): v for k, v in torch.load(cfg.pth_path, map_location='cpu').items()} + model.load_state_dict(state_dict) + model = model.to(device) + model.eval() + loss_func = quad_loss + val_loss_avg = Averager() + val_Loss_list = [] + thread_pool = multiprocessing.Pool(multiprocessing.cpu_count()) + i = 0 + for image_tensors, labels, gt_xy_list in valid_loader: + batch_x = image_tensors.float().to(device) + batch_y = labels.float().to(device) + + out = model(batch_x) + loss = loss_func(batch_y, out) + + val_loss_avg.add(loss) + val_Loss_list.append(val_loss_avg.val()) + thread_pool.apply_async(eval_func, args=(i, out.cpu().detach(), gt_xy_list)) + i += 1 + + thread_pool.close() + thread_pool.join() + + print('loss:{:.3f}'.format(val_loss_avg.val().item())) + val_loss_avg.reset() + + mPre = mRec = mF1_score = 0 + size = len(valid_loader) + for i in range(size): + arr = np.load('val_temp/{}.npy'.format(str(i))) + mPre += arr[0] + mRec += arr[1] + mF1_score += arr[2] + mPre /= size + mRec /= size + mF1_score /= size + print('precision:{:.2f}% recall:{:.2f}% f1-score:{:.2f}%'.format(mPre, mRec, mF1_score)) + + +if __name__ == '__main__': + os.makedirs('val_temp', exist_ok=True) + eval() diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/label.py b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/label.py new file mode 100644 index 0000000000000000000000000000000000000000..4fe9cb3e3c7e402d8b604412bf3e7ee6b2c92f5d --- /dev/null +++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/label.py @@ -0,0 +1,200 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import os +from PIL import Image, ImageDraw +import math +import multiprocessing +import cfg + + +def point_inside_of_quad(px, py, quad_xy_list, p_min, p_max): + if (p_min[0] <= px <= p_max[0]) and (p_min[1] <= py <= p_max[1]): + xy_list = np.zeros((4, 2)) + xy_list[:3, :] = quad_xy_list[1:4, :] - quad_xy_list[:3, :] + xy_list[3] = quad_xy_list[0, :] - quad_xy_list[3, :] + yx_list = np.zeros((4, 2)) + yx_list[:, :] = quad_xy_list[:, -1:-3:-1] + a = xy_list * ([py, px] - yx_list) + b = a[:, 0] - a[:, 1] + if np.amin(b) >= 0 or np.amax(b) <= 0: + return True + else: + return False + else: + return False + + +def point_inside_of_nth_quad(px, py, xy_list, shrink_1, long_edge): + nth = -1 + vs = [[[0, 0, 3, 3, 0], [1, 1, 2, 2, 1]], + [[0, 0, 1, 1, 0], [2, 2, 3, 3, 2]]] + for ith in range(2): + quad_xy_list = np.concatenate(( + np.reshape(xy_list[vs[long_edge][ith][0]], (1, 2)), + np.reshape(shrink_1[vs[long_edge][ith][1]], (1, 2)), + np.reshape(shrink_1[vs[long_edge][ith][2]], (1, 2)), + np.reshape(xy_list[vs[long_edge][ith][3]], (1, 2))), axis=0) + p_min = np.amin(quad_xy_list, axis=0) + p_max = np.amax(quad_xy_list, axis=0) + if point_inside_of_quad(px, py, quad_xy_list, p_min, p_max): + if nth == -1: + nth = ith + else: + nth = -1 + break + return nth + + +def shrink(xy_list, ratio=cfg.shrink_ratio): + if ratio == 0.0: + return xy_list, xy_list + diff_1to3 = xy_list[:3, :] - xy_list[1:4, :] + diff_4 = xy_list[3:4, :] - xy_list[0:1, :] + diff = np.concatenate((diff_1to3, diff_4), axis=0) + dis = np.sqrt(np.sum(np.square(diff), axis=-1)) # 计算四条边的长度 + # determine which are long or short edges + long_edge = int(np.argmax(np.sum(np.reshape(dis, (2, 2)), axis=0))) # 0或者1 + short_edge = 1 - long_edge + # cal r length array + r = [np.minimum(dis[i], dis[(i + 1) % 4]) for i in range(4)] + # cal theta array + diff_abs = np.abs(diff) + diff_abs[:, 0] += cfg.epsilon + theta = np.arctan(diff_abs[:, 1] / diff_abs[:, 0]) + # shrink two long edges + temp_new_xy_list = np.copy(xy_list) + shrink_edge(xy_list, temp_new_xy_list, long_edge, r, theta, ratio) + shrink_edge(xy_list, temp_new_xy_list, long_edge + 2, r, theta, ratio) + # shrink two short edges + new_xy_list = np.copy(temp_new_xy_list) + shrink_edge(temp_new_xy_list, new_xy_list, short_edge, r, theta, ratio) + shrink_edge(temp_new_xy_list, new_xy_list, short_edge + 2, r, theta, ratio) + return temp_new_xy_list, new_xy_list, long_edge # 缩短后的长边,缩短后的短边,长边下标 + + +def shrink_edge(xy_list, new_xy_list, edge, r, theta, ratio=cfg.shrink_ratio): # 缩短一条边 + if ratio == 0.0: + return + start_point = edge # 边的起始点下标(0或1) + end_point = (edge + 1) % 4 + long_start_sign_x = np.sign( + xy_list[end_point, 0] - xy_list[start_point, 0]) + new_xy_list[start_point, 0] = \ + xy_list[start_point, 0] + \ + long_start_sign_x * ratio * r[start_point] * np.cos(theta[start_point]) + long_start_sign_y = np.sign( + xy_list[end_point, 1] - xy_list[start_point, 1]) + new_xy_list[start_point, 1] = \ + xy_list[start_point, 1] + \ + long_start_sign_y * ratio * r[start_point] * np.sin(theta[start_point]) + # long edge one, end point + long_end_sign_x = -1 * long_start_sign_x + new_xy_list[end_point, 0] = \ + xy_list[end_point, 0] + \ + long_end_sign_x * ratio * r[end_point] * np.cos(theta[start_point]) + long_end_sign_y = -1 * long_start_sign_y + new_xy_list[end_point, 1] = \ + xy_list[end_point, 1] + \ + long_end_sign_y * ratio * r[end_point] * 
np.sin(theta[start_point]) + + +def gen_gt_npy(data_dir, f_list): + for line in f_list: + line_cols = str(line).strip().split(',') + img_name, width, height = \ + line_cols[0].strip(), int(line_cols[1].strip()), \ + int(line_cols[2].strip()) + gt = np.zeros((height // cfg.pixel_size, width // cfg.pixel_size, 7)) + train_label_dir = os.path.join(data_dir, cfg.train_label_dir_name) # 'labels_%s/' % train_task_id + xy_list_array = np.load(os.path.join(train_label_dir, + img_name[:-4] + '.npy')) + train_image_dir = os.path.join(data_dir, cfg.train_image_dir_name) + with Image.open(os.path.join(train_image_dir, img_name)) as im: + draw = ImageDraw.Draw(im) + for xy_list in xy_list_array: + _, shrink_xy_list, _ = shrink(xy_list, cfg.shrink_ratio) + shrink_1, _, long_edge = shrink(xy_list, cfg.shrink_side_ratio) + p_min = np.amin(shrink_xy_list, axis=0) + p_max = np.amax(shrink_xy_list, axis=0) + # floor of the float + ji_min = (p_min / cfg.pixel_size - 0.5).astype(int) - 1 + # +1 for ceil of the float and +1 for include the end + ji_max = (p_max / cfg.pixel_size - 0.5).astype(int) + 3 + imin = np.maximum(0, ji_min[1]) + imax = np.minimum(height // cfg.pixel_size, ji_max[1]) + jmin = np.maximum(0, ji_min[0]) + jmax = np.minimum(width // cfg.pixel_size, ji_max[0]) + for i in range(imin, imax): + for j in range(jmin, jmax): + px = (j + 0.5) * cfg.pixel_size + py = (i + 0.5) * cfg.pixel_size + if point_inside_of_quad(px, py, + shrink_xy_list, p_min, p_max): + gt[i, j, 0] = 1 + line_width, line_color = 1, 'red' + ith = point_inside_of_nth_quad(px, py, + xy_list, + shrink_1, + long_edge) + vs = [[[3, 0], [1, 2]], [[0, 1], [2, 3]]] + if ith in range(2): + gt[i, j, 1] = 1 + if ith == 0: + line_width, line_color = 2, 'yellow' + else: + line_width, line_color = 2, 'green' + gt[i, j, 2:3] = ith + gt[i, j, 3:5] = \ + xy_list[vs[long_edge][ith][0]] - [px, py] + gt[i, j, 5:] = \ + xy_list[vs[long_edge][ith][1]] - [px, py] + draw.line([(px - 0.5 * cfg.pixel_size, + py - 0.5 * cfg.pixel_size), + (px + 0.5 * cfg.pixel_size, + py - 0.5 * cfg.pixel_size), + (px + 0.5 * cfg.pixel_size, + py + 0.5 * cfg.pixel_size), + (px - 0.5 * cfg.pixel_size, + py + 0.5 * cfg.pixel_size), + (px - 0.5 * cfg.pixel_size, + py - 0.5 * cfg.pixel_size)], + width=line_width, fill=line_color) + act_image_dir = os.path.join(cfg.data_dir, + cfg.show_act_image_dir_name) + if cfg.draw_act_quad: + im.save(os.path.join(act_image_dir, img_name)) + train_label_dir = os.path.join(data_dir, cfg.train_label_dir_name) # 'labels_%s/' % train_task_id + np.save(os.path.join(train_label_dir, + img_name[:-4] + '_gt.npy'), gt) + + +def process_label(data_dir=cfg.data_dir): + with open(os.path.join(data_dir, cfg.val_fname), 'r') as f_val: + f_list = f_val.readlines() + with open(os.path.join(data_dir, cfg.train_fname), 'r') as f_train: + f_list.extend(f_train.readlines()) + workers = multiprocessing.cpu_count() + batch_size = math.ceil(len(f_list) / workers) + batch_list = [f_list[i * batch_size:(i + 1) * batch_size] for i in range(workers)] + thread_pool = multiprocessing.Pool(workers) + for i in range(workers): + thread_pool.apply_async(gen_gt_npy, args=(data_dir, batch_list[i])) + thread_pool.close() + thread_pool.join() + + +if __name__ == '__main__': + process_label() diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/losses.py b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/losses.py new file mode 100644 index 
0000000000000000000000000000000000000000..2dd4ae8e3b5d17dd3032d189d79169897e80cc9c --- /dev/null +++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/losses.py @@ -0,0 +1,91 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn as nn + +import cfg +import numpy as np + +device = torch.device(cfg.device) + + +def quad_loss(y_true, y_pred): + if y_true.size(1) == 7: + y_true = y_true.permute(0, 2, 3, 1) # NCHW->NHWC + y_pred = y_pred.permute(0, 2, 3, 1) # NCHW->NHWC + # loss for inside_score + logits = y_pred[:, :, :, :1] + labels = y_true[:, :, :, :1] # NHW1 + # balance positive and negative samples in an image + beta = 1 - torch.mean(labels) + # first apply sigmoid activation + predicts = nn.Sigmoid().to(device)(logits) + # log +epsilon for stable cal + inside_score_loss = torch.mean( + -1 * (beta * labels * torch.log(predicts + cfg.epsilon) + + (1 - beta) * (1 - labels) * torch.log(1 - predicts + cfg.epsilon))) + inside_score_loss = inside_score_loss * cfg.lambda_inside_score_loss + + # loss for side_vertex_code + vertex_logits = y_pred[:, :, :, 1:3] + vertex_labels = y_true[:, :, :, 1:3] + vertex_beta = 1 - (torch.mean(y_true[:, :, :, 1:2]) + / (torch.mean(labels) + cfg.epsilon)) + vertex_predicts = nn.Sigmoid().to(device)(vertex_logits) + pos = -1 * vertex_beta * vertex_labels * torch.log(vertex_predicts + cfg.epsilon) + neg = -1 * (1 - vertex_beta) * (1 - vertex_labels) * torch.log( + 1 - vertex_predicts + cfg.epsilon) + positive_weights = torch.eq(y_true[:, :, :, 0], 1).float() + side_vertex_code_loss = \ + torch.sum(torch.sum(pos + neg, dim=-1) * positive_weights) / ( + torch.sum(positive_weights) + cfg.epsilon) + side_vertex_code_loss = side_vertex_code_loss * cfg.lambda_side_vertex_code_loss + + # loss for side_vertex_coord delta + g_hat = y_pred[:, :, :, 3:] # N*W*H*8 + g_true = y_true[:, :, :, 3:] + vertex_weights = torch.eq(y_true[:, :, :, 1], 1).float() + pixel_wise_smooth_l1norm = smooth_l1_loss(g_hat, g_true, vertex_weights) # N*W*H + side_vertex_coord_loss = torch.sum(pixel_wise_smooth_l1norm) / ( + torch.sum(vertex_weights) + cfg.epsilon) + side_vertex_coord_loss = side_vertex_coord_loss * cfg.lambda_side_vertex_coord_loss + return inside_score_loss + side_vertex_code_loss + side_vertex_coord_loss + + +def smooth_l1_loss(prediction_tensor, target_tensor, weights): + n_q = torch.reshape(quad_norm(target_tensor), weights.size()) + pixel_wise_smooth_l1norm = torch.nn.SmoothL1Loss(reduction='none')(prediction_tensor, target_tensor) + pixel_wise_smooth_l1norm = torch.sum(pixel_wise_smooth_l1norm, dim=-1) / n_q * weights # N*W*H + return pixel_wise_smooth_l1norm + + +def quad_norm(g_true): # 尾部短边长度*4 + diff = g_true[:, :, :, 0:2] - g_true[:, :, :, 2:4] + square = diff**2 + distance = torch.sqrt(torch.sum(square, dim=-1)) + distance = distance * 4.0 + distance = distance + cfg.epsilon + return distance + + +if __name__ == '__main__': + gt_1 = np.load('check/1_gt.npy') + gt_2 = 
np.load('check/2_gt.npy') + gt_1 = gt_1[np.newaxis] + gt_2 = gt_2[np.newaxis] + tensor_1 = torch.from_numpy(gt_1).to(device) + tensor_2 = torch.from_numpy(gt_2).to(device) + print(tensor_1.shape) + print(quad_loss(tensor_1, tensor_2)) # GT=1.0282 diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/model_VGG.py b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/model_VGG.py new file mode 100644 index 0000000000000000000000000000000000000000..7481d7d982cb46e97ea4832c404dbb23530f1388 --- /dev/null +++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/model_VGG.py @@ -0,0 +1,111 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import cfg + + +class advancedEAST(nn.Module): + def __init__(self): + super(advancedEAST, self).__init__() + + # Bottom-up layers + self.layer2 = self.make_layers([64, 64, 'M', 128, 128, 'M'], in_channels=3) + self.layer3 = self.make_layers([256, 256, 256, 'M'], in_channels=128) + self.layer4 = self.make_layers([512, 512, 512, 'M'], in_channels=256) + self.layer5 = self.make_layers([512, 512, 512, 'M'], in_channels=512) + # Top-down + self.merging1 = self.merging(i=2) + self.merging2 = self.merging(i=3) + self.merging3 = self.merging(i=4) + # before output layers + self.last_bn = nn.BatchNorm2d(32) + self.conv_last = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1) + self.inside_score_conv = nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0) + self.side_v_code_conv = nn.Conv2d(32, 2, kernel_size=1, stride=1, padding=0) + self.side_v_coord_conv = nn.Conv2d(32, 4, kernel_size=1, stride=1, padding=0) + # locked first two conv layers + if cfg.locked_layers: + i = 1 + for m in self.layer2.children(): + if isinstance(m, nn.Conv2d) and i <= 2: + print('冻结第{}层参数,层属性:{}'.format(i, m)) + for param in m.parameters(): + param.requires_grad = False + i += 1 + + def make_layers(self, cfg_list, in_channels=3, batch_norm=True): # VGG part + layers = [] + for v in cfg_list: + if v == 'M': + layers += [nn.MaxPool2d(kernel_size=2, stride=2)] + else: + conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) + if batch_norm: + layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] + else: + layers += [conv2d, nn.ReLU(inplace=True)] + in_channels = v + return nn.Sequential(*layers) + + def merging(self, i=2): + in_size = {'2': 1024, '3': 384, '4': 192} + layers = [ + nn.BatchNorm2d(in_size[str(i)]), + nn.Conv2d(in_size[str(i)], 128 // 2 ** (i - 2), kernel_size=1, stride=1, padding=0), + nn.ReLU(), + nn.BatchNorm2d(128 // 2 ** (i - 2)), + nn.Conv2d(128 // 2 ** (i - 2), 128 // 2 ** (i - 2), kernel_size=3, stride=1, padding=1), + nn.ReLU()] + return nn.Sequential(*layers) + + def forward(self, x): + # Bottom-up + f4 = self.layer2(x) # 128 + f3 = self.layer3(f4) # 256 + f2 = self.layer4(f3) # 512 + f1 = self.layer5(f2) # 512 + # Top-down + h1 = f1 + H1 = 
nn.UpsamplingNearest2d(scale_factor=2)(h1) + concat1 = torch.cat((H1, f2), axis=1) # 1024 + h2 = self.merging1(concat1) # 128 + H2 = nn.UpsamplingNearest2d(scale_factor=2)(h2) + concat2 = torch.cat((H2, f3), axis=1) # 128+256 + h3 = self.merging2(concat2) # 64 + H3 = nn.UpsamplingNearest2d(scale_factor=2)(h3) + concat3 = torch.cat((H3, f4), axis=1) # 64+128 + h4 = self.merging3(concat3) # 32 + # before output layers + bn = self.last_bn(h4) + before_output = F.relu(self.conv_last(bn)) + inside_score = self.inside_score_conv(before_output) + side_v_code = self.side_v_code_conv(before_output) + side_v_coord = self.side_v_coord_conv(before_output) + east_detect = torch.cat((inside_score, side_v_code, side_v_coord), axis=1) + return east_detect + + +if __name__ == '__main__': + net = advancedEAST() + if cfg.model_summary: + try: + from torchsummary import summary + summary(net, input_size=(3, 128, 128)) + except ImportError: + print("\"torchsummary\" not found, please install to visualize the model architecture.") + cfg.model_summary = False diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/nms.py b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/nms.py new file mode 100644 index 0000000000000000000000000000000000000000..45df8853e94a2e9ab9afb13e7590553cead25f88 --- /dev/null +++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/nms.py @@ -0,0 +1,98 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np + +import cfg + + +def should_merge(region, i, j): + neighbor = {(i, j - 1)} + return not region.isdisjoint(neighbor) + + +def region_neighbor(region_set): + region_pixels = np.array(list(region_set)) + j_min = np.amin(region_pixels, axis=0)[1] - 1 + j_max = np.amax(region_pixels, axis=0)[1] + 1 + i_m = np.amin(region_pixels, axis=0)[0] + 1 + region_pixels[:, 0] += 1 + neighbor = {(region_pixels[n, 0], region_pixels[n, 1]) for n in + range(len(region_pixels))} + neighbor.add((i_m, j_min)) + neighbor.add((i_m, j_max)) + return neighbor + + +def region_group(region_list): + S = [i for i in range(len(region_list))] + D = [] + while len(S) > 0: + m = S.pop(0) + if len(S) == 0: + # S has only one element, put it to D + D.append([m]) + else: + D.append(rec_region_merge(region_list, m, S)) + return D + + +def rec_region_merge(region_list, m, S): + rows = [m] + tmp = [] + for n in S: + if not region_neighbor(region_list[m]).isdisjoint(region_list[n]) or \ + not region_neighbor(region_list[n]).isdisjoint(region_list[m]): + # 第m与n相交 + tmp.append(n) + for d in tmp: + S.remove(d) + for e in tmp: + rows.extend(rec_region_merge(region_list, e, S)) + return rows + + +def nms(predict, activation_pixels, threshold=cfg.side_vertex_pixel_threshold): + region_list = [] + for i, j in zip(activation_pixels[0], activation_pixels[1]): + merge = False + for k in range(len(region_list)): + if should_merge(region_list[k], i, j): + region_list[k].add((i, j)) + merge = True + # Fixme 重叠文本区域处理,存在和多个区域邻接的pixels,先都merge试试 + # break + if not merge: + region_list.append({(i, j)}) + D = region_group(region_list) + quad_list = np.zeros((len(D), 4, 2)) + score_list = np.zeros((len(D), 4)) + for group, g_th in zip(D, range(len(D))): + total_score = np.zeros((4, 2)) + for row in group: + for ij in region_list[row]: + score = predict[ij[0], ij[1], 1] # 边界像素 + if score >= threshold: + ith_score = predict[ij[0], ij[1], 2:3] # 头还是尾 + if not (cfg.trunc_threshold <= ith_score < 1 - + cfg.trunc_threshold): # 判断是否为头跟尾 + ith = int(np.around(ith_score)) + total_score[ith * 2:(ith + 1) * 2] += score + px = (ij[1] + 0.5) * cfg.pixel_size + py = (ij[0] + 0.5) * cfg.pixel_size + p_v = [px, py] + np.reshape(predict[ij[0], ij[1], 3:7], (2, 2)) # 4位geo + quad_list[g_th, ith * 2:(ith + 1) * 2] += score * p_v + score_list[g_th] = total_score[:, 0] + quad_list[g_th] /= (total_score + cfg.epsilon) + return score_list, quad_list diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/nohup.out b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/nohup.out new file mode 100644 index 0000000000000000000000000000000000000000..04544ebbba561930dca8ab79d8a6292a25992f79 --- /dev/null +++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/nohup.out @@ -0,0 +1,4 @@ +3T736 +loss:0.602 +precision:81.80% recall:54.24% f1-score:62.73% +THPModule_npu_shutdown success. diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/preprocess.py b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..6ed57772b4f9e28750f8e888c510be6bd69257d8 --- /dev/null +++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/preprocess.py @@ -0,0 +1,216 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from PIL import Image, ImageDraw +import os +import multiprocessing +import math + +import cfg +from label import shrink + + +def batch_reorder_vertexes(xy_list_array): + reorder_xy_list_array = np.zeros_like(xy_list_array) + for xy_list, i in zip(xy_list_array, range(len(xy_list_array))): + reorder_xy_list_array[i] = reorder_vertexes(xy_list) + return reorder_xy_list_array + + +def reorder_vertexes(xy_list): + reorder_xy_list = np.zeros_like(xy_list) + # determine the first point with the smallest x, + # if two has same x, choose that with smallest y, + ordered = np.argsort(xy_list, axis=0) + xmin1_index = ordered[0, 0] + xmin2_index = ordered[1, 0] + if xy_list[xmin1_index, 0] == xy_list[xmin2_index, 0]: + if xy_list[xmin1_index, 1] <= xy_list[xmin2_index, 1]: + reorder_xy_list[0] = xy_list[xmin1_index] + first_v = xmin1_index + else: + reorder_xy_list[0] = xy_list[xmin2_index] + first_v = xmin2_index + else: + reorder_xy_list[0] = xy_list[xmin1_index] + first_v = xmin1_index + # connect the first point to others, the third point on the other side of + # the line with the middle slope + # 计算第一个点与其他三个点连线的斜率,取斜率居中的点作为第三个点 + others = list(range(4)) + others.remove(first_v) + k = np.zeros((len(others),)) + for index, i in zip(others, range(len(others))): + k[i] = (xy_list[index, 1] - xy_list[first_v, 1]) \ + / (xy_list[index, 0] - xy_list[first_v, 0] + cfg.epsilon) + k_mid = np.argsort(k)[1] + third_v = others[k_mid] + reorder_xy_list[2] = xy_list[third_v] + # determine the second point which on the bigger side of the middle line + others.remove(third_v) + b_mid = xy_list[first_v, 1] - k[k_mid] * xy_list[first_v, 0] # 得到中间那条线的截距b + second_v, fourth_v = 0, 0 + for index, i in zip(others, range(len(others))): + # delta = y - (k * x + b) + delta_y = xy_list[index, 1] - (k[k_mid] * xy_list[index, 0] + b_mid) + if delta_y > 0: # y点在中间那条线之上就设置为第二个点,否则为第四个点 + second_v = index + else: + fourth_v = index + reorder_xy_list[1] = xy_list[second_v] + reorder_xy_list[3] = xy_list[fourth_v] + # compare slope of 13 and 24, determine the final order + # 经过上面的步骤k13有两种情况,要么大于0.要么小于0,k24的斜率正好相反。 + k13 = k[k_mid] + k24 = (xy_list[second_v, 1] - xy_list[fourth_v, 1]) / ( + xy_list[second_v, 0] - xy_list[fourth_v, 0] + cfg.epsilon) + if k13 < k24: # 当k13小于k24的时候,点4变点3,3->2,2->1,1->4 + tmp_x, tmp_y = reorder_xy_list[3, 0], reorder_xy_list[3, 1] + for i in range(2, -1, -1): + reorder_xy_list[i + 1] = reorder_xy_list[i] + reorder_xy_list[0, 0], reorder_xy_list[0, 1] = tmp_x, tmp_y + return reorder_xy_list + + +def resize_image(im, max_img_size=cfg.max_train_img_size): # 把原图长宽根据max_train_img_size变成32的整数倍 + im_width = np.minimum(im.width, max_img_size) + if im_width == max_img_size < im.width: + im_height = int((im_width / im.width) * im.height) + else: + im_height = im.height + o_height = np.minimum(im_height, max_img_size) + if o_height == max_img_size < im_height: + o_width = int((o_height / im_height) * im_width) + else: + o_width = im_width + d_wight = o_width - (o_width % 32) + d_height = o_height - (o_height % 32) + return d_wight, d_height + + +def gen_npy(o_img_list): + data_dir 
= cfg.data_dir + origin_image_dir = os.path.join(data_dir, cfg.origin_image_dir_name) # 'image_10000/' + origin_txt_dir = os.path.join(data_dir, cfg.origin_txt_dir_name) # 'txt_10000/' + train_image_dir = os.path.join(data_dir, cfg.train_image_dir_name) # 'images_%s/' % train_task_id + train_label_dir = os.path.join(data_dir, cfg.train_label_dir_name) # 'labels_%s/' % train_task_id + draw_gt_quad = cfg.draw_gt_quad # True + show_gt_image_dir = os.path.join(data_dir, cfg.show_gt_image_dir_name) # 'show_gt_images_%s/' % train_task_id + + for o_img_fname in o_img_list: + with Image.open(os.path.join(origin_image_dir, o_img_fname)) as im: # 打开每张图片 + # d_wight, d_height = resize_image(im) + d_wight, d_height = cfg.max_train_img_size, cfg.max_train_img_size + scale_ratio_w = d_wight / im.width + scale_ratio_h = d_height / im.height + im = im.resize((d_wight, d_height), Image.NEAREST).convert('RGB') # 图片缩放 + show_gt_im = im.copy() + # draw on the img + draw = ImageDraw.Draw(show_gt_im) + with open(os.path.join(origin_txt_dir, o_img_fname[:-4] + '.txt'), 'r', encoding='UTF-8') as f: + anno_list = f.readlines() + xy_list_array = np.zeros((len(anno_list), 4, 2)) + for anno, i in zip(anno_list, range(len(anno_list))): + anno_colums = anno.strip().split(',') + anno_array = np.array(anno_colums) + xy_list = np.reshape(anno_array[:8].astype(float), (4, 2)) + xy_list[:, 0] = xy_list[:, 0] * scale_ratio_w # 坐标缩放 + xy_list[:, 1] = xy_list[:, 1] * scale_ratio_h + xy_list = reorder_vertexes(xy_list) # 坐标顺序转换为统一格式 + xy_list_array[i] = xy_list + # 将groundtruth文本框内缩,论文为0.3,实际中发现太大,改为0.2 + _, shrink_xy_list, _ = shrink(xy_list, cfg.shrink_ratio) # shrink_ratio=0.2,返回长短边都缩后的结果 + shrink_1, _, long_edge = shrink(xy_list, cfg.shrink_side_ratio) # shrink_side_ratio=0.6,返回仅长边收缩的结果以及长边下标 + if draw_gt_quad: + draw.line([tuple(xy_list[0]), tuple(xy_list[1]), + tuple(xy_list[2]), tuple(xy_list[3]), + tuple(xy_list[0]) + ], + width=2, fill='green') + draw.line([tuple(shrink_xy_list[0]), + tuple(shrink_xy_list[1]), + tuple(shrink_xy_list[2]), + tuple(shrink_xy_list[3]), + tuple(shrink_xy_list[0]) + ], + width=2, fill='blue') + vs = [[[0, 0, 3, 3, 0], [1, 1, 2, 2, 1]], + [[0, 0, 1, 1, 0], [2, 2, 3, 3, 2]]] + for q_th in range(2): # 框出头跟尾巴的像素 + draw.line([tuple(xy_list[vs[long_edge][q_th][0]]), + tuple(shrink_1[vs[long_edge][q_th][1]]), + tuple(shrink_1[vs[long_edge][q_th][2]]), + tuple(xy_list[vs[long_edge][q_th][3]]), + tuple(xy_list[vs[long_edge][q_th][4]])], + width=3, fill='yellow') + if cfg.gen_origin_img: + im.save(os.path.join(train_image_dir, o_img_fname)) + np.save(os.path.join( + train_label_dir, + o_img_fname[:-4] + '.npy'), + xy_list_array) # 保存顺序一致处理后的坐标点集 + if draw_gt_quad: + show_gt_im.save(os.path.join(show_gt_image_dir, o_img_fname)) + + +def preprocess(): + data_dir = cfg.data_dir + origin_image_dir = os.path.join(data_dir, cfg.origin_image_dir_name) # 'image_10000/' + origin_txt_dir = os.path.join(data_dir, cfg.origin_txt_dir_name) # 'txt_10000/' + train_image_dir = os.path.join(data_dir, cfg.train_image_dir_name) # 'images_%s/' % train_task_id + train_label_dir = os.path.join(data_dir, cfg.train_label_dir_name) # 'labels_%s/' % train_task_id + if not os.path.exists(train_image_dir): + os.mkdir(train_image_dir) + if not os.path.exists(train_label_dir): + os.mkdir(train_label_dir) + draw_gt_quad = cfg.draw_gt_quad # True + show_gt_image_dir = os.path.join(data_dir, cfg.show_gt_image_dir_name) # 'show_gt_images_%s/' % train_task_id + if not os.path.exists(show_gt_image_dir): + os.mkdir(show_gt_image_dir) + 
show_act_image_dir = os.path.join(cfg.data_dir, cfg.show_act_image_dir_name) # 'show_act_images_%s/' % train_task_id + if not os.path.exists(show_act_image_dir): + os.mkdir(show_act_image_dir) + + o_img_list = os.listdir(origin_image_dir) + print('found %d origin images.' % len(o_img_list)) + train_val_set = [] + workers = multiprocessing.cpu_count() + batch_size = math.ceil(len(o_img_list) / workers) + batch_list = [o_img_list[i * batch_size:(i + 1) * batch_size] for i in range(workers)] + thread_pool = multiprocessing.Pool(workers) + for i in range(workers): + thread_pool.apply_async(gen_npy, args=(batch_list[i], )) + thread_pool.close() + thread_pool.join() + size = cfg.max_train_img_size + for o_img_fname in o_img_list: + train_val_set.append('{},{},{}\n'.format(o_img_fname, size, size)) + train_img_list = os.listdir(train_image_dir) + print('found %d train images.' % len(train_img_list)) + train_label_list = os.listdir(train_label_dir) + print('found %d train labels.' % len(train_label_list)) + + # random.shuffle(train_val_set) + train_val_set.sort() + # 确保每次生成的训练集和验证集一致,不shuffle + val_count = int(cfg.validation_split_ratio * len(train_val_set)) + with open(os.path.join(data_dir, cfg.val_fname), 'w') as f_val: + f_val.writelines(train_val_set[:val_count]) + with open(os.path.join(data_dir, cfg.train_fname), 'w') as f_train: + f_train.writelines(train_val_set[val_count:]) + + +if __name__ == '__main__': + preprocess() diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/pth2onnx.py b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/pth2onnx.py new file mode 100644 index 0000000000000000000000000000000000000000..b2d4c729216fe69830cfb0e4e17cc8cc455c49af --- /dev/null +++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/pth2onnx.py @@ -0,0 +1,48 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
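+
+# Export a trained AdvancedEAST checkpoint to ONNX (opset 11).  The checkpoint
+# may come from DistributedDataParallel training, so the 'module.' prefix is
+# stripped from the state_dict keys before loading; the batch dimension of
+# input_1 and output_1 is exported as dynamic.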
+ +import torch + +import cfg +from model_VGG import advancedEAST + + +def pth2onnx(input_file, output_file): + model = advancedEAST() + state_dict = {k.replace('module.', ''): v for k, v in torch.load( + input_file, map_location='cpu').items()} + model.load_state_dict(state_dict) + + model.eval() + input_names = ["input_1"] + output_names = ["output_1"] + dynamic_axes = {'input_1': {0: '-1'}, 'output_1': {0: '-1'}} + size = cfg.max_predict_img_size + dummy_input = torch.randn(1, 3, size, size) + + torch.onnx.export( + model, + dummy_input, + output_file, + input_names=input_names, + output_names=output_names, + dynamic_axes = dynamic_axes, + verbose=True, + opset_version=11) + + +if __name__ == "__main__": + input_file = cfg.pth_path if cfg.pth_path else 'saved_model/3T736_latest.pth' + output_file = cfg.train_task_id + '.onnx' + pth2onnx(input_file, output_file) diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/requirements.txt b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..0f9b521ce9e28ecd8dcd488aa6600aec9514defe --- /dev/null +++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/requirements.txt @@ -0,0 +1,7 @@ +torch +torchvision +apex +onnx +Pillow +opencv-python +Shapely \ No newline at end of file diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/env.sh b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/env.sh new file mode 100644 index 0000000000000000000000000000000000000000..70adb430b8d967911b966b9a44a81478e809cfe3 --- /dev/null +++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/env.sh @@ -0,0 +1,77 @@ +#!/bin/bash +export install_path=/usr/local/Ascend + +if [ -d ${install_path}/toolkit ]; then + export LD_LIBRARY_PATH=/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH} + export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH + export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH + export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH + export ASCEND_OPP_PATH=${install_path}/opp +else + if [ -d ${install_path}/nnae/latest ];then + export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/nnae/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export 
PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
+        export ASCEND_AICPU_PATH=${install_path}/nnae/latest
+    else
+        export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH
+        export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/
+        export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/
+        export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
+        export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
+        export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest
+    fi
+fi
+
+
+# Output host-side logs to stdout, 0-off/1-on
+export ASCEND_SLOG_PRINT_TO_STDOUT=0
+# Default log level, 0-debug/1-info/2-warning/3-error
+export ASCEND_GLOBAL_LOG_LEVEL=3
+# Host-side event logging, 0-off/1-on
+export ASCEND_GLOBAL_EVENT_ENABLE=0
+# Task queue, 0-off/1-on
+export TASK_QUEUE_ENABLE=1
+# PTCopy, 0-off/1-on
+export PTCOPY_ENABLE=1
+# Combined flag, 0-off/1-on
+export COMBINED_ENABLE=1
+# Whether special scenarios need recompiling; no need to modify
+export DYNAMIC_OP="ADD#MUL"
+# HCCL whitelist switch, 1-off/0-on
+export HCCL_WHITELIST_DISABLE=1
+# Set device-side log level to error
+${install_path}/driver/tools/msnpureport -g error -d 0
+${install_path}/driver/tools/msnpureport -g error -d 1
+${install_path}/driver/tools/msnpureport -g error -d 2
+${install_path}/driver/tools/msnpureport -g error -d 3
+${install_path}/driver/tools/msnpureport -g error -d 4
+${install_path}/driver/tools/msnpureport -g error -d 5
+${install_path}/driver/tools/msnpureport -g error -d 6
+${install_path}/driver/tools/msnpureport -g error -d 7
+# Disable device-side event logging
+${install_path}/driver/tools/msnpureport -e disable
+
+
+path_lib=$(python3.7 -c """
+import sys
+import re
+result=''
+for index in range(len(sys.path)):
+    match_sit = re.search('-packages', sys.path[index])
+    if match_sit is not None:
+        match_lib = re.search('lib', sys.path[index])
+
+        if match_lib is not None:
+            end=match_lib.span()[1]
+            result += sys.path[index][0:end] + ':'
+
+        result+=sys.path[index] + '/torch/lib:'
+print(result)"""
+)
+
+echo ${path_lib}
+
+export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH
diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/prep_dataset.sh b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/prep_dataset.sh
new file mode 100644
index 0000000000000000000000000000000000000000..95d812d490ece664eac0b51c3e6e9cb1e0bab592
--- /dev/null
+++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/prep_dataset.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+SIZES="256 384 512 640 736"
+
+for SIZE in $SIZES
+do
+    python3.7 preprocess.py --size $SIZE
+    python3.7 label.py --size $SIZE
+done
diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/train_eval.sh b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/train_eval.sh
new file mode 100644
index 0000000000000000000000000000000000000000..8402cd2202c37aa23a70f9f43159c9b766bdea22
--- /dev/null
+++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/train_eval.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+source test/env.sh
+
+if [ -n "$*" ]
+then
+    SIZES=$*
+else
+    SIZES="736"
+fi
+
+for SIZE in $SIZES
+do
+    nohup python3.7 eval.py --pth_path saved_model/3T${SIZE}_latest.pth --size $SIZE
+    sleep 5s
+done
diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/train_finetune_1p.sh b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/train_finetune_1p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e9204aca249c2a74f8d35504005be8f99ebe1b2e
--- /dev/null
+++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/train_finetune_1p.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+source test/env.sh
+
+nohup python3.7 -u train.py --size 736 --apex --pth_path saved_model/3T736_latest.pth --epoch_num 3 --val_interval 1
\ No newline at end of file
diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/train_full_1p.sh b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/train_full_1p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a2713dd631cf15fd60b729570681e33a777ed409
--- /dev/null
+++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/train_full_1p.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+source test/env.sh
+
+if [ -n "$*" ]
+then
+    SIZES=$*
+else
+    SIZES="256 384 512 640 736"
+fi
+
+for SIZE in $SIZES
+do
+    nohup python -u train.py --size $SIZE --apex
+    sleep 5s
+done
diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/train_full_8p.sh b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/train_full_8p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0ed3ca33fd14ba9fa9a1524db7ee9b3c182ce6fb
--- /dev/null
+++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/train_full_8p.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+#source test/env.sh
+
+RANK_ID_START=0
+KERNEL_NUM=$(($(nproc)/8))
+export WORLD_SIZE=8
+export MASTER_ADDR='127.0.0.1'
+export MASTER_PORT='29688'
+
+if [ -n "$*" ]
+then
+    SIZES=$*
+else
+    SIZES="256 384 512 640 736"
+fi
+
+for SIZE in $SIZES
+do
+    for((RANK_ID=$RANK_ID_START;RANK_ID<$((WORLD_SIZE+RANK_ID_START));RANK_ID++));
+    do
+        PID_START=$((KERNEL_NUM*RANK_ID))
+        PID_END=$((PID_START+KERNEL_NUM-1))
+        if [ $RANK_ID == $((WORLD_SIZE+RANK_ID_START-1)) ]
+        then
+            nohup taskset -c $PID_START-$PID_END python3.7 -u train.py --size $SIZE --local_rank $RANK_ID --apex
+        else
+            nohup taskset -c $PID_START-$PID_END python3.7 -u train.py --size $SIZE --local_rank $RANK_ID --apex &
+        fi
+    done
+    sleep 5s
+done
diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/train_performance_1p.sh b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/train_performance_1p.sh
new file mode 100644
index 0000000000000000000000000000000000000000..bfe053205cfbca5d2b97e4cec7b18c56470ce517
--- /dev/null
+++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/train_performance_1p.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +source test/env.sh + +if [ -n "$*" ] +then + SIZES=$* +else + SIZES="256 384 512 640 736" +fi + +for SIZE in $SIZES +do + nohup python -u train.py --size $SIZE --apex --epoch_num 3 --val_interval 1 + sleep 5s +done diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/train_performance_8p.sh b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/train_performance_8p.sh new file mode 100644 index 0000000000000000000000000000000000000000..47eab845154dd15d7026fa08f1ac1ad733479f06 --- /dev/null +++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/test/train_performance_8p.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +source test/env.sh + +RANK_ID_START=0 +KERNEL_NUM=$(($(nproc)/8)) +export WORLD_SIZE=8 +export MASTER_ADDR='127.0.0.1' +export MASTER_PORT='29688' + +if [ -n "$*" ] +then + SIZES=$* +else + SIZES="256 384 512 640 736" +fi + +for SIZE in $SIZES +do + for((RANK_ID=$RANK_ID_START;RANK_ID<$((WORLD_SIZE+RANK_ID_START));RANK_ID++)); + do + PID_START=$((KERNEL_NUM*RANK_ID)) + PID_END=$((PID_START+KERNEL_NUM-1)) + if [ $RANK_ID == $((WORLD_SIZE+RANK_ID_START-1)) ] + then + nohup taskset -c $PID_START-$PID_END python3.7 -u train.py --size $SIZE --local_rank $RANK_ID --apex --epoch_num 3 --val_interval 1 + else + nohup taskset -c $PID_START-$PID_END python3.7 -u train.py --size $SIZE --local_rank $RANK_ID --apex --epoch_num 3 --val_interval 1 & + fi + done + sleep 5s +done diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/train.py b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/train.py new file mode 100644 index 0000000000000000000000000000000000000000..65d65b6d657f87d171595c31d587d6433cccee0b --- /dev/null +++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/train.py @@ -0,0 +1,176 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
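+
+# Training entry point used by the 1p/8p scripts under test/.  Resolutions
+# above 256 are warm-started from the previous resolution's checkpoint unless
+# --pth_path is given, mixed precision uses apex (NpuFusedAdam, opt_level O1,
+# dynamic loss scale), and distributed runs use the 'hccl' backend with one
+# DistributedSampler per rank.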
+ +import time + +import torch +if torch.__version__>= '1.8.1': + import torch_npu +import torch.utils.data +import torch.optim as optim +from torch import distributed as dist + +from model_VGG import advancedEAST +from losses import quad_loss +from dataset import RawDataset, data_collate +from utils import Averager +import cfg + +device = torch.device(cfg.device) + + +def train(): + if cfg.distributed: + torch.npu.set_device(cfg.local_rank) + dist.init_process_group(backend='hccl', world_size=cfg.world_size, rank=cfg.local_rank) + + """ dataset preparation """ + train_dataset = RawDataset(is_val=False) + val_dataset = RawDataset(is_val=True) + + if cfg.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset, shuffle=True) + val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset, shuffle=False) + else: + train_sampler = None + val_sampler = None + + train_loader = torch.utils.data.DataLoader( + train_dataset, + batch_size=cfg.batch_size, + collate_fn=data_collate, + shuffle=(train_sampler is None), + num_workers=int(cfg.workers), + pin_memory=True, + sampler=train_sampler) + valid_loader = torch.utils.data.DataLoader( + val_dataset, + batch_size=cfg.batch_size, + collate_fn=data_collate, + shuffle=False, + num_workers=int(cfg.workers), + pin_memory=True, + sampler=val_sampler) + + # --------------------训练过程--------------------------------- + model = advancedEAST() + if cfg.pth_path: + model.load_state_dict(torch.load(cfg.pth_path, map_location='cpu')) + if cfg.is_master_node: + print('Load {}'.format(cfg.pth_path)) + elif int(cfg.train_task_id[-3:]) != 256: + id_num = cfg.train_task_id[-3:] + idx_dic = {'384': 256, '512': 384, '640': 512, '736': 640} + state_dict = {k.replace('module.', ''): v for k, v in torch.load( + './saved_model/3T{}_latest.pth'.format(idx_dic[id_num]), map_location='cpu').items()} + model.load_state_dict(state_dict) + if cfg.is_master_node: + print('Load ./saved_model/3T{}_latest.pth'.format(idx_dic[id_num])) + + model = model.to(device) + optimizer = optim.Adam(model.parameters(), lr=cfg.lr, weight_decay=cfg.decay) + + # Apex + if cfg.amp: + import apex + optimizer = apex.optimizers.NpuFusedAdam(model.parameters(), lr=cfg.lr, weight_decay=cfg.decay) + model, optimizer = apex.amp.initialize(model, optimizer, opt_level="O1", loss_scale="dynamic", combine_grad=True) + + if cfg.distributed: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[cfg.local_rank], broadcast_buffers=False) + + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, cfg.epoch_num) + + loss_func = quad_loss + + '''start training''' + start_iter = 0 + start_time = time.time() + i = start_iter + step_num = 0 + loss_avg = Averager() + val_loss_avg = Averager() + total_train_img = int(cfg.total_img * (1 - cfg.validation_split_ratio)) + + while(True): + model.train() + if cfg.distributed: + train_sampler.set_epoch(i) + # train part + # training----------------------------- + epoch_start_time = time.time() + for image_tensors, labels, gt_xy_list in train_loader: + step_num += 1 + batch_x = image_tensors.float().to(device) + batch_y = labels.float().to(device) # float64转float32 + out = model(batch_x) + loss = loss_func(batch_y, out) + optimizer.zero_grad() + if cfg.amp: + with apex.amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + + loss_avg.add(loss) + + loss = loss_avg.val() + if cfg.distributed: + dist.all_reduce(loss) + loss = loss / cfg.world_size + 
loss = loss.item() + + if cfg.is_master_node: + print('Epoch:[{}/{}] Training loss:{:.3f} FPS:{:.3f} LR:{:.3e}'.format(i + 1, cfg.epoch_num, + loss, total_train_img / (time.time() - epoch_start_time), optimizer.param_groups[0]['lr'])) + loss_avg.reset() + + scheduler.step() + + # evaluation-------------------------------- + if (i + 1) % cfg.val_interval == 0: + elapsed_time = time.time() - start_time + if cfg.is_master_node: + print('Elapsed time:{}s'.format(round(elapsed_time))) + model.eval() + for image_tensors, labels, gt_xy_list in valid_loader: + batch_x = image_tensors.float().to(device) + batch_y = labels.float().to(device) # float64转float32 + + out = model(batch_x) + loss = loss_func(batch_y, out) + + val_loss_avg.add(loss) + + loss = val_loss_avg.val() + if cfg.distributed: + dist.all_reduce(loss) + loss = loss / cfg.world_size + loss = loss.item() + + if cfg.is_master_node: + print('Validation loss:{:.3f}'.format(loss)) + val_loss_avg.reset() + + if i + 1 == cfg.epoch_num: + if cfg.is_master_node: + torch.save(model.state_dict(), './saved_model/{}_latest.pth'.format(cfg.train_task_id)) + print('End the training') + break + i += 1 + + +if __name__ == '__main__': + train() diff --git a/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/utils.py b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..dea5a6981aabe3266b550a831bccf7f33b804324 --- /dev/null +++ b/modelzoo/contrib/Research/cv/advancedeast/AdvancedEast_pytorch_QilongXue/utils.py @@ -0,0 +1,138 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
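+
+# Evaluation helpers: Averager keeps a running mean of loss tensors, and
+# eval_pre_rec_f1 greedily matches predicted quadrangles against ground-truth
+# boxes by shapely polygon IoU (threshold cfg.iou_threshold) to accumulate
+# precision, recall and F1 over a validation run.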
+
+import numpy as np
+from nms import nms
+import cfg
+from shapely.geometry import Polygon
+
+
+class Averager(object):
+    """Compute average for torch.Tensor, used for loss average."""
+
+    def __init__(self):
+        self.reset()
+
+    def add(self, v):
+        count = v.data.numel()
+        v = v.data.sum()
+        self.n_count += count
+        self.sum += v
+
+    def reset(self):
+        self.n_count = 0
+        self.sum = 0
+
+    def val(self):
+        res = 0
+        if self.n_count != 0:
+            res = self.sum / float(self.n_count)
+        return res
+
+
+class eval_pre_rec_f1(object):
+    '''Compute precision / recall / F1 score from each batch's predicted quads and the ground-truth boxes.'''
+
+    def __init__(self):
+        self.pixel_threshold = float(cfg.pixel_threshold)
+        self.reset()
+
+    def reset(self):
+        self.img_num = 0
+        self.pre = 0
+        self.rec = 0
+        self.f1_score = 0
+
+    def val(self):
+        mpre = self.pre / self.img_num * 100
+        mrec = self.rec / self.img_num * 100
+        mf1_score = self.f1_score / self.img_num * 100
+        return mpre, mrec, mf1_score
+
+    def sigmoid(self, x):
+        """`y = 1 / (1 + exp(-x))`"""
+        return 1 / (1 + np.exp(-x))
+
+    def get_iou(self, g, p):
+        g = Polygon(g)
+        p = Polygon(p)
+        if not g.is_valid or not p.is_valid:
+            return 0
+        inter = g.intersection(p).area
+        union = g.area + p.area - inter
+        if union == 0:
+            return 0
+        else:
+            return inter/union
+
+    def eval_one(self, quad_scores, quad_after_nms, gt_xy, quiet=cfg.quiet):
+        num_gts = len(gt_xy)
+        quad_scores_no_zero = []  # drop incomplete quads and keep each remaining quad's score
+        quad_after_nms_no_zero = []  # drop incomplete quads
+        for score, geo in zip(quad_scores, quad_after_nms):
+            if np.amin(score) > 0:
+                quad_scores_no_zero.append(sum(score))
+                quad_after_nms_no_zero.append(geo)
+            elif not quiet:
+                print('quad invalid with vertex num less than 4.')
+                continue
+        num_quads = len(quad_after_nms_no_zero)
+        if num_quads == 0:
+            return 0, 0, 0
+        quad_flag = np.zeros(num_quads)  # whether each quad has been matched
+        gt_flag = np.zeros(num_gts)  # whether each ground-truth box has been matched
+        quad_scores_no_zero = np.array(quad_scores_no_zero)
+        scores_idx = np.argsort(quad_scores_no_zero)[::-1]  # indices of quad_scores sorted from high to low
+        for i in range(num_quads):
+            idx = scores_idx[i]
+            geo = quad_after_nms_no_zero[idx]  # take quads in descending order of score
+            for j in range(num_gts):
+                if gt_flag[j] == 0:
+                    gt_geo = gt_xy[j]
+                    iou = self.get_iou(geo, gt_geo)
+                    if iou >= cfg.iou_threshold:
+                        gt_flag[j] = 1  # mark this ground-truth box as matched
+                        quad_flag[i] = 1  # mark this quad as matched
+        tp = np.sum(quad_flag)
+        fp = num_quads - tp
+        fn = num_gts - tp
+        pre = tp / (tp + fp)  # precision
+        rec = tp / (tp + fn)  # recall
+        if pre + rec == 0:
+            f1_score = 0
+        else:
+            f1_score = 2 * pre * rec / (pre + rec)
+        return pre, rec, f1_score
+
+    def add(self, out, gt_xy_list):
+        self.img_num += len(gt_xy_list)
+        ys = out.cpu().detach().numpy()  # (N, 7, 64, 64)
+        if ys.shape[1] == 7:
+            ys = ys.transpose((0, 2, 3, 1))  # NCHW->NHWC
+        for y, gt_xy in zip(ys, gt_xy_list):  # iterate over each image's prediction and its ground-truth boxes
+            y[:, :, :3] = self.sigmoid(y[:, :, :3])
+            cond = np.greater_equal(y[:, :, 0], self.pixel_threshold)
+            activation_pixels = np.where(cond)
+            quad_scores, quad_after_nms = nms(y, activation_pixels)
+            # quad_scores returned by nms looks like [[a, a, b, b], [c, c, d, d], ...]:
+            # the first two scores of each quad are the head score, the last two the tail score
+            if (len(quad_after_nms) == 0) or (sum(sum(quad_scores)) == 0):
+                if not cfg.quiet:
+                    print('No quad left after NMS!!')
+                continue
+            else:
+                pre, rec, f1_score = self.eval_one(quad_scores, quad_after_nms, gt_xy)
+                self.pre += pre
+                self.rec += rec
+                self.f1_score += f1_score
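+
+
+if __name__ == '__main__':
+    # Minimal self-check sketch, not used by train.py or eval.py: it only
+    # verifies that Averager reports the mean of the values it has seen.
+    # Assumes torch is available, as listed in requirements.txt.
+    import torch
+    avg = Averager()
+    avg.add(torch.tensor([1.0, 2.0, 3.0]))
+    print('Averager mean: {:.1f}'.format(float(avg.val())))  # expected: 2.0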