4 Star 0 Fork 0

安安好了 / PytorchVisionFasterRCNN

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
detection.py 13.15 KB
一键复制 编辑 原始数据 按行查看 历史
import torch
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
import torchvision
from PIL import Image
from xml.dom.minidom import parse
import utils
import transforms as T
from engine import train_one_epoch, evaluate
import xml.etree.cElementTree as ET
import collections
import pandas as pd
from torchvision.transforms import functional
import random
from aip import AipFace
import base64
import time
""" 你的 APPID AK SK """
# Baidu AIP face-service credentials (APP_ID / API key / secret key).
# SECURITY: credentials should not live in source control. Each value can be
# supplied through an environment variable; the literals below are kept only
# as fallbacks so existing setups keep working unchanged.
APP_ID = os.environ.get('BAIDU_AIP_APP_ID', '22912073')
API_KEY = os.environ.get('BAIDU_AIP_API_KEY', '3zbPMNiqWOrsD5BspgX1pBoR')
SECRET_KEY = os.environ.get('BAIDU_AIP_SECRET_KEY', 'nxSoiCZnSdOV9DVTPhHrlMvAYvTHZNa4')

# Shared AipFace client used by predict_faces().
aipFace = AipFace(APP_ID, API_KEY, SECRET_KEY)

# Maps the key derived from a matched user's `user_info` field (the text
# before the first '.') to the display name drawn on the image.
faces_name_dict = {
    'Paulmale': 'Paul Male',
    'Danielmale': 'Daniel Male',
    'Fishermale': 'Fisher Male',
    'Jackmale': 'Jack Male',
    'Kevinmale': 'Kevin Male',
    'lilyfemale': 'lily Female',
    'Rosefemale': 'Rose Female',
    'Maryfemale': 'Mary Female',
    'Michaelmale': 'Michael Male',
    'stevenmale': 'Steven Male',
    'Jamesmale': 'James Male',
}
def predict_faces(img_path):
    """Search the faces found in an image against the 'main' face group.

    The image file is read, base64-encoded and sent to the module-level
    ``aipFace`` client via ``multiSearch``.

    Args:
        img_path: Path of the image file to upload.

    Returns:
        A list with one dict per matched face, each holding:
          - 'location': the location entry reported by the service
            (the caller reads its 'left', 'top', 'width', 'height' keys),
          - 'user_id': the display name from ``faces_name_dict``, or the raw
            key when the user is not listed there,
          - 'score': the match score of the best candidate.
        The list is empty when the service reports no face, returns an
        error, or returns no result payload.

    Raises:
        OSError: If ``img_path`` cannot be opened or read.
    """
    faces = []

    # Read the image and convert it to a base64 text payload.
    with open(img_path, "rb") as fp:
        image = base64.b64encode(fp.read()).decode('utf-8')
    image_type = "BASE64"

    # Request parameters.
    group_id_list = 'main'
    options = {
        "max_face_num": 10,
        "match_threshold": 10,
        "quality_control": "NONE",
        "liveness_control": "NONE",
        "max_user_num": 1,
    }

    # Face search call with the parameters above.
    response = aipFace.multiSearch(image, image_type, group_id_list, options)
    if response.get('error_msg') == 'pic not has face':
        return faces
    print(response)

    # Any other failure leaves no usable result payload; the response has
    # already been printed, so return what we have instead of crashing.
    result = response.get('result')
    if not result:
        return faces
    print(result)

    face_list = result.get('face_list') or []
    face_count = result.get('face_num', len(face_list))
    for face in face_list[:face_count]:
        # A detected face may have no candidate in the group: skip it.
        user_list = face.get('user_list') or []
        if not user_list:
            continue
        best_match = user_list[0]
        user_key = best_match.get('user_info', '').split('.')[0]
        faces.append({
            'location': face['location'],
            # Fall back to the raw key for users missing from the name table.
            'user_id': faces_name_dict.get(user_key, user_key),
            'score': best_match['score'],
        })
    return faces
def predict_once(model, img):
    """Run one inference pass of ``model`` on a single image.

    Args:
        model: The detection model. It is switched to evaluation mode here.
        img: The image to analyse, in any form accepted by
            ``functional.to_tensor`` (``main`` passes an RGB array).

    Returns:
        The raw output of ``model([tensor])``. According to the original
        author's note it looks like::

            [{'boxes':  tensor([[x1, y1, x2, y2], ...]),
              'labels': tensor([1, 1]),
              'scores': tensor([1.0000, 1.0000])}]
    """
    # Switch the model to prediction mode.
    model.eval()
    img_tensor = functional.to_tensor(img)

    # Send the input to the device the model actually lives on; guessing
    # "cuda if available" breaks for a CPU-resident model on a CUDA machine.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration, TypeError):
        device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    with torch.no_grad():
        prediction = model([img_tensor.to(device)])

    # Print the raw prediction for inspection.
    print(prediction)
    return prediction
def random_color():
    """Return a random (b, g, r) tuple, each channel an int in [50, 200]."""
    channels = []
    for _channel in ('b', 'g', 'r'):
        channels.append(random.randint(50, 200))
    return tuple(channels)
def _read_label_list(path):
    """Return the right-stripped, non-empty lines of the label file at ``path``."""
    with open(path, 'r') as file:
        stripped = [line.rstrip() for line in file]
    return [label for label in stripped if label != '']


def _best_index_per_label(labels, scores):
    """Map each distinct label to ``(best_score, index)`` of its top detection.

    Keys keep first-occurrence order; ties keep the earliest index.
    """
    best = {}
    for idx, label in enumerate(labels):
        score = float(scores[idx])
        if label not in best or score > best[label][0]:
            best[label] = (score, idx)
    return best


def _draw_detections(canvas, boxes, labels, scores, indices, max_boxes=15):
    """Draw box, label banner and score for at most ``max_boxes`` detections."""
    for idx in indices[:max_boxes]:
        x1, y1, x2, y2 = boxes[idx][0], boxes[idx][1], boxes[idx][2], boxes[idx][3]
        color = random_color()
        cv2.rectangle(canvas, (int(x1), int(y1)), (int(x2), int(y2)), color=color, thickness=2)
        # Filled 16-pixel banner above the box as background for the label.
        cv2.rectangle(canvas, (int(x1), int(y1)), (int(x2), int(y1 - 16)), color=color, thickness=-1)
        cv2.putText(canvas, labels[idx], (int(x1), int(y1)), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.5, color=(0, 0, 0))
        cv2.putText(canvas, str(round(float(scores[idx]), 2)), (int(x1), int(y2)),
                    fontFace=cv2.FONT_HERSHEY_TRIPLEX, fontScale=1, color=color, thickness=2)


def _draw_faces(canvas, faces):
    """Draw box, name banner and match score for every entry of ``faces``."""
    for face in faces:
        color = random_color()
        location = face['location']
        left = location['left']
        top = location['top']
        width = location['width']
        height = location['height']
        cv2.rectangle(canvas, (int(left), int(top)), (int(left + width), int(top + height)),
                      color=color, thickness=1)
        cv2.rectangle(canvas, (int(left), int(top)), (int(left + width), int(top - 16)),
                      color=color, thickness=-1)
        cv2.putText(canvas, face['user_id'], (int(left), int(top)), fontFace=cv2.FONT_HERSHEY_TRIPLEX,
                    fontScale=0.5, color=(0, 0, 0), thickness=1)
        cv2.putText(canvas, str(round(face['score'], 2)), (int(left), int(top + height)),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=color, thickness=2)


def _pad_to_interval(started_at, interval=0.5):
    """Sleep so that at least ``interval`` seconds separate ``started_at`` and now.

    Returns the timestamp sampled before sleeping.
    """
    now = time.time()
    elapsed = now - started_at
    if elapsed < interval:
        time.sleep(interval - elapsed)
    return now


def main(args):
    """Annotate every image under ``dataset/test`` with detections and faces.

    Steps:
      1. Read class names from ``dataset/label_list.txt``.
      2. Load the pickled model ``dataset/models/32.pkl``.
      3. Rename every entry of ``dataset/test`` to ``<n>_<unix time>.jpg``.
      4. For each image: run the detector, keep the highest-scoring box per
         label (at most 15 are drawn), draw the faces returned by
         ``predict_faces``, show the image, save it under
         ``dataset/prediction`` and wait for a key press (ESC stops).

    No score threshold is applied to the detections.

    Args:
        args: Parsed command-line arguments. Not read by this function.
    """
    root = r'dataset'
    model_selected = '32'

    # Parse the label list; the class count follows from it.
    label_list = _read_label_list(os.path.join(root, "label_list.txt"))
    num_classes = len(label_list)
    print(label_list)
    print(num_classes)

    # Load the model.
    # SECURITY NOTE: torch.load unpickles the file; only load trusted models.
    # NOTE(review): recent PyTorch releases default to weights_only=True,
    # which rejects whole-model pickles such as this one - confirm the
    # installed version before upgrading.
    model_path = os.path.join(root, 'models', model_selected + '.pkl')
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # map_location lets a model saved on a GPU load on a CPU-only machine.
    model = torch.load(model_path, map_location=device)
    model.to(device)
    # Detection (evaluation) mode.
    model.eval()

    # Give every test image a unique "<n>_<timestamp>.jpg" name.
    img_root = os.path.join(root, 'test')
    for img_number, img_name in enumerate(os.listdir(img_root), start=1):
        src = os.path.join(img_root, img_name)
        dst = os.path.join(img_root, str(img_number) + '_' + str(int(time.time())) + '.jpg')
        os.rename(src, dst)

    # cv2.imwrite does not create missing directories, so make sure it exists.
    output_dir = os.path.join(root, 'prediction')
    os.makedirs(output_dir, exist_ok=True)

    for img_name in os.listdir(img_root):
        # Record the start time of this iteration.
        time_start = time.time()
        print(time_start)

        # Read the picture with cv2; skip entries that are not decodable.
        img_path = os.path.join(img_root, img_name)
        src_img = cv2.imread(img_path)
        if src_img is None:
            print('skip unreadable image: ' + img_path)
            continue
        img = cv2.cvtColor(src_img, cv2.COLOR_BGR2RGB)

        # One prediction pass.
        prediction = predict_once(model=model, img=img)
        print(prediction[0]['labels'])
        print(prediction[0]['scores'])

        # Unpack the prediction and translate label ids into names.
        boxes = prediction[0]['boxes']
        scores = prediction[0]['scores']
        labels = [label_list[label] for label in prediction[0]['labels']]

        # Keep only the best-scoring detection of each label.
        best = _best_index_per_label(labels, scores)
        draw_list = {
            'label': list(best),
            'max_score': [score for score, _ in best.values()],
            'idx': [idx for _, idx in best.values()],
        }
        print(draw_list)

        # Only the first 15 boxes are drawn.
        _draw_detections(src_img, boxes, labels, scores, draw_list['idx'])

        # Face recognition and face boxes.
        faces = predict_faces(img_path)
        _draw_faces(src_img, faces)

        # Keep iterations at least 0.5 s long - presumably to respect the
        # face service's request rate; confirm against the account quota.
        # NOTE: gender labelling through the face-detect endpoint is not
        # performed here.
        time_mid1 = _pad_to_interval(time_start)

        # Show the annotated picture.
        cv2.imshow('result', src_img)

        # Save the annotated picture.
        out_path = os.path.join(output_dir, img_name)
        print(out_path)
        cv2.imwrite(out_path, src_img, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

        # Wait for a key press; ESC (27) ends the run.
        key = cv2.waitKey()
        if 27 == key:
            break
        _pad_to_interval(time_mid1)

    cv2.destroyAllWindows()
if __name__ == '__main__':
    import argparse

    # Command-line options: (flag, keyword arguments for add_argument).
    # Only these two options are registered.
    option_specs = (
        ('--epochs', dict(default=26, type=int, metavar='N',
                          help='number of total epochs to run')),
        ('--print-freq', dict(default=20, type=int, help='print frequency')),
    )

    cli_parser = argparse.ArgumentParser(description=__doc__)
    for flag, spec in option_specs:
        cli_parser.add_argument(flag, **spec)

    # Hand the parsed command-line arguments to the main function.
    main(cli_parser.parse_args())
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/wqy11888/pytorch-vision-faster-rcnn.git
git@gitee.com:wqy11888/pytorch-vision-faster-rcnn.git
wqy11888
pytorch-vision-faster-rcnn
PytorchVisionFasterRCNN
master

搜索帮助

344bd9b3 5694891 D2dac590 5694891