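"""Face Mask Overlay demo.

Detects faces in a webcam stream with an OpenCV DNN face detector, predicts
facial landmarks on each detected face with an HRNet-based model, and warps
a PNG mask onto the face via a homography between the mask's annotated
points and the predicted landmarks.
"""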
import argparse
import csv
import os
import pprint
from collections import OrderedDict

import cv2
import numpy as np
import torch

import lib.models as models
from lib.config import config, update_config
from lib.core.evaluation import decode_preds
from lib.utils import utils
from lib.utils.transforms import crop
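
# NOTE: the lib.* modules above are assumed to come from the
# HRNet-Facial-Landmark-Detection codebase, which provides the model factory,
# experiment config, heatmap decoding, and crop helpers used below.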


def parse_args():
    parser = argparse.ArgumentParser(description="Face Mask Overlay")
    parser.add_argument(
        "--cfg", help="experiment configuration filename", required=True, type=str,
    )
    parser.add_argument(
        "--landmark_model",
        help="path to model for landmarks extraction",
        required=True,
        type=str,
    )
    parser.add_argument(
        "--detector_model",
        help="path to detector model",
        type=str,
        default="detection/face_detector.prototxt",
    )
    parser.add_argument(
        "--detector_weights",
        help="path to detector weights",
        type=str,
        default="detection/face_detector.caffemodel",
    )
    parser.add_argument(
        "--mask_image", help="path to a .png file with a mask", required=True, type=str,
    )
    parser.add_argument("--device", default="cpu", help="device to run inference on")
    args = parser.parse_args()
    update_config(config, args)
    return args


def main():
    # parsing script arguments
    args = parse_args()
    device = torch.device(args.device)

    # initialize logger
    logger, final_output_dir, tb_log_dir = utils.create_logger(config, args.cfg, "demo")

    # log arguments and config values
    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))
    # init landmark model
    model = models.get_face_alignment_net(config)

    # get input size from the config
    input_size = config.MODEL.IMAGE_SIZE

    # load checkpoint weights
    state_dict = torch.load(args.landmark_model, map_location=device)

    # remove the `module.` prefix that nn.DataParallel adds to parameter names
    new_state_dict = OrderedDict()
    for key, value in state_dict.items():
        name = key[7:]  # len("module.") == 7
        new_state_dict[name] = value

    # load weights without the prefix
    model.load_state_dict(new_state_dict)
    # run model on device
    model = model.to(device)
    # init mean and std values for the landmark model's input
    mean = np.array(config.MODEL.MEAN, dtype=np.float32)
    std = np.array(config.MODEL.STD, dtype=np.float32)

    # defining prototxt and caffemodel paths
    detector_model = args.detector_model
    detector_weights = args.detector_weights

    # load the face detector
    detector = cv2.dnn.readNetFromCaffe(detector_model, detector_weights)
    capture = cv2.VideoCapture(0)
    frame_num = 0
    while True:
        # capture frame-by-frame
        success, frame = capture.read()
        # break if no frame
        if not success:
            break
        frame_num += 1
        print("frame_num: ", frame_num)

        landmarks_img = frame.copy()
        result = frame.copy()
        result = result.astype(np.float32) / 255.0

        # get frame's height and width
        height, width = frame.shape[:2]  # 640x480

        # resize and subtract BGR mean values, since Caffe uses BGR images for input
        blob = cv2.dnn.blobFromImage(
            frame, scalefactor=1.0, size=(300, 300), mean=(104.0, 177.0, 123.0),
        )
        # passing blob through the network to detect faces
        detector.setInput(blob)
        # detector output format:
        # [image_id, class, confidence, left, top, right, bottom]
        face_detections = detector.forward()

        # loop over the detections
        for i in range(0, face_detections.shape[2]):
            # extract confidence
            confidence = face_detections[0, 0, i, 2]
            # filter detections by confidence greater than the minimum threshold
            if confidence > 0.5:
                # get coordinates of the bounding box
                box = face_detections[0, 0, i, 3:7] * np.array(
                    [width, height, width, height],
                )
                (x1, y1, x2, y2) = box.astype("int")
                # show original image
                cv2.imshow("original image", frame)

                # crop to detection and resize
                resized = crop(
                    frame,
                    torch.Tensor([x1 + (x2 - x1) / 2, y1 + (y2 - y1) / 2]),
                    1.5,
                    tuple(input_size),
                )

                # convert from BGR to RGB since HRNet expects RGB format
                resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
                img = resized.astype(np.float32) / 255.0
                # normalize landmark net input
                normalized_img = (img - mean) / std
                # predict face landmarks
                model = model.eval()
                with torch.no_grad():
                    input = torch.Tensor(normalized_img.transpose([2, 0, 1]))
                    input = input.to(device)
                    output = model(input.unsqueeze(0))
                    score_map = output.data.cpu()
                    # decode heatmaps into landmark coordinates in the original
                    # frame, using the same center and scale (1.5) as the crop
                    preds = decode_preds(
                        score_map,
                        [torch.Tensor([x1 + (x2 - x1) / 2, y1 + (y2 - y1) / 2])],
                        [1.5],
                        score_map.shape[2:4],
                    )
                    preds = preds.squeeze(0)
                    landmarks = preds.data.cpu().detach().numpy()
                # draw landmarks
                for k, landmark in enumerate(landmarks, 1):
                    landmarks_img = cv2.circle(
                        landmarks_img,
                        # cv2.circle expects integer pixel coordinates
                        center=(int(landmark[0]), int(landmark[1])),
                        radius=3,
                        color=(0, 0, 255),
                        thickness=-1,
                    )
                    # draw landmarks' labels
                    landmarks_img = cv2.putText(
                        img=landmarks_img,
                        text=str(k),
                        org=(int(landmark[0]) + 5, int(landmark[1]) + 5),
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=0.5,
                        color=(0, 0, 255),
                    )

                # show results by drawing predicted landmarks and their labels
                cv2.imshow("image with landmarks", landmarks_img)
                # get chosen landmarks 2-16, 30 as destination points
                # note that landmarks numbering starts from 0
                dst_pts = np.array(
                    [landmarks[i] for i in list(range(1, 16)) + [29]],
                    dtype="float32",
                )
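
                # The annotation csv is assumed to sit next to the mask image
                # with the same base name, one point per row; columns 1 and 2
                # hold the x and y coordinates (column 0 is presumably a point
                # index, and any header row is skipped by the except below).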
                # load mask annotations from csv file to source points
                mask_annotation = os.path.splitext(os.path.basename(args.mask_image))[0]
                mask_annotation = os.path.join(
                    os.path.dirname(args.mask_image), mask_annotation + ".csv",
                )

                with open(mask_annotation) as csv_file:
                    csv_reader = csv.reader(csv_file, delimiter=",")
                    src_pts = []
                    for row in csv_reader:
                        # skip the header or empty lines if present
                        try:
                            src_pts.append(np.array([float(row[1]), float(row[2])]))
                        except (ValueError, IndexError):
                            continue
                src_pts = np.array(src_pts, dtype="float32")
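
                # The compositing below is standard alpha blending:
                # out = alpha * mask + (1 - alpha) * frame, per BGR channel,
                # driven by the warped mask's own alpha channel.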
                # overlay with a mask only if all landmarks have positive coordinates:
                if (landmarks > 0).all():
                    # load the mask image together with its alpha channel
                    mask_img = cv2.imread(args.mask_image, cv2.IMREAD_UNCHANGED)
                    mask_img = mask_img.astype(np.float32)
                    mask_img = mask_img / 255.0

                    # get the perspective transformation matrix
                    M, _ = cv2.findHomography(src_pts, dst_pts)

                    # warp the mask image into the frame's coordinates
                    transformed_mask = cv2.warpPerspective(
                        mask_img,
                        M,
                        (result.shape[1], result.shape[0]),
                        None,
                        cv2.INTER_LINEAR,
                        cv2.BORDER_CONSTANT,
                    )

                    # alpha-blend the mask onto the frame
                    alpha_mask = transformed_mask[:, :, 3]
                    alpha_image = 1.0 - alpha_mask
                    for c in range(0, 3):
                        result[:, :, c] = (
                            alpha_mask * transformed_mask[:, :, c]
                            + alpha_image * result[:, :, c]
                        )
        # display the resulting frame
        cv2.imshow("image with mask overlay", result)

        # exit on the Esc key
        k = cv2.waitKey(1)
        if k == 27:
            break

    # when everything is done, release the capture
    capture.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
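
# Example invocation (file and path names below are illustrative and depend
# on your checkout; the flags themselves are defined in parse_args above):
#   python face_mask_overlay.py \
#       --cfg experiments/face_alignment.yaml \
#       --landmark_model weights/hrnet_landmarks.pth \
#       --mask_image masks/mask.png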