# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""FasterRcnn dataset"""
from __future__ import division
import os
import numpy as np
from numpy import random
import cv2
import mindspore as ms
import mindspore.dataset as de
from mindspore.mindrecord import FileWriter


def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
    """Calculate the ious between each bbox of bboxes1 and bboxes2.

    Args:
        bboxes1(ndarray): shape (n, 4)
        bboxes2(ndarray): shape (k, 4)
        mode(str): iou (intersection over union) or iof (intersection
            over foreground)

    Returns:
        ious(ndarray): shape (n, k)
    """
    assert mode in ['iou', 'iof']

    bboxes1 = bboxes1.astype(np.float32)
    bboxes2 = bboxes2.astype(np.float32)
    rows = bboxes1.shape[0]
    cols = bboxes2.shape[0]
    ious = np.zeros((rows, cols), dtype=np.float32)
    if rows * cols == 0:
        return ious
    exchange = False
    if bboxes1.shape[0] > bboxes2.shape[0]:
        bboxes1, bboxes2 = bboxes2, bboxes1
        ious = np.zeros((cols, rows), dtype=np.float32)
        exchange = True
    area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (bboxes1[:, 3] - bboxes1[:, 1] + 1)
    area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (bboxes2[:, 3] - bboxes2[:, 1] + 1)
    for i in range(bboxes1.shape[0]):
        x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
        y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
        x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
        y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
        overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum(
            y_end - y_start + 1, 0)
        if mode == 'iou':
            union = area1[i] + area2 - overlap
        else:
            union = area1[i] if not exchange else area2
        ious[i, :] = overlap / union
    if exchange:
        ious = ious.T
    return ious
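

# Illustrative self-check for bbox_overlaps (made-up boxes, not part of the
# pipeline). With the +1 pixel convention above, a 0..10 box has area
# 11 * 11 = 121:
#
#   a = np.array([[0, 0, 10, 10]], dtype=np.float32)
#   b = np.array([[0, 0, 10, 10], [5, 5, 15, 15]], dtype=np.float32)
#   bbox_overlaps(a, b)           # [[1.0, 36 / 206]] ~= [[1.0, 0.17]]
#   bbox_overlaps(a, b, 'iof')    # [[1.0, 36 / 121]] ~= [[1.0, 0.30]]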


class PhotoMetricDistortion:
    """Photo Metric Distortion"""

    def __init__(self,
                 brightness_delta=32,
                 contrast_range=(0.5, 1.5),
                 saturation_range=(0.5, 1.5),
                 hue_delta=18):
        self.brightness_delta = brightness_delta
        self.contrast_lower, self.contrast_upper = contrast_range
        self.saturation_lower, self.saturation_upper = saturation_range
        self.hue_delta = hue_delta

    def __call__(self, img, boxes, labels):
        # random brightness
        img = img.astype('float32')
        if random.randint(2):
            delta = random.uniform(-self.brightness_delta,
                                   self.brightness_delta)
            img += delta

        # mode == 0 --> do random contrast first
        # mode == 1 --> do random contrast last
        mode = random.randint(2)
        if mode == 1:
            if random.randint(2):
                alpha = random.uniform(self.contrast_lower,
                                       self.contrast_upper)
                img *= alpha

        # convert color from BGR to HSV
        img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

        # random saturation
        if random.randint(2):
            img[..., 1] *= random.uniform(self.saturation_lower,
                                          self.saturation_upper)

        # random hue
        if random.randint(2):
            img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta)
            img[..., 0][img[..., 0] > 360] -= 360
            img[..., 0][img[..., 0] < 0] += 360

        # convert color from HSV to BGR
        img = cv2.cvtColor(img, cv2.COLOR_HSV2BGR)

        # random contrast
        if mode == 0:
            if random.randint(2):
                alpha = random.uniform(self.contrast_lower,
                                       self.contrast_upper)
                img *= alpha

        # randomly swap channels
        if random.randint(2):
            img = img[..., random.permutation(3)]

        return img, boxes, labels
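

# Minimal usage sketch (assumes a BGR uint8 image and xyxy boxes; the
# distortion is purely photometric, so boxes and labels pass through
# unchanged). Note that the float32 BGR->HSV conversion yields hue in
# [0, 360), which is why the wrap-around checks above compare against 360.
#
#   distort = PhotoMetricDistortion()
#   img_aug, boxes, labels = distort(img, boxes, labels)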


class Expand:
    """expand image"""

    def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)):
        if to_rgb:
            self.mean = mean[::-1]
        else:
            self.mean = mean
        self.min_ratio, self.max_ratio = ratio_range

    def __call__(self, img, boxes, labels):
        if random.randint(2):
            return img, boxes, labels

        h, w, c = img.shape
        ratio = random.uniform(self.min_ratio, self.max_ratio)
        expand_img = np.full((int(h * ratio), int(w * ratio), c),
                             self.mean).astype(img.dtype)
        left = int(random.uniform(0, w * ratio - w))
        top = int(random.uniform(0, h * ratio - h))
        expand_img[top:top + h, left:left + w] = img
        img = expand_img
        boxes += np.tile((left, top), 2)
        return img, boxes, labels


def rescale_with_tuple(img, scale):
    """Rescale image so it fits inside `scale` while keeping its aspect ratio."""
    h, w = img.shape[:2]
    scale_factor = min(max(scale) / max(h, w), min(scale) / min(h, w))
    new_size = int(w * float(scale_factor) + 0.5), int(h * float(scale_factor) + 0.5)
    rescaled_img = cv2.resize(img, new_size, interpolation=cv2.INTER_LINEAR)
    return rescaled_img, scale_factor
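

# Worked example (made-up input size; assuming config.img_width = 1280 and
# config.img_height = 768 as passed by the callers below): for a 480x640
# (h x w) image and scale = (1280, 768),
#
#   scale_factor = min(1280 / 640, 768 / 480) = min(2.0, 1.6) = 1.6
#   new_size     = (int(640 * 1.6 + 0.5), int(480 * 1.6 + 0.5)) = (1024, 768)
#
# so the aspect ratio is preserved and rescale_column later pads the result
# up to the full 1280x768 canvas.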


def rescale_with_factor(img, scale_factor):
    """Rescale image by a scalar factor using nearest-neighbour interpolation."""
    h, w = img.shape[:2]
    new_size = int(w * float(scale_factor) + 0.5), int(h * float(scale_factor) + 0.5)
    return cv2.resize(img, new_size, interpolation=cv2.INTER_NEAREST)


def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num, config):
    """rescale operation for image"""
    img_data, scale_factor = rescale_with_tuple(img, (config.img_width, config.img_height))
    if img_data.shape[0] > config.img_height:
        img_data, scale_factor2 = rescale_with_tuple(img_data, (config.img_height, config.img_height))
        scale_factor = scale_factor * scale_factor2

    gt_bboxes = gt_bboxes * scale_factor
    gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_data.shape[1] - 1)
    gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_data.shape[0] - 1)

    pad_h = config.img_height - img_data.shape[0]
    pad_w = config.img_width - img_data.shape[1]
    assert ((pad_h >= 0) and (pad_w >= 0))

    pad_img_data = np.zeros((config.img_height, config.img_width, 3)).astype(img_data.dtype)
    pad_img_data[0:img_data.shape[0], 0:img_data.shape[1], :] = img_data

    img_shape = (config.img_height, config.img_width, 1.0)
    img_shape = np.asarray(img_shape, dtype=np.float32)

    return (pad_img_data, img_shape, gt_bboxes, gt_label, gt_num)


def rescale_column_test(img, img_shape, gt_bboxes, gt_label, gt_num, config):
    """rescale operation for image of eval"""
    img_data, scale_factor = rescale_with_tuple(img, (config.img_width, config.img_height))
    if img_data.shape[0] > config.img_height:
        img_data, scale_factor2 = rescale_with_tuple(img_data, (config.img_height, config.img_height))
        scale_factor = scale_factor * scale_factor2

    pad_h = config.img_height - img_data.shape[0]
    pad_w = config.img_width - img_data.shape[1]
    assert ((pad_h >= 0) and (pad_w >= 0))

    pad_img_data = np.zeros((config.img_height, config.img_width, 3)).astype(img_data.dtype)
    pad_img_data[0:img_data.shape[0], 0:img_data.shape[1], :] = img_data

    img_shape = np.append(img_shape, (scale_factor, scale_factor))
    img_shape = np.asarray(img_shape, dtype=np.float32)

    return (pad_img_data, img_shape, gt_bboxes, gt_label, gt_num)


def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num, config):
    """resize operation for image"""
    img_data = img
    h, w = img_data.shape[:2]
    img_data = cv2.resize(
        img_data, (config.img_width, config.img_height), interpolation=cv2.INTER_LINEAR)
    h_scale = config.img_height / h
    w_scale = config.img_width / w

    scale_factor = np.array(
        [w_scale, h_scale, w_scale, h_scale], dtype=np.float32)
    img_shape = (config.img_height, config.img_width, 1.0)
    img_shape = np.asarray(img_shape, dtype=np.float32)

    gt_bboxes = gt_bboxes * scale_factor
    gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
    gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)

    return (img_data, img_shape, gt_bboxes, gt_label, gt_num)


def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num, config):
    """resize operation for image of eval"""
    img_data = img
    h, w = img_data.shape[:2]
    img_data = cv2.resize(
        img_data, (config.img_width, config.img_height), interpolation=cv2.INTER_LINEAR)
    h_scale = config.img_height / h
    w_scale = config.img_width / w

    scale_factor = np.array(
        [w_scale, h_scale, w_scale, h_scale], dtype=np.float32)
    img_shape = np.append(img_shape, (h_scale, w_scale))
    img_shape = np.asarray(img_shape, dtype=np.float32)

    gt_bboxes = gt_bboxes * scale_factor
    gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
    gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)

    return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
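

# Worked example for the fixed-size path (made-up input size; assuming
# config.img_width = 1280 and config.img_height = 768): a 480x640 (h x w)
# image gives w_scale = 1280 / 640 = 2.0 and h_scale = 768 / 480 = 1.6, so a
# ground-truth box [100, 100, 200, 200] becomes [200, 160, 400, 320], and the
# eval-mode img_shape column carries (480, 640, 1.6, 2.0) for mapping
# predictions back to the original resolution.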


def impad_to_multiple_column(img, img_shape, gt_bboxes, gt_label, gt_num, config):
    """impad operation for image"""
    img_data = cv2.copyMakeBorder(img,
                                  0, config.img_height - img.shape[0], 0, config.img_width - img.shape[1],
                                  cv2.BORDER_CONSTANT,
                                  value=0)
    img_data = img_data.astype(np.float32)
    return (img_data, img_shape, gt_bboxes, gt_label, gt_num)


def imnormalize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
    """imnormalize operation for image"""
    # ImageNet channel means/stds, in RGB order (applied after BGR->RGB below)
    mean = np.asarray([123.675, 116.28, 103.53])
    std = np.asarray([58.395, 57.12, 57.375])
    img_data = img.copy().astype(np.float32)
    cv2.cvtColor(img_data, cv2.COLOR_BGR2RGB, img_data)  # inplace
    cv2.subtract(img_data, np.float64(mean.reshape(1, -1)), img_data)  # inplace
    cv2.multiply(img_data, 1 / np.float64(std.reshape(1, -1)), img_data)  # inplace

    img_data = img_data.astype(np.float32)
    return (img_data, img_shape, gt_bboxes, gt_label, gt_num)


def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num):
    """flip operation for image"""
    img_data = img
    img_data = np.flip(img_data, axis=1)
    flipped = gt_bboxes.copy()
    _, w, _ = img_data.shape

    flipped[..., 0::4] = w - gt_bboxes[..., 2::4] - 1
    flipped[..., 2::4] = w - gt_bboxes[..., 0::4] - 1

    return (img_data, img_shape, flipped, gt_label, gt_num)
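

# Horizontal-flip arithmetic (made-up numbers): with image width w = 1280, a
# box [x1, y1, x2, y2] = [100, 50, 300, 200] maps to
# [1280 - 300 - 1, 50, 1280 - 100 - 1, 200] = [979, 50, 1179, 200];
# x coordinates are mirrored, y coordinates are untouched.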


def transpose_column(img, img_shape, gt_bboxes, gt_label, gt_num):
    """transpose operation for image"""
    img_data = img.transpose(2, 0, 1).copy()
    img_data = img_data.astype(np.float32)
    img_shape = img_shape.astype(np.float32)
    gt_bboxes = gt_bboxes.astype(np.float32)
    gt_label = gt_label.astype(np.int32)
    gt_num = gt_num.astype(np.bool_)  # np.bool_ avoids the removed np.bool alias

    return (img_data, img_shape, gt_bboxes, gt_label, gt_num)


def photo_crop_column(img, img_shape, gt_bboxes, gt_label, gt_num):
    """photo crop operation for image"""
    random_photo = PhotoMetricDistortion()
    img_data, gt_bboxes, gt_label = random_photo(img, gt_bboxes, gt_label)

    return (img_data, img_shape, gt_bboxes, gt_label, gt_num)


def expand_column(img, img_shape, gt_bboxes, gt_label, gt_num):
    """expand operation for image"""
    expand = Expand()
    img, gt_bboxes, gt_label = expand(img, gt_bboxes, gt_label)

    return (img, img_shape, gt_bboxes, gt_label, gt_num)


def preprocess_fn(image, box, is_training, config):
    """Preprocess function for dataset."""
    def _infer_data(image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert):
        image_shape = image_shape[:2]
        input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert

        if config.keep_ratio:
            input_data = rescale_column_test(*input_data, config=config)
        else:
            input_data = resize_column_test(*input_data, config=config)
        input_data = imnormalize_column(*input_data)

        output_data = transpose_column(*input_data)
        return output_data

    def _data_aug(image, box, is_training):
        """Data augmentation function."""
        image_bgr = image.copy()
        image_bgr[:, :, 0] = image[:, :, 2]
        image_bgr[:, :, 1] = image[:, :, 1]
        image_bgr[:, :, 2] = image[:, :, 0]
        image_shape = image_bgr.shape[:2]
        gt_box = box[:, :4]
        gt_label = box[:, 4]
        gt_iscrowd = box[:, 5]

        pad_max_number = 128
        gt_box_new = np.pad(gt_box, ((0, pad_max_number - box.shape[0]), (0, 0)), mode="constant", constant_values=0)
        gt_label_new = np.pad(gt_label, ((0, pad_max_number - box.shape[0])), mode="constant", constant_values=-1)
        gt_iscrowd_new = np.pad(gt_iscrowd, ((0, pad_max_number - box.shape[0])), mode="constant", constant_values=1)
        gt_iscrowd_new_revert = (~(gt_iscrowd_new.astype(np.bool_))).astype(np.int32)

        if not is_training:
            return _infer_data(image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert)

        flip = (np.random.rand() < config.flip_ratio)
        expand = (np.random.rand() < config.expand_ratio)
        input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert

        if expand:
            input_data = expand_column(*input_data)
        if config.keep_ratio:
            input_data = rescale_column(*input_data, config=config)
        else:
            input_data = resize_column(*input_data, config=config)
        input_data = imnormalize_column(*input_data)
        if flip:
            input_data = flip_column(*input_data)

        output_data = transpose_column(*input_data)
        return output_data

    return _data_aug(image, box, is_training)
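

# Column layout produced by preprocess_fn for each sample (pad_max_number = 128);
# create_fasterrcnn_dataset below maps these to the output columns
# ["image", "image_shape", "box", "label", "valid_num"]:
#   image:       float32 (3, config.img_height, config.img_width), normalized, CHW
#   image_shape: float32 resize metadata -- (img_height, img_width, 1.0) in
#                training, (orig_h, orig_w) plus the applied scales in eval
#   box:         float32 (128, 4), zero-padded ground-truth boxes, xyxy order
#   label:       int32 (128,), padded entries filled with -1
#   valid_num:   bool (128,), True only for real, non-crowd annotations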


def create_coco_label(is_training, config):
    """Get image path and annotation from COCO."""
    from pycocotools.coco import COCO

    coco_root = config.coco_root
    data_type = config.val_data_type
    if is_training:
        data_type = config.train_data_type

    # Classes needed for training or testing.
    train_cls = config.coco_classes
    train_cls_dict = {}
    for i, cls in enumerate(train_cls):
        train_cls_dict[cls] = i

    anno_json = os.path.join(coco_root, config.instance_set.format(data_type))

    coco = COCO(anno_json)
    classs_dict = {}
    cat_ids = coco.loadCats(coco.getCatIds())
    for cat in cat_ids:
        classs_dict[cat["id"]] = cat["name"]

    image_ids = coco.getImgIds()
    image_files = []
    image_anno_dict = {}

    for img_id in image_ids:
        image_info = coco.loadImgs(img_id)
        file_name = image_info[0]["file_name"]
        anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=None)
        anno = coco.loadAnns(anno_ids)
        image_path = os.path.join(coco_root, data_type, file_name)
        annos = []
        for label in anno:
            bbox = label["bbox"]
            class_name = classs_dict[label["category_id"]]
            if class_name in train_cls:
                x1, x2 = bbox[0], bbox[0] + bbox[2]
                y1, y2 = bbox[1], bbox[1] + bbox[3]
                annos.append([x1, y1, x2, y2] + [train_cls_dict[class_name]] + [int(label["iscrowd"])])

        image_files.append(image_path)
        if annos:
            image_anno_dict[image_path] = np.array(annos)
        else:
            image_anno_dict[image_path] = np.array([0, 0, 0, 0, 0, 1])

    return image_files, image_anno_dict
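

# Each row stored in image_anno_dict has the flat layout
#   [x1, y1, x2, y2, class_id, iscrowd]
# with COCO's (x, y, w, h) bbox converted to corner coordinates; an image
# without usable annotations gets the sentinel row [0, 0, 0, 0, 0, 1], whose
# iscrowd = 1 flag is later inverted into valid_num = False so it is ignored.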


def parse_json_annos_from_txt(anno_file, config):
    """Parse a user-defined annotation text file into COCO-style json data."""
    if not os.path.isfile(anno_file):
        raise RuntimeError("Evaluation annotation file {} is not valid.".format(anno_file))

    annos = {
        "images": [],
        "annotations": [],
        "categories": []
    }

    # set categories field
    for i, cls_name in enumerate(config.coco_classes):
        annos["categories"].append({"id": i, "name": cls_name})

    with open(anno_file, "rb") as f:
        lines = f.readlines()

    img_id = 1
    anno_id = 1
    for line in lines:
        line_str = line.decode("utf-8").strip()
        line_split = str(line_str).split(' ')
        # set images field
        file_name = line_split[0]
        annos["images"].append({"file_name": file_name, "id": img_id})
        # set annotations field
        for anno_info in line_split[1:]:
            anno = anno_info.split(",")
            x = float(anno[0])
            y = float(anno[1])
            w = float(anno[2]) - float(anno[0])
            h = float(anno[3]) - float(anno[1])
            category_id = int(anno[4])
            iscrowd = int(anno[5])
            annos["annotations"].append({"bbox": [x, y, w, h],
                                         "area": w * h,
                                         "category_id": category_id,
                                         "iscrowd": iscrowd,
                                         "image_id": img_id,
                                         "id": anno_id})
            anno_id += 1
        img_id += 1

    return annos
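

# Expected line format of the user-defined annotation text file (one image per
# line, space-separated annotations, comma-separated fields), values made up:
#
#   images/0001.jpg 48,240,195,371,1,0 8,12,352,498,2,0
#
# i.e. <image_file> followed by any number of x1,y1,x2,y2,class_id,iscrowd
# groups; the corner coordinates are converted to COCO (x, y, w, h) above.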


def create_train_data_from_txt(image_dir, anno_path):
    """Filter valid image files that are present in both image_dir and anno_path."""
    def anno_parser(annos_str):
        """Parse annotation from string to list."""
        annos = []
        for anno_str in annos_str:
            anno = anno_str.strip().split(",")
            xmin, ymin, xmax, ymax = list(map(float, anno[:4]))
            cls_id = int(anno[4])
            iscrowd = int(anno[5])
            annos.append([xmin, ymin, xmax, ymax, cls_id, iscrowd])
        return annos

    image_files = []
    image_anno_dict = {}
    if not os.path.isdir(image_dir):
        raise RuntimeError("Path given is not valid.")
    if not os.path.isfile(anno_path):
        raise RuntimeError("Annotation file is not valid.")

    with open(anno_path, "rb") as f:
        lines = f.readlines()
    for line in lines:
        line_str = line.decode("utf-8").strip()
        line_split = str(line_str).split(' ')
        file_name = line_split[0]
        image_path = os.path.join(image_dir, file_name)
        if os.path.isfile(image_path):
            image_anno_dict[image_path] = anno_parser(line_split[1:])
            image_files.append(image_path)

    return image_files, image_anno_dict


def data_to_mindrecord_byte_image(config, dataset="coco", is_training=True, prefix="fasterrcnn.mindrecord", file_num=8):
    """Create MindRecord file."""
    mindrecord_dir = config.mindrecord_dir
    mindrecord_path = os.path.join(mindrecord_dir, prefix)
    writer = FileWriter(mindrecord_path, file_num)
    if dataset == "coco":
        image_files, image_anno_dict = create_coco_label(is_training, config=config)
    else:
        image_files, image_anno_dict = create_train_data_from_txt(config.image_dir, config.anno_path)

    fasterrcnn_json = {
        "image": {"type": "bytes"},
        "annotation": {"type": "int32", "shape": [-1, 6]},
    }
    writer.add_schema(fasterrcnn_json, "fasterrcnn_json")

    for image_name in image_files:
        with open(image_name, 'rb') as f:
            img = f.read()
        annos = np.array(image_anno_dict[image_name], dtype=np.int32)
        row = {"image": img, "annotation": annos}
        writer.write_raw_data([row])
    writer.commit()
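

# Usage sketch (illustrative; assumes a config object exposing mindrecord_dir
# plus either the COCO fields used by create_coco_label or image_dir/anno_path
# for the text-file branch):
#
#   data_to_mindrecord_byte_image(config, dataset="coco", is_training=True,
#                                 prefix="fasterrcnn.mindrecord", file_num=8)
#
# FileWriter splits the output into file_num numbered shards (plus index
# files) under config.mindrecord_dir.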


def create_fasterrcnn_dataset(config, mindrecord_file, batch_size=2, device_num=1, rank_id=0, is_training=True,
                              num_parallel_workers=8, python_multiprocessing=False):
    """Create FasterRcnn dataset with MindDataset."""
    cv2.setNumThreads(0)
    de.config.set_prefetch_size(8)
    ds = de.MindDataset(mindrecord_file, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank_id,
                        num_parallel_workers=4, shuffle=is_training)
    decode = ms.dataset.vision.c_transforms.Decode()
    ds = ds.map(input_columns=["image"], operations=decode)
    compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training, config=config))

    if is_training:
        ds = ds.map(input_columns=["image", "annotation"],
                    output_columns=["image", "image_shape", "box", "label", "valid_num"],
                    column_order=["image", "image_shape", "box", "label", "valid_num"],
                    operations=compose_map_func, python_multiprocessing=python_multiprocessing,
                    num_parallel_workers=num_parallel_workers)
        ds = ds.batch(batch_size, drop_remainder=True)
    else:
        ds = ds.map(input_columns=["image", "annotation"],
                    output_columns=["image", "image_shape", "box", "label", "valid_num"],
                    column_order=["image", "image_shape", "box", "label", "valid_num"],
                    operations=compose_map_func,
                    num_parallel_workers=num_parallel_workers)
        ds = ds.batch(batch_size, drop_remainder=True)
    return ds
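

# End-to-end usage sketch (illustrative; the "0" suffix follows the shard
# numbering FileWriter applies when file_num > 1, and the config fields are
# assumed as above):
#
#   mindrecord_file = os.path.join(config.mindrecord_dir, "fasterrcnn.mindrecord0")
#   if not os.path.exists(mindrecord_file):
#       data_to_mindrecord_byte_image(config, dataset="coco", is_training=True)
#   train_ds = create_fasterrcnn_dataset(config, mindrecord_file, batch_size=2,
#                                        device_num=1, rank_id=0, is_training=True)
#   for batch in train_ds.create_dict_iterator(output_numpy=True):
#       print(batch["image"].shape, batch["box"].shape)   # (2, 3, H, W), (2, 128, 4)
#       break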