# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
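"""Split a COCO-style annotation file into a labeled subset and an unlabeled
subset for semi-supervised detection.

The labeled image indices are either read from a pre-defined supervision file
(``txt_file``) or sampled at random for a given percentage and seed. The two
resulting JSON files are written to ``<data_dir>/semi_annotations/``.
"""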
import os
import json
import argparse
import numpy as np


def save_json(path, images, annotations, categories):
    """Dump a COCO-format dict with the given images/annotations/categories."""
    new_json = {
        'images': images,
        'annotations': annotations,
        'categories': categories,
    }
    with open(path, 'w') as f:
        json.dump(new_json, f)
    print('{} saved, with {} images and {} annotations.'.format(
        path, len(images), len(annotations)))


def gen_semi_data(data_dir,
                  json_file,
                  percent=10.0,
                  seed=1,
                  seed_offset=0,
                  txt_file=None):
    json_name = json_file.split('/')[-1].split('.')[0]
    json_file = os.path.join(data_dir, json_file)
    with open(json_file, 'r') as f:
        anno = json.load(f)
    categories = anno['categories']
    all_images = anno['images']
    all_anns = anno['annotations']
    print('Total {} images and {} annotations, about {:.2f} gts per image.'.
          format(len(all_images), len(all_anns),
                 len(all_anns) / len(all_images)))

    if txt_file:
        # Use the pre-defined split indices stored in the supervision file.
        print('Using percent {} and seed {}.'.format(percent, seed))
        txt_file = os.path.join(data_dir, txt_file)
        with open(txt_file, 'r') as f:
            sup_idx = json.load(f)[str(percent)][str(seed)]
        # For the 10% split, max(sup_idx) = 117262; sup_idx is an index into
        # `images`, not an image_id.
    else:
        # Randomly sample `percent`% of the images as the labeled split.
        np.random.seed(seed + seed_offset)
        sup_len = int(percent / 100.0 * len(all_images))
        sup_idx = np.random.choice(
            range(len(all_images)), size=sup_len, replace=False)
    sup_idx = set(sup_idx)  # O(1) membership checks below

    labeled_images, labeled_anns = [], []
    labeled_im_ids = set()
    # The unlabeled split keeps its images but no annotations.
    unlabeled_images, unlabeled_anns = [], []
    for i in range(len(all_images)):
        if i in sup_idx:
            labeled_im_ids.add(all_images[i]['id'])
            labeled_images.append(all_images[i])
        else:
            unlabeled_images.append(all_images[i])

    # Keep only the annotations whose image belongs to the labeled split.
    for an in all_anns:
        if an['image_id'] in labeled_im_ids:
            labeled_anns.append(an)

    save_path = os.path.join(data_dir, 'semi_annotations')
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    sup_name = '{}.{}@{}.json'.format(json_name, seed, int(percent))
    sup_path = os.path.join(save_path, sup_name)
    save_json(sup_path, labeled_images, labeled_anns, categories)
    unsup_name = '{}.{}@{}-unlabeled.json'.format(json_name, seed, int(percent))
    unsup_path = os.path.join(save_path, unsup_name)
    save_json(unsup_path, unlabeled_images, unlabeled_anns, categories)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='./dataset/coco')
    parser.add_argument(
        '--json_file', type=str, default='annotations/instances_train2017.json')
    parser.add_argument('--percent', type=float, default=10.0)
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--seed_offset', type=int, default=0)
    parser.add_argument('--txt_file', type=str, default='COCO_supervision.txt')
    args = parser.parse_args()
    print(args)
    gen_semi_data(args.data_dir, args.json_file, args.percent, args.seed,
                  args.seed_offset, args.txt_file)
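# Example usage (a sketch; the script filename is an assumption, and the paths
# simply mirror the argparse defaults above):
#
#   python gen_semi_coco.py \
#       --data_dir ./dataset/coco \
#       --json_file annotations/instances_train2017.json \
#       --percent 10.0 --seed 1 --txt_file COCO_supervision.txt
#
# With these arguments the script writes two files under
# ./dataset/coco/semi_annotations/:
#   instances_train2017.1@10.json            (labeled 10% split)
#   instances_train2017.1@10-unlabeled.json  (remaining images, no annotations)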