diff --git a/cv/detection/ssd/tensorflow/dataset/dataset_common.py b/cv/detection/ssd/tensorflow/dataset/dataset_common.py
index 9c17c0eea470df2d18c119b195e5313782f78aed..791c809746052b2e9e7dd219a2eaabcbc871401f 100644
--- a/cv/detection/ssd/tensorflow/dataset/dataset_common.py
+++ b/cv/detection/ssd/tensorflow/dataset/dataset_common.py
@@ -1,24 +1,8 @@
-# Copyright 2018 Changan Wang
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-
-#     http://www.apache.org/licenses/LICENSE-2.0
-
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
 import tensorflow.compat.v1 as tf
-import tf_slim as slim
-
 
 VOC_LABELS = {
     'none': (0, 'Background'),
@@ -44,195 +28,100 @@ VOC_LABELS = {
     'tvmonitor': (20, 'Indoor'),
 }
 
-COCO_LABELS = {
-    "bench": (14, 'outdoor') ,
-    "skateboard": (37, 'sports') ,
-    "toothbrush": (80, 'indoor') ,
-    "person": (1, 'person') ,
-    "donut": (55, 'food') ,
-    "none": (0, 'background') ,
-    "refrigerator": (73, 'appliance') ,
-    "horse": (18, 'animal') ,
-    "elephant": (21, 'animal') ,
-    "book": (74, 'indoor') ,
-    "car": (3, 'vehicle') ,
-    "keyboard": (67, 'electronic') ,
-    "cow": (20, 'animal') ,
-    "microwave": (69, 'appliance') ,
-    "traffic light": (10, 'outdoor') ,
-    "tie": (28, 'accessory') ,
-    "dining table": (61, 'furniture') ,
-    "toaster": (71, 'appliance') ,
-    "baseball glove": (36, 'sports') ,
-    "giraffe": (24, 'animal') ,
-    "cake": (56, 'food') ,
-    "handbag": (27, 'accessory') ,
-    "scissors": (77, 'indoor') ,
-    "bowl": (46, 'kitchen') ,
-    "couch": (58, 'furniture') ,
-    "chair": (57, 'furniture') ,
-    "boat": (9, 'vehicle') ,
-    "hair drier": (79, 'indoor') ,
-    "airplane": (5, 'vehicle') ,
-    "pizza": (54, 'food') ,
-    "backpack": (25, 'accessory') ,
-    "kite": (34, 'sports') ,
-    "sheep": (19, 'animal') ,
-    "umbrella": (26, 'accessory') ,
-    "stop sign": (12, 'outdoor') ,
-    "truck": (8, 'vehicle') ,
-    "skis": (31, 'sports') ,
-    "sandwich": (49, 'food') ,
-    "broccoli": (51, 'food') ,
-    "wine glass": (41, 'kitchen') ,
-    "surfboard": (38, 'sports') ,
-    "sports ball": (33, 'sports') ,
-    "cell phone": (68, 'electronic') ,
-    "dog": (17, 'animal') ,
-    "bed": (60, 'furniture') ,
-    "toilet": (62, 'furniture') ,
-    "fire hydrant": (11, 'outdoor') ,
-    "oven": (70, 'appliance') ,
-    "zebra": (23, 'animal') ,
-    "tv": (63, 'electronic') ,
-    "potted plant": (59, 'furniture') ,
-    "parking meter": (13, 'outdoor') ,
-    "spoon": (45, 'kitchen') ,
-    "bus": (6, 'vehicle') ,
-    "laptop": (64, 'electronic') ,
-    "cup": (42, 'kitchen') ,
-    "bird": (15, 'animal') ,
-    "sink": (72, 'appliance') ,
-    "remote": (66, 'electronic') ,
-    "bicycle": (2, 'vehicle') ,
-    "tennis racket": (39, 'sports') ,
-    "baseball bat": (35, 'sports') ,
-    "cat": (16, 'animal') ,
-    "fork": (43, 'kitchen') ,
-    "suitcase": (29, 'accessory') ,
-    "snowboard": (32, 'sports') ,
-    "clock": (75, 'indoor') ,
-    "apple": (48, 'food') ,
-    "mouse": (65, 'electronic') ,
-    "bottle": (40, 'kitchen') ,
-    "frisbee": (30, 'sports') ,
-    "carrot": (52, 'food') ,
-    "bear": (22, 'animal') ,
, - "hot dog": (53, 'food') , - "teddy bear": (78, 'indoor') , - "knife": (44, 'kitchen') , - "train": (7, 'vehicle') , - "vase": (76, 'indoor') , - "banana": (47, 'food') , - "motorcycle": (4, 'vehicle') , - "orange": (50, 'food') - } - -# use dataset_inspect.py to get these summary data_splits_num = { 'train': 22136, 'val': 4952, } -def slim_get_batch(num_classes, batch_size, split_name, file_pattern, num_readers, num_preprocessing_threads, image_preprocessing_fn, anchor_encoder, num_epochs=None, is_training=True): - """Gets a dataset tuple with instructions for reading Pascal VOC dataset. - - Args: - num_classes: total class numbers in dataset. - batch_size: the size of each batch. - split_name: 'train' of 'val'. - file_pattern: The file pattern to use when matching the dataset sources (full path). - num_readers: the max number of reader used for reading tfrecords. - num_preprocessing_threads: the max number of threads used to run preprocessing function. - image_preprocessing_fn: the function used to dataset augumentation. - anchor_encoder: the function used to encoder all anchors. - num_epochs: total epoches for iterate this dataset. - is_training: whether we are in traing phase. - - Returns: - A batch of [image, shape, loc_targets, cls_targets, match_scores]. - """ +def tf1_get_batch(num_classes, batch_size, split_name, file_pattern, num_readers, num_preprocessing_threads, image_preprocessing_fn, anchor_encoder, num_epochs=None, is_training=True): if split_name not in data_splits_num: raise ValueError('split name %s was not recognized.' % split_name) - # Features in Pascal VOC TFRecords. - keys_to_features = { - 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), - 'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'), - 'image/filename': tf.FixedLenFeature((), tf.string, default_value=''), - 'image/height': tf.FixedLenFeature([1], tf.int64), - 'image/width': tf.FixedLenFeature([1], tf.int64), - 'image/channels': tf.FixedLenFeature([1], tf.int64), - 'image/shape': tf.FixedLenFeature([3], tf.int64), - 'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32), - 'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32), - 'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32), - 'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32), - 'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64), - 'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64), - 'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64), - } - items_to_handlers = { - 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'), - 'filename': slim.tfexample_decoder.Tensor('image/filename'), - 'shape': slim.tfexample_decoder.Tensor('image/shape'), - 'object/bbox': slim.tfexample_decoder.BoundingBox( - ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'), - 'object/label': slim.tfexample_decoder.Tensor('image/object/bbox/label'), - 'object/difficult': slim.tfexample_decoder.Tensor('image/object/bbox/difficult'), - 'object/truncated': slim.tfexample_decoder.Tensor('image/object/bbox/truncated'), - } - decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers) - - labels_to_names = {} - for name, pair in VOC_LABELS.items(): - labels_to_names[pair[0]] = name - - dataset = slim.dataset.Dataset( - data_sources=file_pattern, - reader=tf.TFRecordReader, - decoder=decoder, - num_samples=data_splits_num[split_name], - items_to_descriptions=None, - num_classes=num_classes, - labels_to_names=labels_to_names) - - with 
-    with tf.name_scope('dataset_data_provider'):
-        provider = slim.dataset_data_provider.DatasetDataProvider(
-                    dataset,
-                    num_readers=num_readers,
-                    common_queue_capacity=32 * batch_size,
-                    common_queue_min=8 * batch_size,
-                    shuffle=is_training,
-                    num_epochs=num_epochs)
-
-    [org_image, filename, shape, glabels_raw, gbboxes_raw, isdifficult] = provider.get(['image', 'filename', 'shape',
-                                                                                       'object/label',
-                                                                                       'object/bbox',
-                                                                                       'object/difficult'])
-
-    if is_training:
-        # if all is difficult, then keep the first one
-        isdifficult_mask =tf.cond(tf.count_nonzero(isdifficult, dtype=tf.int32) < tf.shape(isdifficult)[0],
-                                lambda : isdifficult < tf.ones_like(isdifficult),
-                                lambda : tf.one_hot(0, tf.shape(isdifficult)[0], on_value=True, off_value=False, dtype=tf.bool))
-
-        glabels_raw = tf.boolean_mask(glabels_raw, isdifficult_mask)
-        gbboxes_raw = tf.boolean_mask(gbboxes_raw, isdifficult_mask)
-
-    # Pre-processing image, labels and bboxes.
-
-    if is_training:
-        image, glabels, gbboxes = image_preprocessing_fn(org_image, glabels_raw, gbboxes_raw)
+    def parse_tfrecord(example_proto):
+        keys_to_features = {
+            'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
+            'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
+            'image/filename': tf.FixedLenFeature((), tf.string, default_value=''),
+            'image/shape': tf.FixedLenFeature([3], tf.int64),
+            'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
+            'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
+            'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
+            'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
+            'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
+            'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64),
+        }
+        parsed_features = tf.parse_single_example(example_proto, keys_to_features)
+
+        image = tf.image.decode_jpeg(parsed_features['image/encoded'], channels=3)
+        shape = parsed_features['image/shape']
+        filename = parsed_features['image/filename']
+
+        xmin = tf.sparse.to_dense(parsed_features['image/object/bbox/xmin'])
+        ymin = tf.sparse.to_dense(parsed_features['image/object/bbox/ymin'])
+        xmax = tf.sparse.to_dense(parsed_features['image/object/bbox/xmax'])
+        ymax = tf.sparse.to_dense(parsed_features['image/object/bbox/ymax'])
+        gbboxes_raw = tf.stack([ymin, xmin, ymax, xmax], axis=1)
+        glabels_raw = tf.sparse.to_dense(parsed_features['image/object/bbox/label'])
+        isdifficult = tf.sparse.to_dense(parsed_features['image/object/bbox/difficult'])
+
+        return image, filename, shape, glabels_raw, gbboxes_raw, isdifficult
+
+    # Build the tf.data input pipeline.
+    dataset = tf.data.Dataset.list_files(file_pattern, shuffle=is_training)  # supports glob patterns
+    dataset = dataset.interleave(
+        lambda x: tf.data.TFRecordDataset(x, num_parallel_reads=num_readers),
+        cycle_length=num_readers,
+        num_parallel_calls=tf.data.experimental.AUTOTUNE
+    )
+    dataset = dataset.map(parse_tfrecord, num_parallel_calls=num_preprocessing_threads)
+
+    total_samples = data_splits_num[split_name]
+    dataset = dataset.take(total_samples)
+    print(f"split_name:{split_name}, batch_size:{batch_size}, num_epochs:{num_epochs}, samples_per_epoch: {total_samples}, batch_nums_per_epoch:{total_samples // batch_size}")
+
+    if num_epochs is not None:
+        dataset = dataset.repeat(num_epochs)
     else:
-        image = image_preprocessing_fn(org_image, glabels_raw, gbboxes_raw)
-        glabels, gbboxes = glabels_raw, gbboxes_raw
-
-    gt_targets, gt_labels, gt_scores = anchor_encoder(glabels, gbboxes)
+        dataset = dataset.repeat()  # repeat indefinitely
-    return tf.train.batch([image, filename, shape, gt_targets, gt_labels, gt_scores],
-                    dynamic_pad=False,
-                    batch_size=batch_size,
-                    allow_smaller_final_batch=(not is_training),
-                    num_threads=num_preprocessing_threads,
-                    capacity=64 * batch_size)
+    if is_training:
+        # If every box in an image is difficult, keep only the first one;
+        # otherwise drop all difficult boxes.
+        dataset = dataset.map(lambda img, fn, sh, lbl, box, diff: (
+            img, fn, sh, tf.cond(
+                tf.count_nonzero(diff, dtype=tf.int32) < tf.shape(diff)[0],
+                lambda: tf.boolean_mask(lbl, diff < tf.ones_like(diff)),
+                lambda: tf.boolean_mask(lbl, tf.one_hot(0, tf.shape(diff)[0], on_value=True, off_value=False, dtype=tf.bool))
+            ),
+            tf.cond(
+                tf.count_nonzero(diff, dtype=tf.int32) < tf.shape(diff)[0],
+                lambda: tf.boolean_mask(box, diff < tf.ones_like(diff)),
+                lambda: tf.boolean_mask(box, tf.one_hot(0, tf.shape(diff)[0], on_value=True, off_value=False, dtype=tf.bool))
+            )
+        ), num_parallel_calls=num_preprocessing_threads)
+    else:
+        # Drop the 'difficult' flag here as well, so both branches feed
+        # five-element tuples into the preprocessing map below.
+        dataset = dataset.map(lambda img, fn, sh, lbl, box, diff: (img, fn, sh, lbl, box),
+                              num_parallel_calls=num_preprocessing_threads)
+
+    # Preprocess image, labels and bboxes.
+    def preprocess(image, filename, shape, glabels_raw, gbboxes_raw):
+        if is_training:
+            image, glabels, gbboxes = image_preprocessing_fn(image, glabels_raw, gbboxes_raw)
+        else:
+            image = image_preprocessing_fn(image, glabels_raw, gbboxes_raw)
+            glabels, gbboxes = glabels_raw, gbboxes_raw
+        return image, filename, shape, glabels, gbboxes
+
+    dataset = dataset.map(preprocess, num_parallel_calls=num_preprocessing_threads)
+
+    # Anchor encoding.
+    def encode(glabels, gbboxes):
+        gt_targets, gt_labels, gt_scores = anchor_encoder(glabels, gbboxes)
+        return gt_targets, gt_labels, gt_scores
+
+    dataset = dataset.map(lambda img, fn, sh, lbl, box: (
+        img, fn, sh, *encode(lbl, box)
+    ), num_parallel_calls=num_preprocessing_threads)
+
+    # Batching.
+    dataset = dataset.batch(batch_size, drop_remainder=True)
+    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
+
+    # Create the iterator.
+    iterator = dataset.make_one_shot_iterator()
+    return iterator.get_next()  # returns (image, filename, shape, gt_targets, gt_labels, gt_scores)
\ No newline at end of file
diff --git a/cv/detection/ssd/tensorflow/net/ssd_net.py b/cv/detection/ssd/tensorflow/net/ssd_net.py
index c584a7f283597b0490c9f6b9fd519e5b7afbcbe3..52d7304a033bd9d4e48c996947f95e3778ace042 100644
--- a/cv/detection/ssd/tensorflow/net/ssd_net.py
+++ b/cv/detection/ssd/tensorflow/net/ssd_net.py
@@ -56,7 +56,7 @@ _USE_FUSED_BN = True
 # vgg_16/conv3/conv3_3/biases
 # vgg_16/conv1/conv1_2/weights
 
-class ReLuLayer(tf.layers.Layer):
+class ReLuLayer(tf.keras.layers.Layer):
     def __init__(self, name, **kwargs):
-        super(ReLuLayer, self).__init__(name=name, trainable=trainable, **kwargs)
+        super(ReLuLayer, self).__init__(name=name, **kwargs)
         self._name = name
@@ -72,9 +72,9 @@ class ReLuLayer(tf.layers.Layer):
 
 
 def forward_module(m, inputs, training=False):
-    if isinstance(m, tf.layers.BatchNormalization) or isinstance(m, tf.layers.Dropout):
-        return m.apply(inputs, training=training)
-    return m.apply(inputs)
+    if isinstance(m, tf.keras.layers.BatchNormalization) or isinstance(m, tf.keras.layers.Dropout):
+        return m(inputs, training=training)
+    return m(inputs)
 
 
 def get_backbone(backbone, training, **kwargs):
@@ -124,18 +124,18 @@ def ssd_conv_block(
     with tf.variable_scope(name):
         conv_blocks = []
         conv_blocks.append(
-                tf.layers.Conv2D(filters=filters, kernel_size=1, strides=1, padding=padding,
+                tf.keras.layers.Conv2D(filters=filters, kernel_size=1, strides=1, padding=padding,
                         data_format=data_format, activation=tf.nn.relu, use_bias=True,
                         kernel_initializer=kernel_initializer,
                         bias_initializer=tf.zeros_initializer(),
-                        name='{}_1'.format(name), _scope='{}_1'.format(name), _reuse=None)
+                        name='{}_1'.format(name))
             )
         conv_blocks.append(
-                tf.layers.Conv2D(filters=filters * 2, kernel_size=3, strides=strides, padding=padding,
+                tf.keras.layers.Conv2D(filters=filters * 2, kernel_size=3, strides=strides, padding=padding,
                         data_format=data_format, activation=tf.nn.relu, use_bias=True,
                         kernel_initializer=kernel_initializer,
                         bias_initializer=tf.zeros_initializer(),
-                        name='{}_2'.format(name), _scope='{}_2'.format(name), _reuse=None)
+                        name='{}_2'.format(name))
             )
 
     return conv_blocks
 
@@ -203,28 +203,28 @@ class VGG16Backbone(object):
         # VGG layers
         self._conv1_block = self.conv_block(2, 64, 3, (1, 1), 'conv1')
         # down_1
-        self._pool1 = tf.layers.MaxPooling2D(2, 2, padding='same', data_format=self._data_format, name='pool1')
+        self._pool1 = tf.keras.layers.MaxPooling2D(2, 2, padding='same', data_format=self._data_format, name='pool1')
         self._conv2_block = self.conv_block(2, 128, 3, (1, 1), 'conv2')
         # down_2
-        self._pool2 = tf.layers.MaxPooling2D(2, 2, padding='same', data_format=self._data_format, name='pool2')
+        self._pool2 = tf.keras.layers.MaxPooling2D(2, 2, padding='same', data_format=self._data_format, name='pool2')
         self._conv3_block = self.conv_block(3, 256, 3, (1, 1), 'conv3')
         # down_3
-        self._pool3 = tf.layers.MaxPooling2D(2, 2, padding='same', data_format=self._data_format, name='pool3')
+        self._pool3 = tf.keras.layers.MaxPooling2D(2, 2, padding='same', data_format=self._data_format, name='pool3')
         self._conv4_block = self.conv_block(3, 512, 3, (1, 1), 'conv4')
         # down_4
-        self._pool4 = tf.layers.MaxPooling2D(2, 2, padding='same', data_format=self._data_format, name='pool4')
+        self._pool4 = tf.keras.layers.MaxPooling2D(2, 2, padding='same', data_format=self._data_format, name='pool4')
         self._conv5_block = self.conv_block(3, 512, 3, (1, 1), 'conv5')
-        self._pool5 = tf.layers.MaxPooling2D(3, 1, padding='same', data_format=self._data_format, name='pool5')
-        self._conv6 = tf.layers.Conv2D(filters=1024, kernel_size=3, strides=1, padding='same', dilation_rate=6,
+        self._pool5 = tf.keras.layers.MaxPooling2D(3, 1, padding='same', data_format=self._data_format, name='pool5')
+        self._conv6 = tf.keras.layers.Conv2D(filters=1024, kernel_size=3, strides=1, padding='same', dilation_rate=6,
                             data_format=self._data_format, activation=tf.nn.relu, use_bias=True,
                             kernel_initializer=self._conv_initializer(),
                             bias_initializer=tf.zeros_initializer(),
-                            name='fc6', _scope='fc6', _reuse=None)
-        self._conv7 = tf.layers.Conv2D(filters=1024, kernel_size=1, strides=1, padding='same',
+                            name='fc6')
+        self._conv7 = tf.keras.layers.Conv2D(filters=1024, kernel_size=1, strides=1, padding='same',
                             data_format=self._data_format, activation=tf.nn.relu, use_bias=True,
                             kernel_initializer=self._conv_initializer(),
                             bias_initializer=tf.zeros_initializer(),
-                            name='fc7', _scope='fc7', _reuse=None)
+                            name='fc7')
 
     def l2_normalize(self, x, name):
         with tf.name_scope(name, "l2_normalize", [x]) as name:
@@ -239,13 +239,13 @@ class VGG16Backbone(object):
         # forward vgg layers
         for conv in self._conv1_block:
             inputs = forward_module(conv, inputs, training=training)
-        inputs = self._pool1.apply(inputs)
+        inputs = self._pool1(inputs)
         for conv in self._conv2_block:
             inputs = forward_module(conv, inputs, training=training)
-        inputs = self._pool2.apply(inputs)
+        inputs = self._pool2(inputs)
         for conv in self._conv3_block:
             inputs = forward_module(conv, inputs, training=training)
-        inputs = self._pool3.apply(inputs)
+        inputs = self._pool3(inputs)
         for conv in self._conv4_block:
             inputs = forward_module(conv, inputs, training=training)
         # conv4_3
@@ -258,13 +258,13 @@
                 feature_layers.append(tf.multiply(weight_scale, self.l2_normalize(inputs, name='norm'), name='rescale')
                             )
-        inputs = self._pool4.apply(inputs)
+        inputs = self._pool4(inputs)
         for conv in self._conv5_block:
             inputs = forward_module(conv, inputs, training=training)
-        inputs = self._pool5.apply(inputs)
+        inputs = self._pool5(inputs)
         # forward fc layers
-        inputs = self._conv6.apply(inputs)
-        inputs = self._conv7.apply(inputs)
+        inputs = self._conv6(inputs)
+        inputs = self._conv7(inputs)
         # fc7
         feature_layers.append(inputs)
 
@@ -275,11 +275,11 @@ class VGG16Backbone(object):
         conv_blocks = []
         for ind in range(1, num_blocks + 1):
             conv_blocks.append(
-                tf.layers.Conv2D(filters=filters, kernel_size=kernel_size, strides=strides, padding='same',
+                tf.keras.layers.Conv2D(filters=filters, kernel_size=kernel_size, strides=strides, padding='same',
                             data_format=self._data_format, activation=tf.nn.relu, use_bias=True,
                             kernel_initializer=self._conv_initializer(),
                             bias_initializer=tf.zeros_initializer(),
-                            name='{}_{}'.format(name, ind), _scope='{}_{}'.format(name, ind), _reuse=None)
+                            name='{}_{}'.format(name, ind))
             )
         return conv_blocks
 
@@ -287,32 +287,32 @@ class VGG16Backbone(object):
     with tf.variable_scope(name):
         conv_bn_blocks = []
         conv_bn_blocks.append(
-            tf.layers.Conv2D(filters=filters, kernel_size=1, strides=1, padding='same',
+            tf.keras.layers.Conv2D(filters=filters, kernel_size=1, strides=1, padding='same',
                         data_format=self._data_format, activation=None, use_bias=False,
                         kernel_initializer=self._conv_bn_initializer(),
                         bias_initializer=None,
-                        name='{}_1'.format(name), _scope='{}_1'.format(name), _reuse=None)
+                        name='{}_1'.format(name))
             )
         conv_bn_blocks.append(
-            tf.layers.BatchNormalization(axis=self._bn_axis, momentum=BN_MOMENTUM, epsilon=BN_EPSILON, fused=USE_FUSED_BN,
-                        name='{}_bn1'.format(name), _scope='{}_bn1'.format(name), _reuse=None)
+            tf.keras.layers.BatchNormalization(axis=self._bn_axis, momentum=BN_MOMENTUM, epsilon=BN_EPSILON, fused=USE_FUSED_BN,
+                        name='{}_bn1'.format(name))
            )
         conv_bn_blocks.append(
-            ReLuLayer('{}_relu1'.format(name), _scope='{}_relu1'.format(name), _reuse=None)
+            ReLuLayer('{}_relu1'.format(name))
            )
         conv_bn_blocks.append(
-            tf.layers.Conv2D(filters=filters * 2, kernel_size=3, strides=strides, padding='same',
+            tf.keras.layers.Conv2D(filters=filters * 2, kernel_size=3, strides=strides, padding='same',
                         data_format=self._data_format, activation=None, use_bias=False,
                         kernel_initializer=self._conv_bn_initializer(),
                         bias_initializer=None,
-                        name='{}_2'.format(name), _scope='{}_2'.format(name), _reuse=None)
+                        name='{}_2'.format(name))
            )
         conv_bn_blocks.append(
-            tf.layers.BatchNormalization(axis=self._bn_axis, momentum=BN_MOMENTUM, epsilon=BN_EPSILON, fused=USE_FUSED_BN,
-                        name='{}_bn2'.format(name), _scope='{}_bn2'.format(name), _reuse=None)
+            tf.keras.layers.BatchNormalization(axis=self._bn_axis, momentum=BN_MOMENTUM, epsilon=BN_EPSILON, fused=USE_FUSED_BN,
+                        name='{}_bn2'.format(name))
            )
         conv_bn_blocks.append(
-            ReLuLayer('{}_relu2'.format(name), _scope='{}_relu2'.format(name), _reuse=None)
+            ReLuLayer('{}_relu2'.format(name))
            )
         return conv_bn_blocks
 
@@ -322,15 +322,15 @@ def multibox_head(feature_layers, num_classes, num_anchors_depth_per_layer, data
     cls_preds = []
     loc_preds = []
     for ind, feat in enumerate(feature_layers):
-        loc_preds.append(tf.layers.conv2d(feat, num_anchors_depth_per_layer[ind] * 4, (3, 3), use_bias=True,
+        loc_preds.append(tf.keras.layers.Conv2D(num_anchors_depth_per_layer[ind] * 4, (3, 3), use_bias=True,
                         name='loc_{}'.format(ind), strides=(1, 1),
                         padding='same', data_format=data_format, activation=None,
                         kernel_initializer=tf.glorot_uniform_initializer(),
-                        bias_initializer=tf.zeros_initializer()))
-        cls_preds.append(tf.layers.conv2d(feat, num_anchors_depth_per_layer[ind] * num_classes, (3, 3), use_bias=True,
+                        bias_initializer=tf.zeros_initializer())(feat))
+        cls_preds.append(tf.keras.layers.Conv2D(num_anchors_depth_per_layer[ind] * num_classes, (3, 3), use_bias=True,
                         name='cls_{}'.format(ind), strides=(1, 1),
                         padding='same', data_format=data_format, activation=None,
                         kernel_initializer=tf.glorot_uniform_initializer(),
-                        bias_initializer=tf.zeros_initializer()))
+                        bias_initializer=tf.zeros_initializer())(feat))
 
     return loc_preds, cls_preds
 
diff --git a/cv/detection/ssd/tensorflow/preprocessing/ssd_preprocessing.py b/cv/detection/ssd/tensorflow/preprocessing/ssd_preprocessing.py
index 739305df2d18d7eca660a01b4e7cddcab0679fc2..fb6da46a0d314e2f12f95dc3c20f1b92f8da84da 100644
--- a/cv/detection/ssd/tensorflow/preprocessing/ssd_preprocessing.py
+++ b/cv/detection/ssd/tensorflow/preprocessing/ssd_preprocessing.py
@@ -34,7 +34,7 @@ from __future__ import print_function
 import tensorflow.compat.v1 as tf
 
 from tensorflow.python.ops import control_flow_ops
-import tf_slim as slim
+# import tf_slim as slim
 
 _R_MEAN = 123.68
 
diff --git a/cv/detection/ssd/tensorflow/train_ssd.py b/cv/detection/ssd/tensorflow/train_ssd.py
index 736c084e1585d0ec2168067f349a0b5891b91de9..0a7e3fce0a395914c375af84d5ab9fcd0407574c 100644
--- a/cv/detection/ssd/tensorflow/train_ssd.py
+++ b/cv/detection/ssd/tensorflow/train_ssd.py
@@ -1,17 +1,3 @@
-# Copyright 2018 Changan Wang
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-
-#     http://www.apache.org/licenses/LICENSE-2.0
-
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -130,33 +116,6 @@ tf.app.flags.DEFINE_string(
     'The backbone for feature extraction: vgg16/resnet18/resnet34/resnet50/resnet101.')
 FLAGS = tf.app.flags.FLAGS
 
-#CUDA_VISIBLE_DEVICES
-def validate_batch_size_for_multi_gpu(batch_size):
-    """For multi-gpu, batch-size must be a multiple of the number of
-    available GPUs.
-
-    Note that this should eventually be handled by replicate_model_fn
-    directly. Multi-GPU support is currently experimental, however,
-    so doing the work here until that feature is in place.
-    """
-    if FLAGS.multi_gpu:
-        from tensorflow.python.client import device_lib
-
-        local_device_protos = device_lib.list_local_devices()
-        num_gpus = sum([1 for d in local_device_protos if d.device_type == 'GPU'])
-        if not num_gpus:
-            raise ValueError('Multi-GPU mode was specified, but no GPUs '
-                            'were found. To use CPU, run --multi_gpu=False.')
-
-        remainder = batch_size % num_gpus
-        if remainder:
-            err = ('When running with multiple GPUs, batch size '
-                    'must be a multiple of the number of available GPUs. '
-                    'Found {} GPUs with a batch size of {}; try --batch_size={} instead.'
-                    ).format(num_gpus, batch_size, batch_size - remainder)
-            raise ValueError(err)
-        return num_gpus
-    return 0
 
 def get_init_fn():
     return scaffolds.get_init_fn_for_scaffold(FLAGS.model_dir, FLAGS.checkpoint_path,
@@ -164,59 +123,50 @@ def get_init_fn():
                                             FLAGS.checkpoint_exclude_scopes, FLAGS.ignore_missing_vars,
                                             name_remap={'/kernel': '/weights', '/bias': '/biases'})
 
-# couldn't find better way to pass params from input_fn to model_fn
-# some tensors used by model_fn must be created in input_fn to ensure they are in the same graph
-# but when we put these tensors to labels's dict, the replicate_model_fn will split them into each GPU
-# the problem is that they shouldn't be splited
 global_anchor_info = dict()
 
 def input_pipeline(dataset_pattern='train-*', is_training=True, batch_size=FLAGS.batch_size):
-    def input_fn():
-        out_shape = [FLAGS.train_image_size] * 2
-        anchor_creator = anchor_manipulator.AnchorCreator(out_shape,
-                                                    layers_shapes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
-                                                    anchor_scales = [(0.1,), (0.2,), (0.375,), (0.55,), (0.725,), (0.9,)],
-                                                    extra_anchor_scales = [(0.1414,), (0.2739,), (0.4541,), (0.6315,), (0.8078,), (0.9836,)],
-                                                    anchor_ratios = [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)],
-                                                    layer_steps = [8, 16, 32, 64, 100, 300])
-        all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()
-
-        num_anchors_per_layer = []
-        for ind in range(len(all_anchors)):
-            num_anchors_per_layer.append(all_num_anchors_depth[ind] * all_num_anchors_spatial[ind])
-
-        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(allowed_borders = [1.0] * 6,
-                                                            positive_threshold = FLAGS.match_threshold,
-                                                            ignore_threshold = FLAGS.neg_threshold,
-                                                            prior_scaling=[0.1, 0.1, 0.2, 0.2])
-
-        image_preprocessing_fn = lambda image_, labels_, bboxes_ : ssd_preprocessing.preprocess_image(image_, labels_, bboxes_, out_shape, is_training=is_training, data_format=FLAGS.data_format, output_rgb=False)
-        anchor_encoder_fn = lambda glabels_, gbboxes_: anchor_encoder_decoder.encode_all_anchors(glabels_, gbboxes_, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)
-
-        image, _, shape, loc_targets, cls_targets, match_scores = dataset_common.slim_get_batch(FLAGS.num_classes,
-                                                                                batch_size,
-                                                                                ('train' if is_training else 'val'),
-                                                                                os.path.join(FLAGS.data_dir, dataset_pattern),
-                                                                                FLAGS.num_readers,
-                                                                                FLAGS.num_preprocessing_threads,
-                                                                                image_preprocessing_fn,
-                                                                                anchor_encoder_fn,
-                                                                                num_epochs=FLAGS.train_epochs,
-                                                                                is_training=is_training)
-        global global_anchor_info
-        global_anchor_info = {'decode_fn': lambda pred : anchor_encoder_decoder.decode_all_anchors(pred, num_anchors_per_layer),
-                            'num_anchors_per_layer': num_anchors_per_layer,
-                            'all_num_anchors_depth': all_num_anchors_depth }
-
-        return image, {'shape': shape, 'loc_targets': loc_targets, 'cls_targets': cls_targets, 'match_scores': match_scores}
-    return input_fn
+    out_shape = [FLAGS.train_image_size] * 2
+    anchor_creator = anchor_manipulator.AnchorCreator(out_shape,
+                                                layers_shapes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
+                                                anchor_scales = [(0.1,), (0.2,), (0.375,), (0.55,), (0.725,), (0.9,)],
+                                                extra_anchor_scales = [(0.1414,), (0.2739,), (0.4541,), (0.6315,), (0.8078,), (0.9836,)],
+                                                anchor_ratios = [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)],
+                                                layer_steps = [8, 16, 32, 64, 100, 300])
+    all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()
+
+    num_anchors_per_layer = []
+    for ind in range(len(all_anchors)):
+        num_anchors_per_layer.append(all_num_anchors_depth[ind] * all_num_anchors_spatial[ind])
+
+    prior_scaling = [0.1, 0.1, 0.2, 0.2]
+    anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(allowed_borders = [1.0] * 6,
+                                                        positive_threshold = FLAGS.match_threshold,
+                                                        ignore_threshold = FLAGS.neg_threshold,
+                                                        prior_scaling=prior_scaling)
+
+    image_preprocessing_fn = lambda image_, labels_, bboxes_ : ssd_preprocessing.preprocess_image(image_, labels_, bboxes_, out_shape, is_training=is_training, data_format=FLAGS.data_format, output_rgb=False)
+    anchor_encoder_fn = lambda glabels_, gbboxes_: anchor_encoder_decoder.encode_all_anchors(glabels_, gbboxes_, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)
+
+    image, _, shape, loc_targets, cls_targets, match_scores = dataset_common.tf1_get_batch(FLAGS.num_classes,
+                                                                            batch_size,
+                                                                            ('train' if is_training else 'val'),
+                                                                            os.path.join(FLAGS.data_dir, dataset_pattern),
+                                                                            FLAGS.num_readers,
+                                                                            FLAGS.num_preprocessing_threads,
+                                                                            image_preprocessing_fn,
+                                                                            anchor_encoder_fn,
+                                                                            num_epochs=FLAGS.train_epochs,
+                                                                            is_training=is_training)
+    global global_anchor_info
+    global_anchor_info = {'decode_fn': lambda pred : anchor_encoder_decoder.ext_decode_all_anchors(pred, all_anchors, all_num_anchors_depth, all_num_anchors_spatial),
+                        'num_anchors_per_layer': num_anchors_per_layer,
+                        'all_num_anchors_depth': all_num_anchors_depth }
+
+    return image, {'shape': shape, 'loc_targets': loc_targets, 'cls_targets': cls_targets, 'match_scores': match_scores}
+
 
 def modified_smooth_l1(bbox_pred, bbox_targets, bbox_inside_weights=1., bbox_outside_weights=1., sigma=1.):
-    """
-        ResultLoss = outside_weights * SmoothL1(inside_weights * (bbox_pred - bbox_targets))
-        SmoothL1(x) = 0.5 * (sigma * x)^2, if |x| < 1 / sigma^2
-                      |x| - 0.5 / sigma^2, otherwise
-    """
     with tf.name_scope('smooth_l1', [bbox_pred, bbox_targets]):
         sigma2 = sigma * sigma
@@ -231,28 +181,25 @@ def modified_smooth_l1(bbox_pred, bbox_targets, bbox_inside_weights=1., bbox_out
         outside_mul = tf.multiply(bbox_outside_weights, smooth_l1_result)
 
         return outside_mul
-
-
-def ssd_model_fn(features, labels, mode, params):
-    """model_fn for SSD to be used with our Estimator."""
-    shape = labels['shape']
+
+def build_model_graph(features, labels, params, is_training):
+    """
+    Build the complete computation graph; returns loss, train_op, predictions, metrics, etc.
+    """
     loc_targets = labels['loc_targets']
     cls_targets = labels['cls_targets']
-    match_scores = labels['match_scores']
 
     global global_anchor_info
     decode_fn = global_anchor_info['decode_fn']
     num_anchors_per_layer = global_anchor_info['num_anchors_per_layer']
     all_num_anchors_depth = global_anchor_info['all_num_anchors_depth']
-    #print(all_num_anchors_depth)
 
     with tf.variable_scope(params['model_scope'], default_name=None, values=[features], reuse=tf.AUTO_REUSE):
         ssd_backbone = ssd_net.SSDBackbone(
             FLAGS.backbone,
-            training=(mode == tf.estimator.ModeKeys.TRAIN),
+            training=is_training,
             data_format=params['data_format'])
         feature_layers = ssd_backbone.forward(features)
-        #print(feature_layers)
         location_pred, cls_pred = ssd_net.multibox_head(feature_layers, params['num_classes'], all_num_anchors_depth, data_format=params['data_format'])
 
     if params['data_format'] == 'channels_first':
@@ -271,125 +218,130 @@ def ssd_model_fn(features, labels, mode, params):
     with tf.device('/cpu:0'):
         with tf.control_dependencies([cls_pred, location_pred]):
             with tf.name_scope('post_forward'):
-                #bboxes_pred = decode_fn(location_pred)
-                bboxes_pred = tf.map_fn(lambda _preds : decode_fn(_preds),
-                                        tf.reshape(location_pred, [tf.shape(features)[0], -1, 4]),
-                                        dtype=[tf.float32] * len(num_anchors_per_layer), back_prop=False)
-                #cls_targets = tf.Print(cls_targets, [tf.shape(bboxes_pred[0]),tf.shape(bboxes_pred[1]),tf.shape(bboxes_pred[2]),tf.shape(bboxes_pred[3])])
+                bboxes_pred = tf.map_fn(
+                    lambda _preds: decode_fn(_preds),
+                    tf.reshape(location_pred, [tf.shape(features)[0], -1, 4]),
+                    dtype=[tf.float32] * len(num_anchors_per_layer),
+                    back_prop=False
+                )
+
                 bboxes_pred = [tf.reshape(preds, [-1, 4]) for preds in bboxes_pred]
                 bboxes_pred = tf.concat(bboxes_pred, axis=0)
 
                 flaten_cls_targets = tf.reshape(cls_targets, [-1])
-                flaten_match_scores = tf.reshape(match_scores, [-1])
                 flaten_loc_targets = tf.reshape(loc_targets, [-1, 4])
 
-                # each positive examples has one label
                 positive_mask = flaten_cls_targets > 0
-                n_positives = tf.count_nonzero(positive_mask)
-                batch_n_positives = tf.count_nonzero(cls_targets, -1)
-
-                batch_negtive_mask = tf.equal(cls_targets, 0)#tf.logical_and(tf.equal(cls_targets, 0), match_scores > 0.)
+                # batch_n_positives is still needed below to size the hard-negative selection
+                batch_n_positives = tf.count_nonzero(cls_targets, -1)
+                batch_negtive_mask = tf.equal(cls_targets, 0)
                 batch_n_negtives = tf.count_nonzero(batch_negtive_mask, -1)
 
                 batch_n_neg_select = tf.cast(params['negative_ratio'] * tf.cast(batch_n_positives, tf.float32), tf.int32)
                 batch_n_neg_select = tf.minimum(batch_n_neg_select, tf.cast(batch_n_negtives, tf.int32))
 
-                # hard negative mining for classification
                 predictions_for_bg = tf.nn.softmax(tf.reshape(cls_pred, [tf.shape(features)[0], -1, params['num_classes']]))[:, :, 0]
-                prob_for_negtives = tf.where(batch_negtive_mask,
-                                       0. - predictions_for_bg,
-                                       # ignore all the positives
-                                       0. - tf.ones_like(predictions_for_bg))
+                prob_for_negtives = tf.where(
+                    batch_negtive_mask,
+                    0. - predictions_for_bg,
+                    0. - tf.ones_like(predictions_for_bg)
+                )
 
                 topk_prob_for_bg, _ = tf.nn.top_k(prob_for_negtives, k=tf.shape(prob_for_negtives)[1])
                 score_at_k = tf.gather_nd(topk_prob_for_bg, tf.stack([tf.range(tf.shape(features)[0]), batch_n_neg_select - 1], axis=-1))
-
                 selected_neg_mask = prob_for_negtives >= tf.expand_dims(score_at_k, axis=-1)
 
-                # include both selected negtive and all positive examples
-                final_mask = tf.stop_gradient(tf.logical_or(tf.reshape(tf.logical_and(batch_negtive_mask, selected_neg_mask), [-1]), positive_mask))
-                total_examples = tf.count_nonzero(final_mask)
+                # include both the selected negatives and all positive examples
+                final_mask = tf.stop_gradient(
+                    tf.logical_or(
+                        tf.reshape(tf.logical_and(batch_negtive_mask, selected_neg_mask), [-1]),
+                        positive_mask
+                    )
+                )
 
-                cls_pred = tf.boolean_mask(cls_pred, final_mask)
-                location_pred = tf.boolean_mask(location_pred, tf.stop_gradient(positive_mask))
-                flaten_cls_targets = tf.boolean_mask(tf.clip_by_value(flaten_cls_targets, 0, params['num_classes']), final_mask)
-                flaten_loc_targets = tf.stop_gradient(tf.boolean_mask(flaten_loc_targets, positive_mask))
+                cls_pred_masked = tf.boolean_mask(cls_pred, final_mask)
+                location_pred_masked = tf.boolean_mask(location_pred, tf.stop_gradient(positive_mask))
+                flaten_cls_targets_masked = tf.boolean_mask(
+                    tf.clip_by_value(flaten_cls_targets, 0, params['num_classes']), final_mask)
+                flaten_loc_targets_masked = tf.stop_gradient(
+                    tf.boolean_mask(flaten_loc_targets, positive_mask))
 
     predictions = {
-        'classes': tf.argmax(cls_pred, axis=-1),
-        'probabilities': tf.reduce_max(tf.nn.softmax(cls_pred, name='softmax_tensor'), axis=-1),
-        'loc_predict': bboxes_pred }
-
-    cls_accuracy = tf.metrics.accuracy(flaten_cls_targets, predictions['classes'])
-    metrics = {'cls_accuracy': cls_accuracy}
-
-    # Create a tensor named train_accuracy for logging purposes.
-    tf.identity(cls_accuracy[1], name='cls_accuracy')
-    tf.summary.scalar('cls_accuracy', cls_accuracy[1])
+        'classes': tf.argmax(cls_pred_masked, axis=-1),
+        'probabilities': tf.reduce_max(tf.nn.softmax(cls_pred_masked, name='softmax_tensor'), axis=-1),
+        # 'loc_predict': bboxes_pred
     }
 
-    if mode == tf.estimator.ModeKeys.PREDICT:
-        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
+    cls_accuracy = tf.metrics.accuracy(flaten_cls_targets_masked, predictions['classes'])
 
-    cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=flaten_cls_targets, logits=cls_pred) * (params['negative_ratio'] + 1.)
-    # Create a tensor named cross_entropy for logging purposes.
+    # Loss computation
+    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
+        labels=flaten_cls_targets_masked, logits=cls_pred_masked) * (params['negative_ratio'] + 1.)
     tf.identity(cross_entropy, name='cross_entropy_loss')
-    tf.summary.scalar('cross_entropy_loss', cross_entropy)
 
-    #loc_loss = tf.cond(n_positives > 0, lambda: modified_smooth_l1(location_pred, tf.stop_gradient(flaten_loc_targets), sigma=1.), lambda: tf.zeros_like(location_pred))
-    loc_loss = modified_smooth_l1(location_pred, flaten_loc_targets, sigma=1.)
-    #loc_loss = modified_smooth_l1(location_pred, tf.stop_gradient(gtargets))
+    loc_loss = modified_smooth_l1(location_pred_masked, flaten_loc_targets_masked, sigma=1.)
     loc_loss = tf.reduce_mean(tf.reduce_sum(loc_loss, axis=-1), name='location_loss')
-    tf.summary.scalar('location_loss', loc_loss)
     tf.losses.add_loss(loc_loss)
 
+    # L2 regularization
     l2_loss_vars = []
-    for trainable_var in tf.trainable_variables():
-        if '_bn' not in trainable_var.name:
-            if 'conv4_3_scale' not in trainable_var.name:
-                l2_loss_vars.append(tf.nn.l2_loss(trainable_var))
+    for var in tf.trainable_variables():
+        if '_bn' not in var.name:
+            if 'conv4_3_scale' not in var.name:
+                l2_loss_vars.append(tf.nn.l2_loss(var))
             else:
-                l2_loss_vars.append(tf.nn.l2_loss(trainable_var) * 0.1)
-    # Add weight decay to the loss. We exclude the batch norm variables because
-    # doing so leads to a small improvement in accuracy.
-    total_loss = tf.add(cross_entropy + loc_loss, tf.multiply(params['weight_decay'], tf.add_n(l2_loss_vars), name='l2_loss'), name='total_loss')
-
-    if mode == tf.estimator.ModeKeys.TRAIN:
-        global_step = tf.train.get_or_create_global_step()
-
-        lr_values = [params['learning_rate'] * decay for decay in params['lr_decay_factors']]
-        learning_rate = tf.train.piecewise_constant(tf.cast(global_step, tf.int32),
-                                                    [int(_) for _ in params['decay_boundaries']],
-                                                    lr_values)
-        truncated_learning_rate = tf.maximum(learning_rate, tf.constant(params['end_learning_rate'], dtype=learning_rate.dtype), name='learning_rate')
-        # Create a tensor named learning_rate for logging purposes.
-        tf.summary.scalar('learning_rate', truncated_learning_rate)
-
-        optimizer = tf.train.MomentumOptimizer(learning_rate=truncated_learning_rate,
-                                                momentum=params['momentum'])
-        if params['use_amp']:
-            optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer)
-        # optimizer = tf.contrib.estimator.TowerOptimizer(optimizer)
-
-        # Batch norm requires update_ops to be added as a train_op dependency.
-        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
-        with tf.control_dependencies(update_ops):
-            train_op = optimizer.minimize(total_loss, global_step)
-    else:
-        train_op = None
-
-    return tf.estimator.EstimatorSpec(
-                            mode=mode,
-                            predictions=predictions,
-                            loss=total_loss,
-                            train_op=train_op,
-                            eval_metric_ops=metrics,
-                            scaffold=tf.train.Scaffold(init_fn=get_init_fn()))
+                l2_loss_vars.append(tf.nn.l2_loss(var) * 0.1)
+    l2_loss = tf.multiply(params['weight_decay'], tf.add_n(l2_loss_vars), name='l2_loss')
+    total_loss = tf.add(cross_entropy + loc_loss, l2_loss, name='total_loss')
+
+    # Optimizer and learning-rate schedule
+    global_step = tf.train.get_or_create_global_step()
+    lr_values = [params['learning_rate'] * decay for decay in params['lr_decay_factors']]
+    learning_rate = tf.train.piecewise_constant(
+        tf.cast(global_step, tf.int32),
+        [int(_) for _ in params['decay_boundaries']],
+        lr_values
+    )
+    truncated_learning_rate = tf.maximum(learning_rate, params['end_learning_rate'], name='learning_rate')
+
+    optimizer = tf.train.MomentumOptimizer(learning_rate=truncated_learning_rate, momentum=params['momentum'])
+    if params['use_amp']:
+        optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer)
+
+    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
+    with tf.control_dependencies(update_ops):
+        train_op = optimizer.minimize(total_loss, global_step=global_step)
+
+    return total_loss, train_op, predictions, cls_accuracy, truncated_learning_rate
+
+
+def validate_batch_size_for_multi_gpu(batch_size):
+    """For multi-gpu, batch-size must be a multiple of the number of
+    available GPUs.
+
+    Note that this should eventually be handled by replicate_model_fn
+    directly. Multi-GPU support is currently experimental, however,
+    so doing the work here until that feature is in place.
+    """
+    if FLAGS.multi_gpu:
+        from tensorflow.python.client import device_lib
+
+        local_device_protos = device_lib.list_local_devices()
+        num_gpus = sum([1 for d in local_device_protos if d.device_type == 'GPU'])
+        if not num_gpus:
+            raise ValueError('Multi-GPU mode was specified, but no GPUs '
+                             'were found. To use CPU, run --multi_gpu=False.')
+
+        remainder = batch_size % num_gpus
+        if remainder:
+            err = ('When running with multiple GPUs, batch size '
+                   'must be a multiple of the number of available GPUs. '
+                   'Found {} GPUs with a batch size of {}; try --batch_size={} instead.'
+                   ).format(num_gpus, batch_size, batch_size - remainder)
+            raise ValueError(err)
+        return num_gpus
+    return 0
 
 def parse_comma_list(args):
     return [float(s.strip()) for s in args.split(',')]
 
 
 def main(_):
-    # Using the Winograd non-fused algorithms provides a small performance boost.
     os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
 
     try:
@@ -398,58 +350,74 @@
     except:
         pass
 
-    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
-    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False, intra_op_parallelism_threads=FLAGS.num_cpu_threads, inter_op_parallelism_threads=FLAGS.num_cpu_threads, gpu_options=gpu_options)
-
+    # Parameters
     num_gpus = validate_batch_size_for_multi_gpu(FLAGS.batch_size)
-
-    # Set up a RunConfig to only save checkpoints once per training cycle.
-    run_config = tf.estimator.RunConfig().replace(
-                                        save_checkpoints_secs=FLAGS.save_checkpoints_secs).replace(
-                                            save_checkpoints_steps=None).replace(
-                                                save_summary_steps=FLAGS.save_summary_steps).replace(
-                                                    keep_checkpoint_max=5).replace(
-                                                        tf_random_seed=FLAGS.tf_random_seed).replace(
-                                                            log_step_count_steps=FLAGS.log_every_n_steps).replace(
-                                                                session_config=config)
-
-    # replicate_ssd_model_fn = tf.contrib.estimator.replicate_model_fn(ssd_model_fn, loss_reduction=tf.losses.Reduction.MEAN)
-    replicate_ssd_model_fn =ssd_model_fn
-    ssd_detector = tf.estimator.Estimator(
-        model_fn=replicate_ssd_model_fn, model_dir=FLAGS.model_dir, config=run_config,
-        params={
-            'num_gpus': num_gpus,
-            'data_format': FLAGS.data_format,
-            'batch_size': FLAGS.batch_size,
-            'model_scope': FLAGS.model_scope,
-            'num_classes': FLAGS.num_classes,
-            'negative_ratio': FLAGS.negative_ratio,
-            'match_threshold': FLAGS.match_threshold,
-            'neg_threshold': FLAGS.neg_threshold,
-            'weight_decay': FLAGS.weight_decay,
-            'momentum': FLAGS.momentum,
-            'learning_rate': FLAGS.learning_rate,
-            'end_learning_rate': FLAGS.end_learning_rate,
-            'decay_boundaries': parse_comma_list(FLAGS.decay_boundaries),
-            'lr_decay_factors': parse_comma_list(FLAGS.lr_decay_factors),
-            'use_amp':FLAGS.use_amp,
-        })
-    tensors_to_log = {
-        'lr': 'learning_rate',
-        'ce': 'cross_entropy_loss',
-        'loc': 'location_loss',
-        'loss': 'total_loss',
-        'l2': 'l2_loss',
-        'acc': 'post_forward/cls_accuracy',
+    params = {
+        'num_gpus': num_gpus,
+        'data_format': FLAGS.data_format,
+        'batch_size': FLAGS.batch_size,
+        'model_scope': FLAGS.model_scope,
+        'num_classes': FLAGS.num_classes,
+        'negative_ratio': FLAGS.negative_ratio,
+        'match_threshold': FLAGS.match_threshold,
+        'neg_threshold': FLAGS.neg_threshold,
+        'weight_decay': FLAGS.weight_decay,
+        'momentum': FLAGS.momentum,
+        'learning_rate': FLAGS.learning_rate,
+        'end_learning_rate': FLAGS.end_learning_rate,
+        'decay_boundaries': parse_comma_list(FLAGS.decay_boundaries),
+        'lr_decay_factors': parse_comma_list(FLAGS.lr_decay_factors),
+        'use_amp': FLAGS.use_amp,
     }
-    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=FLAGS.log_every_n_steps,
-                                            formatter=lambda dicts: (', '.join(['%s=%.6f' % (k, v) for k, v in dicts.items()])))
-
-    #hook = tf.train.ProfilerHook(save_steps=50, output_dir='.', show_memory=True)
-    print('Starting a training cycle.')
-    ssd_detector.train(input_fn=input_pipeline(dataset_pattern='train-*', is_training=True, batch_size=FLAGS.batch_size),
-                    hooks=[logging_hook], max_steps=FLAGS.max_number_of_steps)
+
+    # Build the graph: input data
+    features, labels = input_pipeline(
+        dataset_pattern='train-*',
+        is_training=True,
+        batch_size=FLAGS.batch_size
+    )
+
+    # Build the graph: model and post-processing
+    total_loss, train_op, predictions, cls_metrics, lr = build_model_graph(features, labels, params, is_training=True)
+
+    # Session configuration
+    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
+    config = tf.ConfigProto(
+        allow_soft_placement=True,
+        log_device_placement=False,
+        intra_op_parallelism_threads=FLAGS.num_cpu_threads,
+        inter_op_parallelism_threads=FLAGS.num_cpu_threads,
+        gpu_options=gpu_options
+    )
+
+    scaffold = tf.train.Scaffold(init_fn=get_init_fn())
+
+    # Training loop
+    with tf.train.MonitoredTrainingSession(
+        master='',
+        is_chief=True,
+        checkpoint_dir=FLAGS.model_dir,
+        save_checkpoint_secs=None,
+        scaffold=scaffold,
+        config=config
+    ) as sess:
+
+        step_ = 0
+        print('Starting a training cycle.')
+
+        while step_ < FLAGS.max_number_of_steps:
+            try:
+                _, total_loss_, lr_, step_, acc_ = sess.run([train_op, total_loss, lr, tf.train.get_global_step(), cls_metrics[1]])
+
+                if step_ % FLAGS.log_every_n_steps == 0:
+                    tf.logging.info('global_step %d: loss = %.4f, lr = %.6f, acc = %.4f', step_, total_loss_, lr_, acc_)
+
+            except tf.errors.OutOfRangeError:
+                tf.logging.info('Epoch finished after %d steps.', step_)
+                break
+
+        print('Training completed.')
 
 if __name__ == '__main__':
-  tf.logging.set_verbosity(tf.logging.INFO)
-  tf.app.run()
+    tf.logging.set_verbosity(tf.logging.INFO)
+    tf.app.run()
\ No newline at end of file