diff --git a/PyTorch/contrib/cv/pose_estimation/VoxelPose/run/train_3d.py b/PyTorch/contrib/cv/pose_estimation/VoxelPose/run/train_3d.py index 44ef94dd71d4677b8e69810011f109dd02b66b93..fbfd3a77553c2eb3f98756cfb715ef0d82cdc9a0 100644 --- a/PyTorch/contrib/cv/pose_estimation/VoxelPose/run/train_3d.py +++ b/PyTorch/contrib/cv/pose_estimation/VoxelPose/run/train_3d.py @@ -251,22 +251,23 @@ def main(): # lr_scheduler.step() train_3d(config, model, optimizer, train_loader, epoch, final_output_dir, writer_dict, len(gpus), device=device, is_master_node=args.is_master_node, use_apex=args.apex) - precision = validate_3d(config, model, test_loader, final_output_dir, device=device, is_master_node=args.is_master_node) + if args.distributed: + precision = validate_3d(config, model, test_loader, final_output_dir, device=device, is_master_node=args.is_master_node) - if precision > best_precision: - best_precision = precision - best_model = True - else: - best_model = False - if args.is_master_node: - logger.info('=> saving checkpoint to {} (Best: {})'.format(final_output_dir, best_model)) - model_copy=copy.deepcopy(model).cpu() - save_checkpoint({ - 'epoch': epoch + 1, - 'state_dict': model_copy.module.state_dict(), - 'precision': best_precision, - 'optimizer': optimizer.state_dict(), - }, best_model, final_output_dir) + if precision > best_precision: + best_precision = precision + best_model = True + else: + best_model = False + if args.is_master_node: + logger.info('=> saving checkpoint to {} (Best: {})'.format(final_output_dir, best_model)) + model_copy=copy.deepcopy(model).cpu() + save_checkpoint({ + 'epoch': epoch + 1, + 'state_dict': model_copy.module.state_dict(), + 'precision': best_precision, + 'optimizer': optimizer.state_dict(), + }, best_model, final_output_dir) final_model_state_file = os.path.join(final_output_dir, 'final_state.pth.tar')