diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..378a9c45781c71a3f3c763bd9f733122d56980e4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,24 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# Shared objects +*.so + +# Distribution / packaging +build/ +*.egg-info/ +*.egg + +# Temporary files +*.swn +*.swo +*.swp + +# Dataset symlinks +detectron/datasets/data/* +!detectron/datasets/data/README.md + +# Generated C files +detectron/utils/cython_*.c diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000000000000000000000000000000000000..105ce2da2d6447d11dfe32bfb846c3d5b199fc99 --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000000000000000000000000000000000000..ff66225c8bd42e019f730040948f53d2bc7752d0 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000000000000000000000000000000000000..e918a3d5d7982626551173c118c3a63245112ec2 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/retina-net-r-main.iml b/.idea/retina-net-r-main.iml new file mode 100644 index 0000000000000000000000000000000000000000..6711606311e2664bd835f92b5c114681d2e284f5 --- /dev/null +++ b/.idea/retina-net-r-main.iml @@ -0,0 +1,11 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 0000000000000000000000000000000000000000..65f2988990c79dfa5fea98380e970fe66dcfb479 --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,68 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1656041160941 + + + + \ No newline at end of file 
diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..fd06f17afda144a125b14f89b90ac2e512d45e38 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,56 @@ +cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR) + +# Find the Caffe2 package. +# Caffe2 exports the required targets, so find_package should work for +# the standard Caffe2 installation. If you encounter problems with finding +# the Caffe2 package, make sure you have run `make install` when installing +# Caffe2 (`make install` populates your share/cmake/Caffe2). +find_package(Caffe2 REQUIRED) + +if (${CAFFE2_VERSION} VERSION_LESS 0.8.2) + # Pre-0.8.2 caffe2 does not have proper interface libraries set up, so we + # will rely on the old path. + message(WARNING + "You are using an older version of Caffe2 (version " ${CAFFE2_VERSION} + "). Please consider moving to a newer version.") + include(cmake/legacy/legacymake.cmake) + return() +endif() + +# Add compiler flags. +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -O2 -fPIC -Wno-narrowing") + +# Print configuration summary. +include(cmake/Summary.cmake) +detectron_print_config_summary() + +# Collect custom ops sources. +file(GLOB CUSTOM_OPS_CPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/detectron/ops/*.cc) +file(GLOB CUSTOM_OPS_GPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/detectron/ops/*.cu) + +# Install custom CPU ops lib. +add_library( + caffe2_detectron_custom_ops SHARED + ${CUSTOM_OPS_CPU_SRCS}) + +target_include_directories( + caffe2_detectron_custom_ops PRIVATE + ${CAFFE2_INCLUDE_DIRS}) + +target_link_libraries(caffe2_detectron_custom_ops caffe2_library) +install(TARGETS caffe2_detectron_custom_ops DESTINATION lib) + +# Install custom GPU ops lib, if gpu is present. 
+if (CAFFE2_USE_CUDA OR CAFFE2_FOUND_CUDA) + # Additional -I prefix is required for CMake versions before commit (< 3.7): + # https://github.com/Kitware/CMake/commit/7ded655f7ba82ea72a82d0555449f2df5ef38594 + list(APPEND CUDA_INCLUDE_DIRS -I${CAFFE2_INCLUDE_DIRS}) + CUDA_ADD_LIBRARY( + caffe2_detectron_custom_ops_gpu SHARED + ${CUSTOM_OPS_CPU_SRCS} + ${CUSTOM_OPS_GPU_SRCS}) + + target_link_libraries(caffe2_detectron_custom_ops_gpu caffe2_gpu_library) + install(TARGETS caffe2_detectron_custom_ops_gpu DESTINATION lib) +endif() diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..cd482d89761c2eb60d1fc8c72c1708b7f47b8c82 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, +and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by +the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all +other entities that control, are controlled by, or are under common +control with that entity. For the purposes of this definition, +"control" means (i) the power, direct or indirect, to cause the +direction or management of such entity, whether by contract or +otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity +exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, +including but not limited to software source code, documentation +source, and configuration files. 
+ +"Object" form shall mean any form resulting from mechanical +transformation or translation of a Source form, including but +not limited to compiled object code, generated documentation, +and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or +Object form, made available under the License, as indicated by a +copyright notice that is included in or attached to the work +(an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object +form, that is based on (or derived from) the Work and for which the +editorial revisions, annotations, elaborations, or other modifications +represent, as a whole, an original work of authorship. For the purposes +of this License, Derivative Works shall not include works that remain +separable from, or merely link (or bind by name) to the interfaces of, +the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including +the original version of the Work and any modifications or additions +to that Work or Derivative Works thereof, that is intentionally +submitted to Licensor for inclusion in the Work by the copyright owner +or by an individual or Legal Entity authorized to submit on behalf of +the copyright owner. For the purposes of this definition, "submitted" +means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, +and issue tracking systems that are managed by, or on behalf of, the +Licensor for the purpose of discussing and improving the Work, but +excluding communication that is conspicuously marked or otherwise +designated in writing by the copyright owner as "Not a Contribution." 
+ +"Contributor" shall mean Licensor and any individual or Legal Entity +on behalf of whom a Contribution has been received by Licensor and +subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of +this License, each Contributor hereby grants to You a perpetual, +worldwide, non-exclusive, no-charge, royalty-free, irrevocable +copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the +Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of +this License, each Contributor hereby grants to You a perpetual, +worldwide, non-exclusive, no-charge, royalty-free, irrevocable +(except as stated in this section) patent license to make, have made, +use, offer to sell, sell, import, and otherwise transfer the Work, +where such license applies only to those patent claims licensable +by such Contributor that are necessarily infringed by their +Contribution(s) alone or by combination of their Contribution(s) +with the Work to which such Contribution(s) was submitted. If You +institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work +or a Contribution incorporated within the Work constitutes direct +or contributory patent infringement, then any patent licenses +granted to You under this License for that Work shall terminate +as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the +Work or Derivative Works thereof in any medium, with or without +modifications, and in Source or Object form, provided that You +meet the following conditions: + +(a) You must give any other recipients of the Work or +Derivative Works a copy of this License; and + +(b) You must cause any modified files to carry prominent notices +stating that You changed the files; and + +(c) You must retain, in the Source form of any Derivative Works +that You distribute, all copyright, patent, trademark, and +attribution notices from the Source form of the Work, +excluding those notices that do not pertain to any part of +the Derivative Works; and + +(d) If the Work includes a "NOTICE" text file as part of its +distribution, then any Derivative Works that You distribute must +include a readable copy of the attribution notices contained +within such NOTICE file, excluding those notices that do not +pertain to any part of the Derivative Works, in at least one +of the following places: within a NOTICE text file distributed +as part of the Derivative Works; within the Source form or +documentation, if provided along with the Derivative Works; or, +within a display generated by the Derivative Works, if and +wherever such third-party notices normally appear. The contents +of the NOTICE file are for informational purposes only and +do not modify the License. You may add Your own attribution +notices within Derivative Works that You distribute, alongside +or as an addendum to the NOTICE text from the Work, provided +that such additional attribution notices cannot be construed +as modifying the License. 
+ +You may add Your own copyright statement to Your modifications and +may provide additional or different license terms and conditions +for use, reproduction, or distribution of Your modifications, or +for any such Derivative Works as a whole, provided Your use, +reproduction, and distribution of the Work otherwise complies with +the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, +any Contribution intentionally submitted for inclusion in the Work +by You to the Licensor shall be under the terms and conditions of +this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify +the terms of any separate license agreement you may have executed +with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade +names, trademarks, service marks, or product names of the Licensor, +except as required for reasonable and customary use in describing the +origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or +agreed to in writing, Licensor provides the Work (and each +Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +implied, including, without limitation, any warranties or conditions +of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +PARTICULAR PURPOSE. You are solely responsible for determining the +appropriateness of using or redistributing the Work and assume any +risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, +whether in tort (including negligence), contract, or otherwise, +unless required by applicable law (such as deliberate and grossly +negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, +incidental, or consequential damages of any character arising as a +result of this License or out of the use or inability to use the +Work (including but not limited to damages for loss of goodwill, +work stoppage, computer failure or malfunction, or any and all +other commercial damages or losses), even if such Contributor +has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing +the Work or Derivative Works thereof, You may choose to offer, +and charge a fee for, acceptance of support, warranty, indemnity, +or other liability obligations and/or rights consistent with this +License. However, in accepting such obligations, You may act only +on Your own behalf and on Your sole responsibility, not on behalf +of any other Contributor, and only if You agree to indemnify, +defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason +of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + +To apply the Apache License to your work, attach the following +boilerplate notice, with the fields enclosed by brackets "[]" +replaced with your own identifying information. (Don't include +the brackets!) The text should be enclosed in the appropriate +comment syntax for the file format. We also recommend that a +file or class name and description of purpose be included on the +same "printed page" as the copyright notice for easier +identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..396a58ad3132c035c4e369ce8714949b975d44a8 --- /dev/null +++ b/Makefile @@ -0,0 +1,22 @@ +# Don't use the --user flag for setup.py develop mode with virtualenv. +DEV_USER_FLAG=$(shell python -c "import sys; print('' if hasattr(sys, 'real_prefix') else '--user')") + +.PHONY: default +default: dev + +.PHONY: install +install: + python setup.py install + +.PHONY: ops +ops: + mkdir -p build && cd build && cmake .. && make -j$(shell nproc) + +.PHONY: dev +dev: + python setup.py develop $(DEV_USER_FLAG) + +.PHONY: clean +clean: + python setup.py develop --uninstall $(DEV_USER_FLAG) + rm -rf build diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000000000000000000000000000000000000..5fcc013a09a9df06eed031ef2ef3770c7bdaf848 --- /dev/null +++ b/NOTICE @@ -0,0 +1,29 @@ +Portions of this software are derived from py-faster-rcnn. 
+ +============================================================================== +py-faster-rcnn licence +============================================================================== + +Faster R-CNN + +The MIT License (MIT) + +Copyright (c) 2015 Microsoft Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake new file mode 100644 index 0000000000000000000000000000000000000000..c00fea5f4e48a12e5d8f9ba74382e665cfc04de7 --- /dev/null +++ b/cmake/Summary.cmake @@ -0,0 +1,36 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +# Adapted from https://github.com/caffe2/caffe2/blob/master/cmake/Summary.cmake + +# Prints configuration summary. +function (detectron_print_config_summary) + message(STATUS "Summary:") + message(STATUS " CMake version : ${CMAKE_VERSION}") + message(STATUS " CMake command : ${CMAKE_COMMAND}") + message(STATUS " System name : ${CMAKE_SYSTEM_NAME}") + message(STATUS " C++ compiler : ${CMAKE_CXX_COMPILER}") + message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}") + message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}") + message(STATUS " Caffe2 version : ${CAFFE2_VERSION}") + message(STATUS " Caffe2 include path : ${CAFFE2_INCLUDE_DIRS}") + if (CAFFE2_USE_CUDA OR CAFFE2_FOUND_CUDA) + message(STATUS " Caffe2 found CUDA : True") + message(STATUS " CUDA version : ${CUDA_VERSION}") + message(STATUS " CuDNN version : ${CUDNN_VERSION}") + else() + message(STATUS " Caffe2 found CUDA : False") + endif() +endfunction() diff --git a/cmake/legacy/Cuda.cmake b/cmake/legacy/Cuda.cmake new file mode 100644 index 0000000000000000000000000000000000000000..af252e9bcb9b77b3bfebd9a25a16615cdca353c7 --- /dev/null +++ b/cmake/legacy/Cuda.cmake @@ -0,0 +1,259 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +# Copied from https://github.com/caffe2/caffe2/blob/master/cmake/Cuda.cmake + +# Caffe2 cmake utility to prepare for cuda build. +# This cmake file is called from Dependencies.cmake. You do not need to +# manually invoke it. + +# Known NVIDIA GPU achitectures Caffe2 can be compiled for. +# Default is set to cuda 9. If we detect the cuda architectores to be less than +# 9, we will lower it to the corresponding known archs. +set(Caffe2_known_gpu_archs "30 35 50 52 60 61 70") # for CUDA 9.x +set(Caffe2_known_gpu_archs8 "20 21(20) 30 35 50 52 60 61") # for CUDA 8.x +set(Caffe2_known_gpu_archs7 "20 21(20) 30 35 50 52") # for CUDA 7.x + + +################################################################################################ +# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME +# Usage: +# caffe_select_nvcc_arch_flags(out_variable) +function(caffe2_select_nvcc_arch_flags out_variable) + # List of arch names + set(__archs_names "Kepler" "Maxwell" "Pascal" "Volta" "All" "Manual") + set(__archs_name_default "All") + + # Set CUDA_ARCH_NAME strings (so it will be seen as dropbox in the CMake GUI) + set(CUDA_ARCH_NAME ${__archs_name_default} CACHE STRING "Select target NVIDIA GPU architecture") + set_property(CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${__archs_names}) + mark_as_advanced(CUDA_ARCH_NAME) + + # Verify CUDA_ARCH_NAME value + if(NOT ";${__archs_names};" MATCHES ";${CUDA_ARCH_NAME};") + string(REPLACE ";" ", " __archs_names "${__archs_names}") + 
message(FATAL_ERROR "Invalid CUDA_ARCH_NAME, supported values: ${__archs_names}. Got ${CUDA_ARCH_NAME}") + endif() + + if(${CUDA_ARCH_NAME} STREQUAL "Manual") + set(CUDA_ARCH_BIN "" CACHE STRING + "Specify GPU architectures to build binaries for (BIN(PTX) format is supported)") + set(CUDA_ARCH_PTX "" CACHE STRING + "Specify GPU architectures to build PTX intermediate code for") + mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX) + else() + unset(CUDA_ARCH_BIN CACHE) + unset(CUDA_ARCH_PTX CACHE) + endif() + + if(${CUDA_ARCH_NAME} STREQUAL "Kepler") + set(__cuda_arch_bin "30 35") + elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell") + set(__cuda_arch_bin "50") + elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal") + set(__cuda_arch_bin "60 61") + elseif(${CUDA_ARCH_NAME} STREQUAL "Volta") + set(__cuda_arch_bin "70") + elseif(${CUDA_ARCH_NAME} STREQUAL "All") + set(__cuda_arch_bin ${Caffe2_known_gpu_archs}) + elseif(${CUDA_ARCH_NAME} STREQUAL "Manual") + set(__cuda_arch_bin ${CUDA_ARCH_BIN}) + set(__cuda_arch_ptx ${CUDA_ARCH_PTX}) + else() + message(FATAL_ERROR "Invalid CUDA_ARCH_NAME") + endif() + + # Remove dots and convert to lists + string(REGEX REPLACE "\\." "" __cuda_arch_bin "${__cuda_arch_bin}") + string(REGEX REPLACE "\\." 
"" __cuda_arch_ptx "${__cuda_arch_ptx}") + string(REGEX MATCHALL "[0-9()]+" __cuda_arch_bin "${__cuda_arch_bin}") + string(REGEX MATCHALL "[0-9]+" __cuda_arch_ptx "${__cuda_arch_ptx}") + list(REMOVE_DUPLICATES __cuda_arch_bin) + list(REMOVE_DUPLICATES __cuda_arch_ptx) + + set(__nvcc_flags "") + set(__nvcc_archs_readable "") + + # Tell NVCC to add binaries for the specified GPUs + foreach(__arch ${__cuda_arch_bin}) + if(__arch MATCHES "([0-9]+)\\(([0-9]+)\\)") + # User explicitly specified PTX for the concrete BIN + list(APPEND __nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}) + list(APPEND __nvcc_archs_readable sm_${CMAKE_MATCH_1}) + else() + # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN + list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=sm_${__arch}) + list(APPEND __nvcc_archs_readable sm_${__arch}) + endif() + endforeach() + + # Tell NVCC to add PTX intermediate code for the specified architectures + foreach(__arch ${__cuda_arch_ptx}) + list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=compute_${__arch}) + list(APPEND __nvcc_archs_readable compute_${__arch}) + endforeach() + + string(REPLACE ";" " " __nvcc_archs_readable "${__nvcc_archs_readable}") + set(${out_variable} ${__nvcc_flags} PARENT_SCOPE) + set(${out_variable}_readable ${__nvcc_archs_readable} PARENT_SCOPE) +endfunction() + + +################################################################################################ +# Short command for cuda compilation +# Usage: +# caffe_cuda_compile( ) +macro(caffe2_cuda_compile objlist_variable) + foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) + set(${var}_backup_in_cuda_compile_ "${${var}}") + + # we remove /EHa as it generates warnings under windows + string(REPLACE "/EHa" "" ${var} "${${var}}") + + endforeach() + + if(APPLE) + list(APPEND CUDA_NVCC_FLAGS -Xcompiler -Wno-unused-function) + endif() + + cuda_compile(cuda_objcs ${ARGN}) + + 
foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) + set(${var} "${${var}_backup_in_cuda_compile_}") + unset(${var}_backup_in_cuda_compile_) + endforeach() + + set(${objlist_variable} ${cuda_objcs}) +endmacro() + +################################################################################################ +### Non macro section +################################################################################################ + +# Special care for windows platform: we know that 32-bit windows does not support cuda. +if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + if(NOT (CMAKE_SIZEOF_VOID_P EQUAL 8)) + message(FATAL_ERROR + "CUDA support not available with 32-bit windows. Did you " + "forget to set Win64 in the generator target?") + return() + endif() +endif() + +find_package(CUDA 7.0 QUIET) +find_cuda_helper_libs(curand) # cmake 2.8.7 compartibility which doesn't search for curand + +if(NOT CUDA_FOUND) + set(HAVE_CUDA FALSE) + return() +endif() + +set(HAVE_CUDA TRUE) +message(STATUS "CUDA detected: " ${CUDA_VERSION}) +if (${CUDA_VERSION} LESS 7.0) + message(FATAL_ERROR "Caffe2 requires CUDA 7.0 or later version") +elseif (${CUDA_VERSION} LESS 8.0) # CUDA 7.x + set(Caffe2_known_gpu_archs ${Caffe2_known_gpu_archs7}) + list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED") + list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__") +elseif (${CUDA_VERSION} LESS 9.0) # CUDA 8.x + set(Caffe2_known_gpu_archs ${Caffe2_known_gpu_archs8}) + list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED") + list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__") + # CUDA 8 may complain that sm_20 is no longer supported. Suppress the + # warning for now. 
+ list(APPEND CUDA_NVCC_FLAGS "-Wno-deprecated-gpu-targets") +endif() + +caffe2_include_directories(${CUDA_INCLUDE_DIRS}) +list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS ${CUDA_CUDART_LIBRARY} + ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) + +# find libcuda.so and lbnvrtc.so +# For libcuda.so, we will find it under lib, lib64, and then the +# stubs folder, in case we are building on a system that does not +# have cuda driver installed. On windows, we also search under the +# folder lib/x64. + +find_library(CUDA_CUDA_LIB cuda + PATHS ${CUDA_TOOLKIT_ROOT_DIR} + PATH_SUFFIXES lib lib64 lib/stubs lib64/stubs lib/x64) +find_library(CUDA_NVRTC_LIB nvrtc + PATHS ${CUDA_TOOLKIT_ROOT_DIR} + PATH_SUFFIXES lib lib64 lib/x64) + +# setting nvcc arch flags +caffe2_select_nvcc_arch_flags(NVCC_FLAGS_EXTRA) +list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA}) +message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA_readable}") + +if(CUDA_CUDA_LIB) + message(STATUS "Found libcuda: ${CUDA_CUDA_LIB}") + list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS ${CUDA_CUDA_LIB}) +else() + message(FATAL_ERROR "Cannot find libcuda.so. Please file an issue on https://github.com/caffe2/caffe2 with your build output.") +endif() +if(CUDA_NVRTC_LIB) + message(STATUS "Found libnvrtc: ${CUDA_NVRTC_LIB}") + list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS ${CUDA_NVRTC_LIB}) +else() + message(FATAL_ERROR "Cannot find libnvrtc.so. Please file an issue on https://github.com/caffe2/caffe2 with your build output.") +endif() + +# disable some nvcc diagnostic that apears in boost, glog, glags, opencv, etc. 
+foreach(diag cc_clobber_ignored integer_sign_change useless_using_declaration set_but_not_used) + list(APPEND CUDA_NVCC_FLAGS -Xcudafe --diag_suppress=${diag}) +endforeach() + +# Set C++11 support +set(CUDA_PROPAGATE_HOST_FLAGS OFF) +if (NOT MSVC) + list(APPEND CUDA_NVCC_FLAGS "-std=c++14") + list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC") +endif() + +# Debug and Release symbol support +if (MSVC) + if (${CMAKE_BUILD_TYPE} MATCHES "Release") + if (${BUILD_SHARED_LIBS}) + list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MD") + else() + list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MT") + endif() + elseif(${CMAKE_BUILD_TYPE} MATCHES "Debug") + message(FATAL_ERROR + "Caffe2 currently does not support the combination of MSVC, Cuda " + "and Debug mode. Either set USE_CUDA=OFF or set the build type " + "to Release") + if (${BUILD_SHARED_LIBS}) + list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MDd") + else() + list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -MTd") + endif() + else() + message(FATAL_ERROR "Unknown cmake build type: " ${CMAKE_BUILD_TYPE}) + endif() +endif() + + +if(OpenMP_FOUND) + list(APPEND CUDA_NVCC_FLAGS "-Xcompiler ${OpenMP_CXX_FLAGS}") +endif() + +# Set :expt-relaxed-constexpr to suppress Eigen warnings +list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr") + +mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD) +mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION) diff --git a/cmake/legacy/Dependencies.cmake b/cmake/legacy/Dependencies.cmake new file mode 100644 index 0000000000000000000000000000000000000000..5cb9dd8e391e78c89baea0a6d702b5ce61bac4f2 --- /dev/null +++ b/cmake/legacy/Dependencies.cmake @@ -0,0 +1,51 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +# Adapted from https://github.com/caffe2/caffe2/blob/master/cmake/Dependencies.cmake + +# Find CUDA. +include(cmake/legacy/Cuda.cmake) +if (HAVE_CUDA) + # CUDA 9.x requires GCC version <= 6 + if ((CUDA_VERSION VERSION_EQUAL 9.0) OR + (CUDA_VERSION VERSION_GREATER 9.0 AND CUDA_VERSION VERSION_LESS 10.0)) + if (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND + NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 7.0 AND + CUDA_HOST_COMPILER STREQUAL CMAKE_C_COMPILER) + message(FATAL_ERROR + "CUDA ${CUDA_VERSION} is not compatible with GCC version >= 7. " + "Use the following option to use another version (for example): \n" + " -DCUDA_HOST_COMPILER=/usr/bin/gcc-6\n") + endif() + # CUDA 8.0 requires GCC version <= 5 + elseif (CUDA_VERSION VERSION_EQUAL 8.0) + if (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND + NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 6.0 AND + CUDA_HOST_COMPILER STREQUAL CMAKE_C_COMPILER) + message(FATAL_ERROR + "CUDA 8.0 is not compatible with GCC version >= 6. " + "Use the following option to use another version (for example): \n" + " -DCUDA_HOST_COMPILER=/usr/bin/gcc-5\n") + endif() + endif() +endif() + +# Find CUDNN. 
+if (HAVE_CUDA) + find_package(CuDNN REQUIRED) + if (CUDNN_FOUND) + caffe2_include_directories(${CUDNN_INCLUDE_DIRS}) + endif() +endif() diff --git a/cmake/legacy/Modules/FindCuDNN.cmake b/cmake/legacy/Modules/FindCuDNN.cmake new file mode 100644 index 0000000000000000000000000000000000000000..0b7ea943f907e6caf176d00c3f49ab89be3c04de --- /dev/null +++ b/cmake/legacy/Modules/FindCuDNN.cmake @@ -0,0 +1,70 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +# Copied from https://github.com/caffe2/caffe2/blob/master/cmake/Modules/FindCuDNN.cmake + +# - Try to find cuDNN +# +# The following variables are optionally searched for defaults +# CUDNN_ROOT_DIR: Base directory where all cuDNN components are found +# +# The following are set after configuration is done: +# CUDNN_FOUND +# CUDNN_INCLUDE_DIRS +# CUDNN_LIBRARIES +# CUDNN_LIBRARY_DIRS + +include(FindPackageHandleStandardArgs) + +set(CUDNN_ROOT_DIR "" CACHE PATH "Folder contains NVIDIA cuDNN") + +find_path(CUDNN_INCLUDE_DIR cudnn.h + HINTS ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR} + PATH_SUFFIXES cuda/include include) + +find_library(CUDNN_LIBRARY cudnn + HINTS ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR} + PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64) + +find_package_handle_standard_args( + CUDNN DEFAULT_MSG CUDNN_INCLUDE_DIR CUDNN_LIBRARY) + +if(CUDNN_FOUND) + # get cuDNN version + file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_HEADER_CONTENTS) + string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" + CUDNN_VERSION_MAJOR "${CUDNN_HEADER_CONTENTS}") + string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" + CUDNN_VERSION_MAJOR "${CUDNN_VERSION_MAJOR}") + string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" + CUDNN_VERSION_MINOR "${CUDNN_HEADER_CONTENTS}") + string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" + CUDNN_VERSION_MINOR "${CUDNN_VERSION_MINOR}") + string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" + CUDNN_VERSION_PATCH "${CUDNN_HEADER_CONTENTS}") + string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" + CUDNN_VERSION_PATCH "${CUDNN_VERSION_PATCH}") + # Assemble cuDNN version + if(NOT CUDNN_VERSION_MAJOR) + set(CUDNN_VERSION "?") + else() + set(CUDNN_VERSION "${CUDNN_VERSION_MAJOR}.${CUDNN_VERSION_MINOR}.${CUDNN_VERSION_PATCH}") + endif() + + set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR}) + set(CUDNN_LIBRARIES ${CUDNN_LIBRARY}) + message(STATUS 
"Found cuDNN: v${CUDNN_VERSION} (include: ${CUDNN_INCLUDE_DIR}, library: ${CUDNN_LIBRARY})") + mark_as_advanced(CUDNN_ROOT_DIR CUDNN_LIBRARY CUDNN_INCLUDE_DIR) +endif() diff --git a/cmake/legacy/Summary.cmake b/cmake/legacy/Summary.cmake new file mode 100644 index 0000000000000000000000000000000000000000..531377cfd5f4ffd524850e36872827bbdddf9a14 --- /dev/null +++ b/cmake/legacy/Summary.cmake @@ -0,0 +1,34 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +# Adapted from https://github.com/caffe2/caffe2/blob/master/cmake/Summary.cmake + +# Prints configuration summary. 
+function (detectron_print_config_summary) + message(STATUS "Summary:") + message(STATUS " CMake version : ${CMAKE_VERSION}") + message(STATUS " CMake command : ${CMAKE_COMMAND}") + message(STATUS " System name : ${CMAKE_SYSTEM_NAME}") + message(STATUS " C++ compiler : ${CMAKE_CXX_COMPILER}") + message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}") + message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}") + message(STATUS " Caffe2 version : ${CAFFE2_VERSION}") + message(STATUS " Caffe2 include path : ${CAFFE2_INCLUDE_DIRS}") + message(STATUS " Have CUDA : ${HAVE_CUDA}") + if (${HAVE_CUDA}) + message(STATUS " CUDA version : ${CUDA_VERSION}") + message(STATUS " CuDNN version : ${CUDNN_VERSION}") + endif() +endfunction() diff --git a/cmake/legacy/Utils.cmake b/cmake/legacy/Utils.cmake new file mode 100644 index 0000000000000000000000000000000000000000..e613650cc665aed9eb003f1965f363c5d915ae8f --- /dev/null +++ b/cmake/legacy/Utils.cmake @@ -0,0 +1,290 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+##############################################################################
+
+# Copied from https://github.com/caffe2/caffe2/blob/master/cmake/Utils.cmake
+
+################################################################################################
+# Exclude and prepend functionalities
+function (exclude OUTPUT INPUT)
+set(EXCLUDES ${ARGN})
+foreach(EXCLUDE ${EXCLUDES})
+  list(REMOVE_ITEM INPUT "${EXCLUDE}")
+endforeach()
+set(${OUTPUT} ${INPUT} PARENT_SCOPE)
+endfunction(exclude)
+
+function (prepend OUTPUT PREPEND)
+set(OUT "")
+foreach(ITEM ${ARGN})
+  list(APPEND OUT "${PREPEND}${ITEM}")
+endforeach()
+set(${OUTPUT} ${OUT} PARENT_SCOPE)
+endfunction(prepend)
+
+
+################################################################################################
+# Clears variables from list
+# Usage:
+#   caffe_clear_vars(<list of variables>)
+macro(caffe_clear_vars)
+  foreach(_var ${ARGN})
+    unset(${_var})
+  endforeach()
+endmacro()
+
+################################################################################################
+# Prints list element per line
+# Usage:
+#   caffe_print_list(<list>)
+function(caffe_print_list)
+  foreach(e ${ARGN})
+    message(STATUS ${e})
+  endforeach()
+endfunction()
+
+################################################################################################
+# Reads set of version defines from the header file
+# Usage:
+#   caffe_parse_header(<file> <define1> <define2> <define3> ..)
+macro(caffe_parse_header FILENAME FILE_VAR) + set(vars_regex "") + set(__parnet_scope OFF) + set(__add_cache OFF) + foreach(name ${ARGN}) + if("${name}" STREQUAL "PARENT_SCOPE") + set(__parnet_scope ON) + elseif("${name}" STREQUAL "CACHE") + set(__add_cache ON) + elseif(vars_regex) + set(vars_regex "${vars_regex}|${name}") + else() + set(vars_regex "${name}") + endif() + endforeach() + if(EXISTS "${FILENAME}") + file(STRINGS "${FILENAME}" ${FILE_VAR} REGEX "#define[ \t]+(${vars_regex})[ \t]+[0-9]+" ) + else() + unset(${FILE_VAR}) + endif() + foreach(name ${ARGN}) + if(NOT "${name}" STREQUAL "PARENT_SCOPE" AND NOT "${name}" STREQUAL "CACHE") + if(${FILE_VAR}) + if(${FILE_VAR} MATCHES ".+[ \t]${name}[ \t]+([0-9]+).*") + string(REGEX REPLACE ".+[ \t]${name}[ \t]+([0-9]+).*" "\\1" ${name} "${${FILE_VAR}}") + else() + set(${name} "") + endif() + if(__add_cache) + set(${name} ${${name}} CACHE INTERNAL "${name} parsed from ${FILENAME}" FORCE) + elseif(__parnet_scope) + set(${name} "${${name}}" PARENT_SCOPE) + endif() + else() + unset(${name} CACHE) + endif() + endif() + endforeach() +endmacro() + +################################################################################################ +# Reads single version define from the header file and parses it +# Usage: +# caffe_parse_header_single_define( ) +function(caffe_parse_header_single_define LIBNAME HDR_PATH VARNAME) + set(${LIBNAME}_H "") + if(EXISTS "${HDR_PATH}") + file(STRINGS "${HDR_PATH}" ${LIBNAME}_H REGEX "^#define[ \t]+${VARNAME}[ \t]+\"[^\"]*\".*$" LIMIT_COUNT 1) + endif() + + if(${LIBNAME}_H) + string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MAJOR "${${LIBNAME}_H}") + string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MINOR "${${LIBNAME}_H}") + string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_PATCH "${${LIBNAME}_H}") + set(${LIBNAME}_VERSION_MAJOR 
${${LIBNAME}_VERSION_MAJOR} ${ARGN} PARENT_SCOPE) + set(${LIBNAME}_VERSION_MINOR ${${LIBNAME}_VERSION_MINOR} ${ARGN} PARENT_SCOPE) + set(${LIBNAME}_VERSION_PATCH ${${LIBNAME}_VERSION_PATCH} ${ARGN} PARENT_SCOPE) + set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_MAJOR}.${${LIBNAME}_VERSION_MINOR}.${${LIBNAME}_VERSION_PATCH}" PARENT_SCOPE) + + # append a TWEAK version if it exists: + set(${LIBNAME}_VERSION_TWEAK "") + if("${${LIBNAME}_H}" MATCHES "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+).*$") + set(${LIBNAME}_VERSION_TWEAK "${CMAKE_MATCH_1}" ${ARGN} PARENT_SCOPE) + endif() + if(${LIBNAME}_VERSION_TWEAK) + set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_STRING}.${${LIBNAME}_VERSION_TWEAK}" ${ARGN} PARENT_SCOPE) + else() + set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_STRING}" ${ARGN} PARENT_SCOPE) + endif() + endif() +endfunction() + +######################################################################################################## +# An option that the user can select. Can accept condition to control when option is available for user. 
+# Usage:
+#   caffe_option(<option_variable> "doc string" <initial value or boolean expression> [IF <condition>])
+function(caffe_option variable description value)
+  set(__value ${value})
+  set(__condition "")
+  set(__varname "__value")
+  foreach(arg ${ARGN})
+    if(arg STREQUAL "IF" OR arg STREQUAL "if")
+      set(__varname "__condition")
+    else()
+      list(APPEND ${__varname} ${arg})
+    endif()
+  endforeach()
+  unset(__varname)
+  if("${__condition}" STREQUAL "")
+    set(__condition 2 GREATER 1)
+  endif()
+
+  if(${__condition})
+    if("${__value}" MATCHES ";")
+      if(${__value})
+        option(${variable} "${description}" ON)
+      else()
+        option(${variable} "${description}" OFF)
+      endif()
+    elseif(DEFINED ${__value})
+      if(${__value})
+        option(${variable} "${description}" ON)
+      else()
+        option(${variable} "${description}" OFF)
+      endif()
+    else()
+      option(${variable} "${description}" ${__value})
+    endif()
+  else()
+    unset(${variable} CACHE)
+  endif()
+endfunction()
+
+##############################################################################
+# Helper function to add as-needed flag around a library.
+function(caffe_add_as_needed_flag lib output_var)
+  if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+    # TODO: Clang seems to not need this flag. Double check.
+    set(${output_var} ${lib} PARENT_SCOPE)
+  elseif(MSVC)
+    # TODO: check what is the behavior of MSVC.
+    # In MSVC, we will add whole archive in default.
+    set(${output_var} ${lib} PARENT_SCOPE)
+  else()
+    # Assume everything else is like gcc: we will need as-needed flag.
+    set(${output_var} -Wl,--no-as-needed ${lib} -Wl,--as-needed PARENT_SCOPE)
+  endif()
+endfunction()
+
+##############################################################################
+# Helper function to add whole_archive flag around a library.
+function(caffe_add_whole_archive_flag lib output_var)
+  if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+    set(${output_var} -Wl,-force_load,$<TARGET_FILE:${lib}> PARENT_SCOPE)
+  elseif(MSVC)
+    # In MSVC, we will add whole archive in default.
+    set(${output_var} -WHOLEARCHIVE:$<TARGET_FILE:${lib}> PARENT_SCOPE)
+  else()
+    # Assume everything else is like gcc
+    set(${output_var} -Wl,--whole-archive ${lib} -Wl,--no-whole-archive PARENT_SCOPE)
+  endif()
+endfunction()
+
+##############################################################################
+# Helper function to add either as-needed, or whole_archive flag around a library.
+function(caffe_add_linker_flag lib output_var)
+  if (BUILD_SHARED_LIBS)
+    caffe_add_as_needed_flag(${lib} tmp)
+  else()
+    caffe_add_whole_archive_flag(${lib} tmp)
+  endif()
+  set(${output_var} ${tmp} PARENT_SCOPE)
+endfunction()
+
+##############################################################################
+# Helper function to automatically generate __init__.py files where python
+# sources reside but there are no __init__.py present.
+function(caffe_autogen_init_py_files)
+  file(GLOB_RECURSE all_python_files RELATIVE ${PROJECT_SOURCE_DIR}
+       "${PROJECT_SOURCE_DIR}/caffe2/*.py")
+  set(python_paths_need_init_py)
+  foreach(python_file ${all_python_files})
+    get_filename_component(python_path ${python_file} PATH)
+    string(REPLACE "/" ";" path_parts ${python_path})
+    set(rebuilt_path ${CMAKE_BINARY_DIR})
+    foreach(path_part ${path_parts})
+      set(rebuilt_path "${rebuilt_path}/${path_part}")
+      list(APPEND python_paths_need_init_py ${rebuilt_path})
+    endforeach()
+  endforeach()
+  list(REMOVE_DUPLICATES python_paths_need_init_py)
+  # Since the _pb2.py files are yet to be created, we will need to manually
+  # add them to the list.
+ list(APPEND python_paths_need_init_py ${CMAKE_BINARY_DIR}/caffe) + list(APPEND python_paths_need_init_py ${CMAKE_BINARY_DIR}/caffe/proto) + list(APPEND python_paths_need_init_py ${CMAKE_BINARY_DIR}/caffe2/proto) + + foreach(tmp ${python_paths_need_init_py}) + if(NOT EXISTS ${tmp}/__init__.py) + # message(STATUS "Generate " ${tmp}/__init__.py) + file(WRITE ${tmp}/__init__.py "") + endif() + endforeach() +endfunction() + +############################################################################## +# Creating a Caffe2 binary target with sources specified with relative path. +# Usage: +# caffe2_binary_target(target_name_or_src [] [] ...) +# If only target_name_or_src is specified, this target is build with one single +# source file and the target name is autogen from the filename. Otherwise, the +# target name is given by the first argument and the rest are the source files +# to build the target. +function(caffe2_binary_target target_name_or_src) + if (${ARGN}) + set(__target ${target_name_or_src}) + prepend(__srcs "${CMAKE_CURRENT_SOURCE_DIR}/" "${ARGN}") + else() + get_filename_component(__target ${target_name_or_src} NAME_WE) + prepend(__srcs "${CMAKE_CURRENT_SOURCE_DIR}/" "${target_name_or_src}") + endif() + add_executable(${__target} ${__srcs}) + add_dependencies(${__target} ${Caffe2_MAIN_LIBS_ORDER}) + target_link_libraries(${__target} ${Caffe2_MAIN_LIBS} ${Caffe2_DEPENDENCY_LIBS}) + install(TARGETS ${__target} DESTINATION bin) +endfunction() + +############################################################################## +# Helper function to add paths to system include directories. +# +# Anaconda distributions typically contain a lot of packages and some +# of those can conflict with headers/libraries that must be sourced +# from elsewhere. This helper ensures that Anaconda paths are always +# added AFTER other include paths, such that it does not accidentally +# takes precedence when it shouldn't. 
+# +# This is just a heuristic and does not have any guarantees. We can +# add other corner cases here (as long as they are generic enough). +# A complete include path cross checker is a final resort if this +# hacky approach proves insufficient. +# +function(caffe2_include_directories) + foreach(path IN LISTS ARGN) + if (${path} MATCHES "/anaconda") + include_directories(AFTER SYSTEM ${path}) + else() + include_directories(BEFORE SYSTEM ${path}) + endif() + endforeach() +endfunction() diff --git a/cmake/legacy/legacymake.cmake b/cmake/legacy/legacymake.cmake new file mode 100644 index 0000000000000000000000000000000000000000..82ffcf12accea893bd029c679ea74de0399d441b --- /dev/null +++ b/cmake/legacy/legacymake.cmake @@ -0,0 +1,63 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +# This file contains legacy cmake scripts that is going to be removed +# in a future release. + +# Add CMake modules. +list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/legacy/Modules) + +# Add compiler flags. +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -O2 -fPIC -Wno-narrowing") + +# Include Caffe2 CMake utils. +include(cmake/legacy/Utils.cmake) + +# Find dependencies. +include(cmake/legacy/Dependencies.cmake) + +# Print configuration summary. 
+include(cmake/legacy/Summary.cmake) +detectron_print_config_summary() + +# Collect custom ops sources. +file(GLOB CUSTOM_OPS_CPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/detectron/ops/*.cc) +file(GLOB CUSTOM_OPS_GPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/detectron/ops/*.cu) + +# Install custom CPU ops lib. +add_library( + caffe2_detectron_custom_ops SHARED + ${CUSTOM_OPS_CPU_SRCS}) + +target_include_directories( + caffe2_detectron_custom_ops PRIVATE + ${CAFFE2_INCLUDE_DIRS}) +target_link_libraries(caffe2_detectron_custom_ops caffe2) +install(TARGETS caffe2_detectron_custom_ops DESTINATION lib) + +# Install custom GPU ops lib. +if (${HAVE_CUDA}) + # Additional -I prefix is required for CMake versions before commit (< 3.7): + # https://github.com/Kitware/CMake/commit/7ded655f7ba82ea72a82d0555449f2df5ef38594 + list(APPEND CUDA_INCLUDE_DIRS -I${CAFFE2_INCLUDE_DIRS}) + CUDA_ADD_LIBRARY( + caffe2_detectron_custom_ops_gpu SHARED + ${CUSTOM_OPS_CPU_SRCS} + ${CUSTOM_OPS_GPU_SRCS}) + + target_link_libraries(caffe2_detectron_custom_ops_gpu caffe2_gpu) + install(TARGETS caffe2_detectron_custom_ops_gpu DESTINATION lib) +endif() diff --git a/configs/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_2x_gn.yaml b/configs/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_2x_gn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c8c96968d903381c24da76c04b170fb4684e6a6 --- /dev/null +++ b/configs/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_2x_gn.yaml @@ -0,0 +1,52 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True + USE_GN: True # Note: use GN on the FPN-specific layers +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: 
bottleneck_gn_transformation # Note: this is a GN bottleneck transform + STEM_FUNC: basic_gn_stem # Note: this is a GN stem + SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head # Note: this is a Conv GN head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl # Note: a GN pre-trained model + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_3x_gn.yaml b/configs/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_3x_gn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe463daac800088faaeb19a8acb4942ce39fea75 --- /dev/null +++ b/configs/04_2018_gn_baselines/e2e_mask_rcnn_R-101-FPN_3x_gn.yaml @@ -0,0 +1,52 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 270000 + STEPS: [0, 210000, 250000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True + USE_GN: True # Note: use GN on the FPN-specific layers +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform + STEM_FUNC: basic_gn_stem # Note: this is a GN stem + SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head # Note: this is a Conv GN head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl # Note: a GN pre-trained model + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + 
RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . diff --git a/configs/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn.yaml b/configs/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eecae452650b6045cf05cb10eb89647f14daca13 --- /dev/null +++ b/configs/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn.yaml @@ -0,0 +1,52 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True + USE_GN: True # Note: use GN on the FPN-specific layers +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform + STEM_FUNC: basic_gn_stem # Note: this is a GN stem + SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head # Note: this is a Conv GN head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl # Note: a GN pre-trained model + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + 
RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . diff --git a/configs/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_3x_gn.yaml b/configs/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_3x_gn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..94950ff42ee7f58f9d65c6f25843eff7d291dcfc --- /dev/null +++ b/configs/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_3x_gn.yaml @@ -0,0 +1,52 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 270000 + STEPS: [0, 210000, 250000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True + USE_GN: True # Note: use GN on the FPN-specific layers +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform + STEM_FUNC: basic_gn_stem # Note: this is a GN stem + SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head # Note: this is a Conv GN head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl # Note: a GN pre-trained model + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 
800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . diff --git a/configs/04_2018_gn_baselines/mask_rcnn_R-50-FPN_1x_gn.yaml b/configs/04_2018_gn_baselines/mask_rcnn_R-50-FPN_1x_gn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3543bf24fe2412fb6f420162cfd1fe81474556dd --- /dev/null +++ b/configs/04_2018_gn_baselines/mask_rcnn_R-50-FPN_1x_gn.yaml @@ -0,0 +1,52 @@ +# WARNING: this script uses **pre-computed** BN-based proposals, and is for quick debugging only. +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True + USE_GN: True # Note: use GN on the FPN-specific layers +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform + STEM_FUNC: basic_gn_stem # Note: this is a GN stem + SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head # Note: this is a Conv GN head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl # Note: a GN pre-trained model + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: 
('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . diff --git a/configs/04_2018_gn_baselines/scratch_e2e_mask_rcnn_R-101-FPN_3x_gn.yaml b/configs/04_2018_gn_baselines/scratch_e2e_mask_rcnn_R-101-FPN_3x_gn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c30a0c2cafc2b0762496788f050ad8a3a84b2d8d --- /dev/null +++ b/configs/04_2018_gn_baselines/scratch_e2e_mask_rcnn_R-101-FPN_3x_gn.yaml @@ -0,0 +1,53 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 270000 + STEPS: [0, 210000, 250000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True + USE_GN: True # Note: use GN on the FPN-specific layers +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform + STEM_FUNC: basic_gn_stem # Note: this is a GN stem + SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head # Note: this is a Conv GN head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: 
+ ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + # WEIGHTS: N/A + FREEZE_AT: 0 + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . diff --git a/configs/04_2018_gn_baselines/scratch_e2e_mask_rcnn_R-50-FPN_3x_gn.yaml b/configs/04_2018_gn_baselines/scratch_e2e_mask_rcnn_R-50-FPN_3x_gn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1245f3ab6a2713831d0ef1f4dab4f1efb441e102 --- /dev/null +++ b/configs/04_2018_gn_baselines/scratch_e2e_mask_rcnn_R-50-FPN_3x_gn.yaml @@ -0,0 +1,53 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 270000 + STEPS: [0, 210000, 250000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True + USE_GN: True # Note: use GN on the FPN-specific layers +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_gn_transformation # Note: this is a GN bottleneck transform + STEM_FUNC: basic_gn_stem # Note: this is a GN stem + SHORTCUT_FUNC: basic_gn_shortcut # Note: this is a GN shortcut +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_Xconv1fc_gn_head # Note: this is a Conv GN head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: 
mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs_gn # Note: this is a GN mask head + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + # WEIGHTS: N/A + FREEZE_AT: 0 + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . diff --git a/configs/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml b/configs/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4f17fa37eb2883611d04678cd40f8ad7613dd4da --- /dev/null +++ b/configs/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml @@ -0,0 +1,37 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_2x.yaml b/configs/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..42f9f5dcceb99521e8d94174cbd42b31eab11d9b --- /dev/null +++ b/configs/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_2x.yaml @@ -0,0 +1,37 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml b/configs/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b5c12234e09a66ca61b7b824bc3bb361c324c51 --- /dev/null +++ b/configs/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml @@ -0,0 +1,34 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: ResNet.add_ResNet50_conv4_body + NUM_CLASSES: 81 + FASTER_RCNN: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.01 + GAMMA: 0.1 + # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +RPN: + SIZES: (32, 64, 128, 256, 512) +FAST_RCNN: + ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head + ROI_XFORM_METHOD: RoIAlign +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 6000 + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_faster_rcnn_R-50-C4_2x.yaml b/configs/12_2017_baselines/e2e_faster_rcnn_R-50-C4_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..44686a99ec328c0d97dabb847a5d2c03e0f1c3df --- /dev/null +++ b/configs/12_2017_baselines/e2e_faster_rcnn_R-50-C4_2x.yaml @@ -0,0 +1,34 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: ResNet.add_ResNet50_conv4_body + NUM_CLASSES: 81 + FASTER_RCNN: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.01 + GAMMA: 0.1 + # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) + MAX_ITER: 360000 + STEPS: [0, 240000, 320000] +RPN: + SIZES: (32, 64, 128, 256, 512) +FAST_RCNN: + ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head + ROI_XFORM_METHOD: RoIAlign +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 6000 + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml b/configs/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f9bb4afbe50ee06d4693bb2cfa96750a43fd90d --- /dev/null +++ b/configs/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml @@ -0,0 +1,37 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_2x.yaml b/configs/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..457a3ec44722ad4e41f69f588e8e4d291a90599c --- /dev/null +++ b/configs/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_2x.yaml @@ -0,0 +1,37 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml b/configs/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e87e4df571a12bc50855b0ea033cf05939ef5e6b --- /dev/null +++ b/configs/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml @@ -0,0 +1,44 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml b/configs/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c8d4e0d5ff88c27a6d3910d9b9b74de348b2b96 --- /dev/null +++ b/configs/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_2x.yaml @@ -0,0 +1,44 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 360000 + STEPS: [0, 240000, 320000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml b/configs/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ec91bfda2e20bc659bd4b993e87eac45d8e51d4b --- /dev/null +++ b/configs/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_1x.yaml @@ -0,0 +1,44 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 64 + WIDTH_PER_GROUP: 4 +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml b/configs/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ea875df0097453d852e0105ba9078a0db7e3e440 --- /dev/null +++ b/configs/12_2017_baselines/e2e_faster_rcnn_X-101-64x4d-FPN_2x.yaml @@ -0,0 +1,45 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 360000 + STEPS: [0, 240000, 320000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 64 + WIDTH_PER_GROUP: 4 +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_1x.yaml b/configs/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db1691194bc58085a959efa0b487b483401ae528 --- /dev/null +++ b/configs/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_1x.yaml @@ -0,0 +1,51 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 2 + FASTER_RCNN: True + KEYPOINTS_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: head_builder.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +KRCNN: + ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX + NUM_STACKED_CONVS: 8 + NUM_KEYPOINTS: 17 + USE_DECONV_OUTPUT: True + CONV_INIT: MSRAFill + CONV_HEAD_DIM: 512 + UP_SCALE: 2 + HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + ROI_XFORM_SAMPLING_RATIO: 2 + KEYPOINT_CONFIDENCE: bbox +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + SCALES: (640, 672, 704, 736, 768, 800) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('keypoints_coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml b/configs/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..77d20ffb44f97fc60b4e26d247c782fb7cbab3b5 --- /dev/null +++ b/configs/12_2017_baselines/e2e_keypoint_rcnn_R-101-FPN_s1x.yaml @@ -0,0 +1,51 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 2 + FASTER_RCNN: True + KEYPOINTS_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 130000 + STEPS: [0, 100000, 120000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: head_builder.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +KRCNN: + ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX + NUM_STACKED_CONVS: 8 + NUM_KEYPOINTS: 17 + USE_DECONV_OUTPUT: True + CONV_INIT: MSRAFill + CONV_HEAD_DIM: 512 + UP_SCALE: 2 + HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + ROI_XFORM_SAMPLING_RATIO: 2 + KEYPOINT_CONFIDENCE: bbox +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + SCALES: (640, 672, 704, 736, 768, 800) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('keypoints_coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml b/configs/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9bb94160c1067abb712d17467f2931997fec82e2 --- /dev/null +++ b/configs/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml @@ -0,0 +1,51 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 2 + FASTER_RCNN: True + KEYPOINTS_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: head_builder.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +KRCNN: + ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX + NUM_STACKED_CONVS: 8 + NUM_KEYPOINTS: 17 + USE_DECONV_OUTPUT: True + CONV_INIT: MSRAFill + CONV_HEAD_DIM: 512 + UP_SCALE: 2 + HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + ROI_XFORM_SAMPLING_RATIO: 2 + KEYPOINT_CONFIDENCE: bbox +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + SCALES: (640, 672, 704, 736, 768, 800) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('keypoints_coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_s1x.yaml b/configs/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_s1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e93324fb24459ef7f61f5489a9e5520e59fe0ee4 --- /dev/null +++ b/configs/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_s1x.yaml @@ -0,0 +1,51 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 2 + FASTER_RCNN: True + KEYPOINTS_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 130000 + STEPS: [0, 100000, 120000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: head_builder.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +KRCNN: + ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX + NUM_STACKED_CONVS: 8 + NUM_KEYPOINTS: 17 + USE_DECONV_OUTPUT: True + CONV_INIT: MSRAFill + CONV_HEAD_DIM: 512 + UP_SCALE: 2 + HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + ROI_XFORM_SAMPLING_RATIO: 2 + KEYPOINT_CONFIDENCE: bbox +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + SCALES: (640, 672, 704, 736, 768, 800) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('keypoints_coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml b/configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8deaf6aaa8611e21a6861d2efb972d378e223005 --- /dev/null +++ b/configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_1x.yaml @@ -0,0 +1,56 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 2 + FASTER_RCNN: True + KEYPOINTS_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +FAST_RCNN: + ROI_BOX_HEAD: head_builder.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +KRCNN: + ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX + NUM_STACKED_CONVS: 8 + NUM_KEYPOINTS: 17 + USE_DECONV_OUTPUT: True + CONV_INIT: MSRAFill + CONV_HEAD_DIM: 512 + UP_SCALE: 2 + HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + ROI_XFORM_SAMPLING_RATIO: 2 + KEYPOINT_CONFIDENCE: bbox +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + SCALES: (640, 672, 704, 736, 768, 800) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('keypoints_coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml b/configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0dd2a9cd8d325c1612c0e9f4ea6a0968ef9f8eb9 --- /dev/null +++ b/configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml @@ -0,0 +1,56 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 2 + FASTER_RCNN: True + KEYPOINTS_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 130000 + STEPS: [0, 100000, 120000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +FAST_RCNN: + ROI_BOX_HEAD: head_builder.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +KRCNN: + ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX + NUM_STACKED_CONVS: 8 + NUM_KEYPOINTS: 17 + USE_DECONV_OUTPUT: True + CONV_INIT: MSRAFill + CONV_HEAD_DIM: 512 + UP_SCALE: 2 + HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + ROI_XFORM_SAMPLING_RATIO: 2 + KEYPOINT_CONFIDENCE: bbox +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + SCALES: (640, 672, 704, 736, 768, 800) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('keypoints_coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml b/configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b710030f699104e1064c1eee0f369dd518451901 --- /dev/null +++ b/configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_1x.yaml @@ -0,0 +1,57 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 2 + FASTER_RCNN: True + KEYPOINTS_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 64 + WIDTH_PER_GROUP: 4 +FAST_RCNN: + ROI_BOX_HEAD: head_builder.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +KRCNN: + ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX + NUM_STACKED_CONVS: 8 + NUM_KEYPOINTS: 17 + USE_DECONV_OUTPUT: True + CONV_INIT: MSRAFill + CONV_HEAD_DIM: 512 + UP_SCALE: 2 + HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + ROI_XFORM_SAMPLING_RATIO: 2 + KEYPOINT_CONFIDENCE: bbox +TRAIN: + # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + SCALES: (640, 672, 704, 736, 768, 800) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('keypoints_coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml b/configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7030db7933df5aff0c94e0c82d2eeab5ef3f32e1 --- /dev/null +++ b/configs/12_2017_baselines/e2e_keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml @@ -0,0 +1,57 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 2 + FASTER_RCNN: True + KEYPOINTS_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 130000 + STEPS: [0, 100000, 120000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 64 + WIDTH_PER_GROUP: 4 +FAST_RCNN: + ROI_BOX_HEAD: head_builder.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +KRCNN: + ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX + NUM_STACKED_CONVS: 8 + NUM_KEYPOINTS: 17 + USE_DECONV_OUTPUT: True + CONV_INIT: MSRAFill + CONV_HEAD_DIM: 512 + UP_SCALE: 2 + HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + ROI_XFORM_SAMPLING_RATIO: 2 + KEYPOINT_CONFIDENCE: bbox +TRAIN: + # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + SCALES: (640, 672, 704, 736, 768, 800) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('keypoints_coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml b/configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43a0924f68bcac69c26800e066e194dcde047288 --- /dev/null +++ b/configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml @@ -0,0 +1,46 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml b/configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..002d3ac18422b247828987a9728a7ba18b29d4b3 --- /dev/null +++ b/configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml @@ -0,0 +1,46 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml b/configs/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2bee8bd501f0faa9d4ea0f8b81f92077874e7ca7 --- /dev/null +++ b/configs/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml @@ -0,0 +1,42 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: ResNet.add_ResNet50_conv4_body + NUM_CLASSES: 81 + FASTER_RCNN: True + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.01 + GAMMA: 0.1 + # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +RPN: + SIZES: (32, 64, 128, 256, 512) +FAST_RCNN: + ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head + ROI_XFORM_METHOD: RoIAlign +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare + RESOLUTION: 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default: GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 6000 + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_mask_rcnn_R-50-C4_2x.yaml b/configs/12_2017_baselines/e2e_mask_rcnn_R-50-C4_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7dacafed2e53e429ec720b91ef8b081e672e6bcf --- /dev/null +++ b/configs/12_2017_baselines/e2e_mask_rcnn_R-50-C4_2x.yaml @@ -0,0 +1,42 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: ResNet.add_ResNet50_conv4_body + NUM_CLASSES: 81 + FASTER_RCNN: True + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.01 + GAMMA: 0.1 + # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) + MAX_ITER: 360000 + STEPS: [0, 240000, 320000] +RPN: + SIZES: (32, 64, 128, 256, 512) +FAST_RCNN: + ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head + ROI_XFORM_METHOD: RoIAlign +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare + RESOLUTION: 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default: GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 6000 + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml b/configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9798483ef7a125e9aa8fb572363c3f252dcd8a3c --- /dev/null +++ b/configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml @@ -0,0 +1,46 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml b/configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0568c306f9891b2ca945781b9a0f689fd1657395 --- /dev/null +++ b/configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml @@ -0,0 +1,46 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml b/configs/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4276e9e8c78decd5b2bb4c8c7a27314c090db530 --- /dev/null +++ b/configs/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml @@ -0,0 +1,53 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml b/configs/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a55962c0bc8a8fdfe54cf0b60e5d4e096ad1ed6f --- /dev/null +++ b/configs/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_2x.yaml @@ -0,0 +1,53 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 360000 + STEPS: [0, 240000, 320000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml b/configs/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fcfcfdd72785906c72bb5331012281416bfc9354 --- /dev/null +++ b/configs/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_1x.yaml @@ -0,0 +1,54 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 64 + WIDTH_PER_GROUP: 4 +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml b/configs/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..277ede98f442c07f72daeb0cb0438cd781365bce --- /dev/null +++ b/configs/12_2017_baselines/e2e_mask_rcnn_X-101-64x4d-FPN_2x.yaml @@ -0,0 +1,54 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 360000 + STEPS: [0, 240000, 320000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 64 + WIDTH_PER_GROUP: 4 +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml b/configs/12_2017_baselines/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d540a299dacddf27d04feac6a4090435167ef1e4 --- /dev/null +++ b/configs/12_2017_baselines/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x.yaml @@ -0,0 +1,77 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet152_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + # 1.44x schedule (note TRAIN.IMS_PER_BATCH: 1) + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 260000 + STEPS: [0, 200000, 240000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (640, 672, 704, 736, 768, 800) # Scale jitter + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + BBOX_VOTE: + ENABLED: True + VOTE_TH: 0.9 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 + BBOX_AUG: + ENABLED: True + SCORE_HEUR: UNION + COORD_HEUR: UNION + H_FLIP: 
True + SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) + MAX_SIZE: 2000 + SCALE_H_FLIP: True + SCALE_SIZE_DEP: False + ASPECT_RATIOS: () + ASPECT_RATIO_H_FLIP: False + MASK_AUG: + ENABLED: True + HEUR: SOFT_AVG + H_FLIP: True + SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) + MAX_SIZE: 2000 + SCALE_H_FLIP: True + SCALE_SIZE_DEP: False + ASPECT_RATIOS: () + ASPECT_RATIO_H_FLIP: False +OUTPUT_DIR: . diff --git a/configs/12_2017_baselines/fast_rcnn_R-101-FPN_1x.yaml b/configs/12_2017_baselines/fast_rcnn_R-101-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c11631203adef3bd63005b42992b9e67767566cd --- /dev/null +++ b/configs/12_2017_baselines/fast_rcnn_R-101-FPN_1x.yaml @@ -0,0 +1,36 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: 
('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . diff --git a/configs/12_2017_baselines/fast_rcnn_R-101-FPN_2x.yaml b/configs/12_2017_baselines/fast_rcnn_R-101-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f7c5b3ace1e6d194b3db318221bdd71cdfe7f3cc --- /dev/null +++ b/configs/12_2017_baselines/fast_rcnn_R-101-FPN_2x.yaml @@ -0,0 +1,36 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/fast_rcnn_R-50-C4_1x.yaml b/configs/12_2017_baselines/fast_rcnn_R-50-C4_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a2c3287917105b0c5f3d5dec5f63fa5240648108 --- /dev/null +++ b/configs/12_2017_baselines/fast_rcnn_R-50-C4_1x.yaml @@ -0,0 +1,34 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: ResNet.add_ResNet50_conv4_body + NUM_CLASSES: 81 +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.01 + GAMMA: 0.1 + # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +RPN: + SIZES: (32, 64, 128, 256, 512) +FAST_RCNN: + ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head + ROI_XFORM_METHOD: RoIAlign +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/fast_rcnn_R-50-C4_2x.yaml b/configs/12_2017_baselines/fast_rcnn_R-50-C4_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..71313fa380e2c0f5ca651806f5ad9ffaca58d145 --- /dev/null +++ b/configs/12_2017_baselines/fast_rcnn_R-50-C4_2x.yaml @@ -0,0 +1,34 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: ResNet.add_ResNet50_conv4_body + NUM_CLASSES: 81 +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.01 + GAMMA: 0.1 + # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) + MAX_ITER: 360000 + STEPS: [0, 240000, 320000] +RPN: + SIZES: (32, 64, 128, 256, 512) +FAST_RCNN: + ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head + ROI_XFORM_METHOD: RoIAlign +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml b/configs/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..baa053cc140e3478a9b1e06108669c8abe11161c --- /dev/null +++ b/configs/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml @@ -0,0 +1,36 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/fast_rcnn_R-50-FPN_2x.yaml b/configs/12_2017_baselines/fast_rcnn_R-50-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aee5481c6dd38daeb3d2089f822a183e4a04c3e5 --- /dev/null +++ b/configs/12_2017_baselines/fast_rcnn_R-50-FPN_2x.yaml @@ -0,0 +1,36 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_1x.yaml b/configs/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b65d35f69d021333a9b82d02d36fae2147896f00 --- /dev/null +++ b/configs/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_1x.yaml @@ -0,0 +1,43 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_2x.yaml b/configs/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2a129b5715caf6e7c45d15d9cb5736a3afe24ab8 --- /dev/null +++ b/configs/12_2017_baselines/fast_rcnn_X-101-32x8d-FPN_2x.yaml @@ -0,0 +1,43 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 360000 + STEPS: [0, 240000, 320000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_1x.yaml b/configs/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e28806d3d8697fc84e28fc08135d57590379f397 --- /dev/null +++ b/configs/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_1x.yaml @@ -0,0 +1,43 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 64 + WIDTH_PER_GROUP: 4 +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_2x.yaml b/configs/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af79f2bb0a0c5b9e67e2ba122b11b9cf3abd6ec1 --- /dev/null +++ b/configs/12_2017_baselines/fast_rcnn_X-101-64x4d-FPN_2x.yaml @@ -0,0 +1,43 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 360000 + STEPS: [0, 240000, 320000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 64 + WIDTH_PER_GROUP: 4 +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/keypoint_rcnn_R-101-FPN_1x.yaml b/configs/12_2017_baselines/keypoint_rcnn_R-101-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f37de735c1386e91d92e0f9928be5211c40f783a --- /dev/null +++ b/configs/12_2017_baselines/keypoint_rcnn_R-101-FPN_1x.yaml @@ -0,0 +1,50 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 2 + KEYPOINTS_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: head_builder.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +KRCNN: + ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX + NUM_STACKED_CONVS: 8 + NUM_KEYPOINTS: 17 + USE_DECONV_OUTPUT: True + CONV_INIT: MSRAFill + CONV_HEAD_DIM: 512 + UP_SCALE: 2 + HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + ROI_XFORM_SAMPLING_RATIO: 2 + KEYPOINT_CONFIDENCE: bbox +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (640, 672, 704, 736, 768, 800) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('keypoints_coco_2014_minival',) + PROPOSAL_FILES: 
('https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . diff --git a/configs/12_2017_baselines/keypoint_rcnn_R-101-FPN_s1x.yaml b/configs/12_2017_baselines/keypoint_rcnn_R-101-FPN_s1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a13e45a2c1b54d9f6cf6f3789d9110fadad02172 --- /dev/null +++ b/configs/12_2017_baselines/keypoint_rcnn_R-101-FPN_s1x.yaml @@ -0,0 +1,50 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 2 + KEYPOINTS_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 130000 + STEPS: [0, 100000, 120000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: head_builder.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +KRCNN: + ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX + NUM_STACKED_CONVS: 8 + NUM_KEYPOINTS: 17 + USE_DECONV_OUTPUT: True + CONV_INIT: MSRAFill + CONV_HEAD_DIM: 512 + UP_SCALE: 2 + HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + ROI_XFORM_SAMPLING_RATIO: 2 + KEYPOINT_CONFIDENCE: bbox +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 
'https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (640, 672, 704, 736, 768, 800) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('keypoints_coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999521/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml.08_20_33.1OkqMmqP/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . diff --git a/configs/12_2017_baselines/keypoint_rcnn_R-50-FPN_1x.yaml b/configs/12_2017_baselines/keypoint_rcnn_R-50-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe3d222d0a6eac530ecaae614d0d0274a77bb758 --- /dev/null +++ b/configs/12_2017_baselines/keypoint_rcnn_R-50-FPN_1x.yaml @@ -0,0 +1,50 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 2 + KEYPOINTS_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: head_builder.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +KRCNN: + ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX + NUM_STACKED_CONVS: 8 + NUM_KEYPOINTS: 17 + USE_DECONV_OUTPUT: True + CONV_INIT: MSRAFill + CONV_HEAD_DIM: 512 + UP_SCALE: 2 + HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + ROI_XFORM_SAMPLING_RATIO: 2 + KEYPOINT_CONFIDENCE: bbox +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('keypoints_coco_2014_train', 
'keypoints_coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (640, 672, 704, 736, 768, 800) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('keypoints_coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . diff --git a/configs/12_2017_baselines/keypoint_rcnn_R-50-FPN_s1x.yaml b/configs/12_2017_baselines/keypoint_rcnn_R-50-FPN_s1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..542d082d70138b963569089a991aed21870304f7 --- /dev/null +++ b/configs/12_2017_baselines/keypoint_rcnn_R-50-FPN_s1x.yaml @@ -0,0 +1,50 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 2 + KEYPOINTS_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 130000 + STEPS: [0, 100000, 120000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: head_builder.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +KRCNN: + ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX + NUM_STACKED_CONVS: 8 + NUM_KEYPOINTS: 17 + USE_DECONV_OUTPUT: True + CONV_INIT: MSRAFill + CONV_HEAD_DIM: 512 + UP_SCALE: 2 + HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) + 
ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + ROI_XFORM_SAMPLING_RATIO: 2 + KEYPOINT_CONFIDENCE: bbox +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (640, 672, 704, 736, 768, 800) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('keypoints_coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_1x.yaml b/configs/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd4ca5d1fc36da65359033785818f410948d4b7f --- /dev/null +++ b/configs/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_1x.yaml @@ -0,0 +1,55 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 2 + KEYPOINTS_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +FAST_RCNN: + ROI_BOX_HEAD: head_builder.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +KRCNN: + ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX + NUM_STACKED_CONVS: 8 + NUM_KEYPOINTS: 17 + USE_DECONV_OUTPUT: True + CONV_INIT: MSRAFill + CONV_HEAD_DIM: 512 + UP_SCALE: 2 + HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + ROI_XFORM_SAMPLING_RATIO: 2 + KEYPOINT_CONFIDENCE: bbox +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 
'https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (640, 672, 704, 736, 768, 800) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('keypoints_coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . diff --git a/configs/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml b/configs/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7841f0b21e8d3e8e90b02272436b99cf29879332 --- /dev/null +++ b/configs/12_2017_baselines/keypoint_rcnn_X-101-32x8d-FPN_s1x.yaml @@ -0,0 +1,55 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 2 + KEYPOINTS_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 130000 + STEPS: [0, 100000, 120000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +FAST_RCNN: + ROI_BOX_HEAD: head_builder.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +KRCNN: + ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX + NUM_STACKED_CONVS: 8 + NUM_KEYPOINTS: 17 + USE_DECONV_OUTPUT: True + CONV_INIT: MSRAFill + CONV_HEAD_DIM: 512 + UP_SCALE: 2 + HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + 
ROI_XFORM_SAMPLING_RATIO: 2 + KEYPOINT_CONFIDENCE: bbox +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (640, 672, 704, 736, 768, 800) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('keypoints_coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760438/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml.06_04_23.M2oJlDPW/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_1x.yaml b/configs/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..137248a384562c2c0edcbe835ed8f854bd884bd9 --- /dev/null +++ b/configs/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_1x.yaml @@ -0,0 +1,56 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 2 + KEYPOINTS_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 64 + WIDTH_PER_GROUP: 4 +FAST_RCNN: + ROI_BOX_HEAD: head_builder.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +KRCNN: + ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX + NUM_STACKED_CONVS: 8 + NUM_KEYPOINTS: 17 + USE_DECONV_OUTPUT: True + CONV_INIT: MSRAFill + CONV_HEAD_DIM: 512 + UP_SCALE: 2 + HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + ROI_XFORM_SAMPLING_RATIO: 2 + KEYPOINT_CONFIDENCE: bbox +TRAIN: + # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 
'https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (640, 672, 704, 736, 768, 800) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('keypoints_coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . diff --git a/configs/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml b/configs/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5d39633b8a9f0d4bfeba8910e5efe231cb923584 --- /dev/null +++ b/configs/12_2017_baselines/keypoint_rcnn_X-101-64x4d-FPN_s1x.yaml @@ -0,0 +1,56 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 2 + KEYPOINTS_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 130000 + STEPS: [0, 100000, 120000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 64 + WIDTH_PER_GROUP: 4 +FAST_RCNN: + ROI_BOX_HEAD: head_builder.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +KRCNN: + ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX + NUM_STACKED_CONVS: 8 + NUM_KEYPOINTS: 17 + USE_DECONV_OUTPUT: True + CONV_INIT: MSRAFill + CONV_HEAD_DIM: 512 + UP_SCALE: 2 + HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + 
ROI_XFORM_SAMPLING_RATIO: 2 + KEYPOINT_CONFIDENCE: bbox +TRAIN: + # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (640, 672, 704, 736, 768, 800) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('keypoints_coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35999553/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml.08_21_33.ghFzzArr/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/mask_rcnn_R-101-FPN_1x.yaml b/configs/12_2017_baselines/mask_rcnn_R-101-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2b69697e840a044a61e344fe8bdb4a467ba05fbe --- /dev/null +++ b/configs/12_2017_baselines/mask_rcnn_R-101-FPN_1x.yaml @@ -0,0 +1,45 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 
0.5 +OUTPUT_DIR: . diff --git a/configs/12_2017_baselines/mask_rcnn_R-101-FPN_2x.yaml b/configs/12_2017_baselines/mask_rcnn_R-101-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8561ffffcf8e7217d56cfd15c3f4a79b672390d0 --- /dev/null +++ b/configs/12_2017_baselines/mask_rcnn_R-101-FPN_2x.yaml @@ -0,0 +1,45 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998887/12_2017_baselines/rpn_R-101-FPN_1x.yaml.08_07_07.vzhHEs0V/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + 
MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . diff --git a/configs/12_2017_baselines/mask_rcnn_R-50-C4_1x.yaml b/configs/12_2017_baselines/mask_rcnn_R-50-C4_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b29b4440737561cb75f2769f160bd2d0120285ac --- /dev/null +++ b/configs/12_2017_baselines/mask_rcnn_R-50-C4_1x.yaml @@ -0,0 +1,42 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: ResNet.add_ResNet50_conv4_body + NUM_CLASSES: 81 + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.01 + GAMMA: 0.1 + # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +RPN: + SIZES: (32, 64, 128, 256, 512) +FAST_RCNN: + ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head + ROI_XFORM_METHOD: RoIAlign +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare + RESOLUTION: 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default: GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/mask_rcnn_R-50-C4_2x.yaml b/configs/12_2017_baselines/mask_rcnn_R-50-C4_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..06d07b671374ea2907efba890dfd3b052923e2cb --- /dev/null +++ b/configs/12_2017_baselines/mask_rcnn_R-50-C4_2x.yaml @@ -0,0 +1,42 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: ResNet.add_ResNet50_conv4_body + NUM_CLASSES: 81 + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.01 + GAMMA: 0.1 + # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) + MAX_ITER: 360000 + STEPS: [0, 240000, 320000] +RPN: + SIZES: (32, 64, 128, 256, 512) +FAST_RCNN: + ROI_BOX_HEAD: ResNet.add_ResNet_roi_conv5_head + ROI_XFORM_METHOD: RoIAlign +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare + RESOLUTION: 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default: GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_train/rpn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_valminusminival/rpn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L/output/test/coco_2014_minival/rpn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/mask_rcnn_R-50-FPN_1x.yaml b/configs/12_2017_baselines/mask_rcnn_R-50-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14c7ae969583ebe52b5848b152ef958231c8e6dd --- /dev/null +++ b/configs/12_2017_baselines/mask_rcnn_R-50-FPN_1x.yaml @@ -0,0 +1,45 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 
+OUTPUT_DIR: . diff --git a/configs/12_2017_baselines/mask_rcnn_R-50-FPN_2x.yaml b/configs/12_2017_baselines/mask_rcnn_R-50-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4778c037b0e560fa0e021304244416ffa4abdf44 --- /dev/null +++ b/configs/12_2017_baselines/mask_rcnn_R-50-FPN_2x.yaml @@ -0,0 +1,45 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 
+ NMS: 0.5 +OUTPUT_DIR: . diff --git a/configs/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_1x.yaml b/configs/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d236267640fc661f40b791eeaf72b36056a3a82 --- /dev/null +++ b/configs/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_1x.yaml @@ -0,0 +1,52 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 
+TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . diff --git a/configs/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_2x.yaml b/configs/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0a6532ec3afdcbdc99cbc087b32f365ea3e84d43 --- /dev/null +++ b/configs/12_2017_baselines/mask_rcnn_X-101-32x8d-FPN_2x.yaml @@ -0,0 +1,52 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 360000 + STEPS: [0, 240000, 320000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: 
('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/36760102/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml.06_00_16.RWeBAniO/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . diff --git a/configs/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_1x.yaml b/configs/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2441d5ff9ec72c80932f5a536987da3454dca367 --- /dev/null +++ b/configs/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_1x.yaml @@ -0,0 +1,53 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 64 + WIDTH_PER_GROUP: 4 +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + 
ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_2x.yaml b/configs/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..87e9e2dce80748014a2c68d0262022c3b7d96a56 --- /dev/null +++ b/configs/12_2017_baselines/mask_rcnn_X-101-64x4d-FPN_2x.yaml @@ -0,0 +1,53 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + # 2x schedule (note TRAIN.IMS_PER_BATCH: 1) + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 360000 + STEPS: [0, 240000, 320000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 64 + WIDTH_PER_GROUP: 4 +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + # md5sum of weights pkl file: aa14062280226e48f569ef1c7212e7c7 + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (800,) + MAX_SIZE: 1333 + 
IMS_PER_BATCH: 1 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998956/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml.08_08_41.Seh0psKz/output/test/coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 +OUTPUT_DIR: . diff --git a/configs/12_2017_baselines/retinanet_R-101-FPN_1x.yaml b/configs/12_2017_baselines/retinanet_R-101-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c875328d92ea69d864494716950e18c27b0cfa71 --- /dev/null +++ b/configs/12_2017_baselines/retinanet_R-101-FPN_1x.yaml @@ -0,0 +1,41 @@ +MODEL: + TYPE: retinanet + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_RPN: True + RPN_MAX_LEVEL: 7 + RPN_MIN_LEVEL: 3 + COARSEST_STRIDE: 128 + EXTRA_CONV_LEVELS: True +RETINANET: + RETINANET_ON: True + NUM_CONVS: 4 + ASPECT_RATIOS: (1.0, 2.0, 0.5) + SCALES_PER_OCTAVE: 3 + ANCHOR_SCALE: 4 + LOSS_GAMMA: 2.0 + LOSS_ALPHA: 0.25 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + RPN_STRADDLE_THRESH: -1 # default 0 +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 10000 # Per FPN level + RPN_POST_NMS_TOP_N: 2000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/retinanet_R-101-FPN_2x.yaml b/configs/12_2017_baselines/retinanet_R-101-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a3f91aba4d3358b6e982cb8d5cbbff465376bce --- /dev/null +++ b/configs/12_2017_baselines/retinanet_R-101-FPN_2x.yaml @@ -0,0 +1,41 @@ +MODEL: + TYPE: retinanet + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_RPN: True + RPN_MAX_LEVEL: 7 + RPN_MIN_LEVEL: 3 + COARSEST_STRIDE: 128 + EXTRA_CONV_LEVELS: True +RETINANET: + RETINANET_ON: True + NUM_CONVS: 4 + ASPECT_RATIOS: (1.0, 2.0, 0.5) + SCALES_PER_OCTAVE: 3 + ANCHOR_SCALE: 4 + LOSS_GAMMA: 2.0 + LOSS_ALPHA: 0.25 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + RPN_STRADDLE_THRESH: -1 # default 0 +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 10000 # Per FPN level + RPN_POST_NMS_TOP_N: 2000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/retinanet_R-50-FPN_1x.yaml b/configs/12_2017_baselines/retinanet_R-50-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..35271fa2ff92013f7cb507ee82c52cd6dfc854f3 --- /dev/null +++ b/configs/12_2017_baselines/retinanet_R-50-FPN_1x.yaml @@ -0,0 +1,41 @@ +MODEL: + TYPE: retinanet + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_RPN: True + RPN_MAX_LEVEL: 7 + RPN_MIN_LEVEL: 3 + COARSEST_STRIDE: 128 + EXTRA_CONV_LEVELS: True +RETINANET: + RETINANET_ON: True + NUM_CONVS: 4 + ASPECT_RATIOS: (1.0, 2.0, 0.5) + SCALES_PER_OCTAVE: 3 + ANCHOR_SCALE: 4 + LOSS_GAMMA: 2.0 + LOSS_ALPHA: 0.25 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + RPN_STRADDLE_THRESH: -1 # default 0 +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 10000 # Per FPN level + RPN_POST_NMS_TOP_N: 2000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/retinanet_R-50-FPN_2x.yaml b/configs/12_2017_baselines/retinanet_R-50-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..21acf070f116c8752eb07744002c23e65ec01a8d --- /dev/null +++ b/configs/12_2017_baselines/retinanet_R-50-FPN_2x.yaml @@ -0,0 +1,41 @@ +MODEL: + TYPE: retinanet + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_RPN: True + RPN_MAX_LEVEL: 7 + RPN_MIN_LEVEL: 3 + COARSEST_STRIDE: 128 + EXTRA_CONV_LEVELS: True +RETINANET: + RETINANET_ON: True + NUM_CONVS: 4 + ASPECT_RATIOS: (1.0, 2.0, 0.5) + SCALES_PER_OCTAVE: 3 + ANCHOR_SCALE: 4 + LOSS_GAMMA: 2.0 + LOSS_ALPHA: 0.25 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + RPN_STRADDLE_THRESH: -1 # default 0 +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 10000 # Per FPN level + RPN_POST_NMS_TOP_N: 2000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/retinanet_X-101-32x8d-FPN_1x.yaml b/configs/12_2017_baselines/retinanet_X-101-32x8d-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d06848160c866d12afc69b273abc68c42d97ecfa --- /dev/null +++ b/configs/12_2017_baselines/retinanet_X-101-32x8d-FPN_1x.yaml @@ -0,0 +1,46 @@ +MODEL: + TYPE: retinanet + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_RPN: True + RPN_MAX_LEVEL: 7 + RPN_MIN_LEVEL: 3 + COARSEST_STRIDE: 128 + EXTRA_CONV_LEVELS: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +RETINANET: + RETINANET_ON: True + NUM_CONVS: 4 + ASPECT_RATIOS: (1.0, 2.0, 0.5) + SCALES_PER_OCTAVE: 3 + ANCHOR_SCALE: 4 + LOSS_GAMMA: 2.0 + LOSS_ALPHA: 0.25 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + RPN_STRADDLE_THRESH: -1 # default 0 +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 10000 # Per FPN level + RPN_POST_NMS_TOP_N: 2000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/retinanet_X-101-32x8d-FPN_2x.yaml b/configs/12_2017_baselines/retinanet_X-101-32x8d-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..da71fb6443e56f8f12a63e2f812cd2c63b24d7b8 --- /dev/null +++ b/configs/12_2017_baselines/retinanet_X-101-32x8d-FPN_2x.yaml @@ -0,0 +1,46 @@ +MODEL: + TYPE: retinanet + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_RPN: True + RPN_MAX_LEVEL: 7 + RPN_MIN_LEVEL: 3 + COARSEST_STRIDE: 128 + EXTRA_CONV_LEVELS: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +RETINANET: + RETINANET_ON: True + NUM_CONVS: 4 + ASPECT_RATIOS: (1.0, 2.0, 0.5) + SCALES_PER_OCTAVE: 3 + ANCHOR_SCALE: 4 + LOSS_GAMMA: 2.0 + LOSS_ALPHA: 0.25 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + RPN_STRADDLE_THRESH: -1 # default 0 +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 10000 # Per FPN level + RPN_POST_NMS_TOP_N: 2000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/retinanet_X-101-64x4d-FPN_1x.yaml b/configs/12_2017_baselines/retinanet_X-101-64x4d-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7ac1175ea6d26b566bdcb16e89e4ead659079e6b --- /dev/null +++ b/configs/12_2017_baselines/retinanet_X-101-64x4d-FPN_1x.yaml @@ -0,0 +1,46 @@ +MODEL: + TYPE: retinanet + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_RPN: True + RPN_MAX_LEVEL: 7 + RPN_MIN_LEVEL: 3 + COARSEST_STRIDE: 128 + EXTRA_CONV_LEVELS: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 64 + WIDTH_PER_GROUP: 4 +RETINANET: + RETINANET_ON: True + NUM_CONVS: 4 + ASPECT_RATIOS: (1.0, 2.0, 0.5) + SCALES_PER_OCTAVE: 3 + ANCHOR_SCALE: 4 + LOSS_GAMMA: 2.0 + LOSS_ALPHA: 0.25 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + RPN_STRADDLE_THRESH: -1 # default 0 +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 10000 # Per FPN level + RPN_POST_NMS_TOP_N: 2000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/retinanet_X-101-64x4d-FPN_2x.yaml b/configs/12_2017_baselines/retinanet_X-101-64x4d-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0c2d47431b58227ce48345d9d6e5afb3fd519073 --- /dev/null +++ b/configs/12_2017_baselines/retinanet_X-101-64x4d-FPN_2x.yaml @@ -0,0 +1,46 @@ +MODEL: + TYPE: retinanet + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_RPN: True + RPN_MAX_LEVEL: 7 + RPN_MIN_LEVEL: 3 + COARSEST_STRIDE: 128 + EXTRA_CONV_LEVELS: True +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 64 + WIDTH_PER_GROUP: 4 +RETINANET: + RETINANET_ON: True + NUM_CONVS: 4 + ASPECT_RATIOS: (1.0, 2.0, 0.5) + SCALES_PER_OCTAVE: 3 + ANCHOR_SCALE: 4 + LOSS_GAMMA: 2.0 + LOSS_ALPHA: 0.25 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + RPN_STRADDLE_THRESH: -1 # default 0 +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 10000 # Per FPN level + RPN_POST_NMS_TOP_N: 2000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/rpn_R-101-FPN_1x.yaml b/configs/12_2017_baselines/rpn_R-101-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d85c06b57ad938787b8512f01f2e22b89679a0ca --- /dev/null +++ b/configs/12_2017_baselines/rpn_R-101-FPN_1x.yaml @@ -0,0 +1,32 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + RPN_ONLY: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_RPN: True + RPN_MAX_LEVEL: 6 + RPN_MIN_LEVEL: 2 + RPN_ANCHOR_START_SIZE: 32 + RPN_ASPECT_RATIOS: (0.5, 1, 2) +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 +TEST: + DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival') + SCALE: 800 + MAX_SIZE: 1333 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 2000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/rpn_R-50-C4_1x.yaml b/configs/12_2017_baselines/rpn_R-50-C4_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca1d4745cc21977dd60450bd3ee45ebe9a29ad1c --- /dev/null +++ b/configs/12_2017_baselines/rpn_R-50-C4_1x.yaml @@ -0,0 +1,26 @@ +MODEL: + TYPE: rpn + CONV_BODY: ResNet.add_ResNet50_conv4_body + NUM_CLASSES: 81 + RPN_ONLY: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +RPN: + SIZES: (32, 64, 128, 256, 512) +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 +TEST: + DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival') + SCALE: 800 + MAX_SIZE: 1333 +USE_NCCL: False +OUTPUT_DIR: . diff --git a/configs/12_2017_baselines/rpn_R-50-FPN_1x.yaml b/configs/12_2017_baselines/rpn_R-50-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..90990fe33783cacece36246c1ea004849649ad5f --- /dev/null +++ b/configs/12_2017_baselines/rpn_R-50-FPN_1x.yaml @@ -0,0 +1,32 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 + RPN_ONLY: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_RPN: True + RPN_MAX_LEVEL: 6 + RPN_MIN_LEVEL: 2 + RPN_ANCHOR_START_SIZE: 32 + RPN_ASPECT_RATIOS: (0.5, 1, 2) +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 +TEST: + DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival') + SCALE: 800 + MAX_SIZE: 1333 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + 
RPN_POST_NMS_TOP_N: 2000 +OUTPUT_DIR: . diff --git a/configs/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml b/configs/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..98b78461648e9b6d82ac41447b6b7c8de6c50229 --- /dev/null +++ b/configs/12_2017_baselines/rpn_X-101-32x8d-FPN_1x.yaml @@ -0,0 +1,37 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + RPN_ONLY: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_RPN: True + RPN_MAX_LEVEL: 6 + RPN_MIN_LEVEL: 2 + RPN_ANCHOR_START_SIZE: 32 + RPN_ASPECT_RATIOS: (0.5, 1, 2) +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 +TEST: + DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival') + SCALE: 800 + MAX_SIZE: 1333 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 2000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml b/configs/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf92ba6a90c61ae75b68b43714df847559ec5531 --- /dev/null +++ b/configs/12_2017_baselines/rpn_X-101-64x4d-FPN_1x.yaml @@ -0,0 +1,37 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 81 + RPN_ONLY: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_RPN: True + RPN_MAX_LEVEL: 6 + RPN_MIN_LEVEL: 2 + RPN_ANCHOR_START_SIZE: 32 + RPN_ASPECT_RATIOS: (0.5, 1, 2) +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 64 + WIDTH_PER_GROUP: 4 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 +TEST: + DATASETS: ('coco_2014_minival','coco_2014_train','coco_2014_valminusminival') + SCALE: 800 + MAX_SIZE: 1333 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 2000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml b/configs/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a1e8f4ba2f2d9b0bcf787627c66b994aa8746f90 --- /dev/null +++ b/configs/12_2017_baselines/rpn_person_only_R-101-FPN_1x.yaml @@ -0,0 +1,32 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 2 + RPN_ONLY: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_RPN: True + RPN_MAX_LEVEL: 6 + RPN_MIN_LEVEL: 2 + RPN_ANCHOR_START_SIZE: 32 + RPN_ASPECT_RATIOS: (0.5, 1, 2) +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-101.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 +TEST: + DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test') + SCALE: 800 + MAX_SIZE: 1333 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 2000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml b/configs/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..395a91de629b6a8ab1d6b1d6c4e5513a4e1ca89b --- /dev/null +++ b/configs/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml @@ -0,0 +1,32 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 2 + RPN_ONLY: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_RPN: True + RPN_MAX_LEVEL: 6 + RPN_MIN_LEVEL: 2 + RPN_ANCHOR_START_SIZE: 32 + RPN_ASPECT_RATIOS: (0.5, 1, 2) +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 +TEST: + DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test') + SCALE: 800 + MAX_SIZE: 1333 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 2000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml b/configs/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a797dda128516708e7f5ac7c96650d08c9ea1e24 --- /dev/null +++ b/configs/12_2017_baselines/rpn_person_only_X-101-32x8d-FPN_1x.yaml @@ -0,0 +1,37 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 2 + RPN_ONLY: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_RPN: True + RPN_MAX_LEVEL: 6 + RPN_MIN_LEVEL: 2 + RPN_ANCHOR_START_SIZE: 32 + RPN_ASPECT_RATIOS: (0.5, 1, 2) +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/20171220/X-101-32x8d.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 +TEST: + DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test') + SCALE: 800 + MAX_SIZE: 1333 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 2000 +OUTPUT_DIR: . 
diff --git a/configs/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml b/configs/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4f7b3082c66ce5fb3ebba84315598575a4495249 --- /dev/null +++ b/configs/12_2017_baselines/rpn_person_only_X-101-64x4d-FPN_1x.yaml @@ -0,0 +1,37 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet101_conv5_body + NUM_CLASSES: 2 + RPN_ONLY: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_RPN: True + RPN_MAX_LEVEL: 6 + RPN_MIN_LEVEL: 2 + RPN_ANCHOR_START_SIZE: 32 + RPN_ASPECT_RATIOS: (0.5, 1, 2) +RESNETS: + STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models + TRANS_FUNC: bottleneck_transformation + NUM_GROUPS: 64 + WIDTH_PER_GROUP: 4 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 +TEST: + DATASETS: ('keypoints_coco_2014_minival', 'keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2015_test') + SCALE: 800 + MAX_SIZE: 1333 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 2000 +OUTPUT_DIR: . 
diff --git a/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml b/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..83ea2acdb354d91edb27d6dbc6c1a3b6f19383e1 --- /dev/null +++ b/configs/getting_started/tutorial_1gpu_e2e_faster_rcnn_R-50-FPN.yaml @@ -0,0 +1,54 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True +NUM_GPUS: 1 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.0025 + GAMMA: 0.1 + MAX_ITER: 60000 + STEPS: [0, 30000, 40000] + # Equivalent schedules with... + # 1 GPU: + # BASE_LR: 0.0025 + # MAX_ITER: 60000 + # STEPS: [0, 30000, 40000] + # 2 GPUs: + # BASE_LR: 0.005 + # MAX_ITER: 30000 + # STEPS: [0, 15000, 20000] + # 4 GPUs: + # BASE_LR: 0.01 + # MAX_ITER: 15000 + # STEPS: [0, 7500, 10000] + # 8 GPUs: + # BASE_LR: 0.02 + # MAX_ITER: 7500 + # STEPS: [0, 3750, 5000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train',) + SCALES: (500,) + MAX_SIZE: 833 + BATCH_SIZE_PER_IM: 256 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 500 + MAX_SIZE: 833 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/getting_started/tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml b/configs/getting_started/tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8df147030efbe7c54cf6ea9aef48b99a9f10bca --- /dev/null +++ b/configs/getting_started/tutorial_2gpu_e2e_faster_rcnn_R-50-FPN.yaml @@ -0,0 +1,54 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True +NUM_GPUS: 2 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.005 + GAMMA: 0.1 + MAX_ITER: 30000 + STEPS: [0, 15000, 20000] + # Equivalent schedules with... + # 1 GPU: + # BASE_LR: 0.0025 + # MAX_ITER: 60000 + # STEPS: [0, 30000, 40000] + # 2 GPUs: + # BASE_LR: 0.005 + # MAX_ITER: 30000 + # STEPS: [0, 15000, 20000] + # 4 GPUs: + # BASE_LR: 0.01 + # MAX_ITER: 15000 + # STEPS: [0, 7500, 10000] + # 8 GPUs: + # BASE_LR: 0.02 + # MAX_ITER: 7500 + # STEPS: [0, 3750, 5000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train',) + SCALES: (500,) + MAX_SIZE: 833 + BATCH_SIZE_PER_IM: 256 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 500 + MAX_SIZE: 833 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/getting_started/tutorial_4gpu_e2e_faster_rcnn_R-50-FPN.yaml b/configs/getting_started/tutorial_4gpu_e2e_faster_rcnn_R-50-FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5ffffc665e21f50dc154cada1640a89303278985 --- /dev/null +++ b/configs/getting_started/tutorial_4gpu_e2e_faster_rcnn_R-50-FPN.yaml @@ -0,0 +1,54 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True +NUM_GPUS: 4 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.01 + GAMMA: 0.1 + MAX_ITER: 15000 + STEPS: [0, 7500, 10000] + # Equivalent schedules with... + # 1 GPU: + # BASE_LR: 0.0025 + # MAX_ITER: 60000 + # STEPS: [0, 30000, 40000] + # 2 GPUs: + # BASE_LR: 0.005 + # MAX_ITER: 30000 + # STEPS: [0, 15000, 20000] + # 4 GPUs: + # BASE_LR: 0.01 + # MAX_ITER: 15000 + # STEPS: [0, 7500, 10000] + # 8 GPUs: + # BASE_LR: 0.02 + # MAX_ITER: 7500 + # STEPS: [0, 3750, 5000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train',) + SCALES: (500,) + MAX_SIZE: 833 + BATCH_SIZE_PER_IM: 256 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 500 + MAX_SIZE: 833 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/getting_started/tutorial_8gpu_e2e_faster_rcnn_R-50-FPN.yaml b/configs/getting_started/tutorial_8gpu_e2e_faster_rcnn_R-50-FPN.yaml new file mode 100644 index 0000000000000000000000000000000000000000..21ce1fe6defc7e0a9fc5655ef589b88f70867559 --- /dev/null +++ b/configs/getting_started/tutorial_8gpu_e2e_faster_rcnn_R-50-FPN.yaml @@ -0,0 +1,54 @@ +MODEL: + TYPE: generalized_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 7500 + STEPS: [0, 3750, 5000] + # Equivalent schedules with... + # 1 GPU: + # BASE_LR: 0.0025 + # MAX_ITER: 60000 + # STEPS: [0, 30000, 40000] + # 2 GPUs: + # BASE_LR: 0.005 + # MAX_ITER: 30000 + # STEPS: [0, 15000, 20000] + # 4 GPUs: + # BASE_LR: 0.01 + # MAX_ITER: 15000 + # STEPS: [0, 7500, 10000] + # 8 GPUs: + # BASE_LR: 0.02 + # MAX_ITER: 7500 + # STEPS: [0, 3750, 5000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train',) + SCALES: (500,) + MAX_SIZE: 833 + BATCH_SIZE_PER_IM: 256 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 500 + MAX_SIZE: 833 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 +OUTPUT_DIR: . 
diff --git a/configs/test_time_aug/e2e_mask_rcnn_R-50-FPN_2x.yaml b/configs/test_time_aug/e2e_mask_rcnn_R-50-FPN_2x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c2bfd3b583cb09a40a8767b6142dc6f0e46fb90f --- /dev/null +++ b/configs/test_time_aug/e2e_mask_rcnn_R-50-FPN_2x.yaml @@ -0,0 +1,79 @@ +MODEL: + TYPE: mask_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 81 + FASTER_RCNN: True + MASK_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 180000 + STEPS: [0, 120000, 160000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +MRCNN: + ROI_MASK_HEAD: mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs + RESOLUTION: 28 # (output mask resolution) default 14 + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 # default 7 + ROI_XFORM_SAMPLING_RATIO: 2 # default 0 + DILATION: 1 # default 2 + CONV_INIT: MSRAFill # default GaussianFill +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('coco_2014_train', 'coco_2014_valminusminival') + SCALES: (800,) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 + RPN_PRE_NMS_TOP_N: 2000 # Per FPN level +TEST: + DATASETS: ('coco_2014_minival',) + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + RPN_PRE_NMS_TOP_N: 1000 # Per FPN level + RPN_POST_NMS_TOP_N: 1000 + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/35859007/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml.01_49_07.By8nQcCH/output/train/coco_2014_train:coco_2014_valminusminival/generalized_rcnn/model_final.pkl + + # -- Test time augmentation example -- # + BBOX_AUG: + ENABLED: True + SCORE_HEUR: UNION # AVG NOTE: cannot use AVG for e2e model + COORD_HEUR: UNION # AVG NOTE: cannot use AVG for e2e model + H_FLIP: True + SCALES: (400, 500, 600, 700, 900, 1000, 
1100, 1200) + MAX_SIZE: 2000 + SCALE_H_FLIP: True + SCALE_SIZE_DEP: False + AREA_TH_LO: 2500 # 50^2 + AREA_TH_HI: 32400 # 180^2 + ASPECT_RATIOS: () + ASPECT_RATIO_H_FLIP: False + MASK_AUG: + ENABLED: True + HEUR: SOFT_AVG + H_FLIP: True + SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) + MAX_SIZE: 2000 + SCALE_H_FLIP: True + SCALE_SIZE_DEP: False + AREA_TH: 32400 # 180^2 + ASPECT_RATIOS: () + ASPECT_RATIO_H_FLIP: False + BBOX_VOTE: + ENABLED: True + VOTE_TH: 0.9 + # -- Test time augmentation example -- # + +USE_NCCL: False +OUTPUT_DIR: . diff --git a/configs/test_time_aug/keypoint_rcnn_R-50-FPN_1x.yaml b/configs/test_time_aug/keypoint_rcnn_R-50-FPN_1x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8e0d17e927e2f963458750fb27e34b80dbf2ac4a --- /dev/null +++ b/configs/test_time_aug/keypoint_rcnn_R-50-FPN_1x.yaml @@ -0,0 +1,77 @@ +MODEL: + TYPE: keypoint_rcnn + CONV_BODY: FPN.add_fpn_ResNet50_conv5_body + NUM_CLASSES: 2 + KEYPOINTS_ON: True +NUM_GPUS: 8 +SOLVER: + WEIGHT_DECAY: 0.0001 + LR_POLICY: steps_with_decay + BASE_LR: 0.02 + GAMMA: 0.1 + MAX_ITER: 90000 + STEPS: [0, 60000, 80000] +FPN: + FPN_ON: True + MULTILEVEL_ROIS: True + MULTILEVEL_RPN: True # accidentally True; disable in the future +FAST_RCNN: + ROI_BOX_HEAD: fast_rcnn_heads.add_roi_2mlp_head + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 7 + ROI_XFORM_SAMPLING_RATIO: 2 +KRCNN: + ROI_KEYPOINTS_HEAD: keypoint_rcnn_heads.add_roi_pose_head_v1convX + NUM_STACKED_CONVS: 8 + NUM_KEYPOINTS: 17 + USE_DECONV_OUTPUT: True + CONV_INIT: MSRAFill + CONV_HEAD_DIM: 512 + UP_SCALE: 2 + HEATMAP_SIZE: 56 # ROI_XFORM_RESOLUTION (14) * UP_SCALE (2) * USE_DECONV_OUTPUT (2) + ROI_XFORM_METHOD: RoIAlign + ROI_XFORM_RESOLUTION: 14 + ROI_XFORM_SAMPLING_RATIO: 2 + KEYPOINT_CONFIDENCE: bbox +TRAIN: + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl + DATASETS: ('keypoints_coco_2014_train', 'keypoints_coco_2014_valminusminival') + PROPOSAL_FILES: 
('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_train/generalized_rcnn/rpn_proposals.pkl', 'https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_valminusminival/generalized_rcnn/rpn_proposals.pkl') + SCALES: (640, 672, 704, 736, 768, 800) + MAX_SIZE: 1333 + BATCH_SIZE_PER_IM: 512 +TEST: + DATASETS: ('keypoints_coco_2014_minival',) + PROPOSAL_FILES: ('https://dl.fbaipublicfiles.com/detectron/35998996/12_2017_baselines/rpn_person_only_R-50-FPN_1x.yaml.08_10_08.0ZWmJm6F/output/test/keypoints_coco_2014_minival/generalized_rcnn/rpn_proposals.pkl',) + PROPOSAL_LIMIT: 1000 + SCALE: 800 + MAX_SIZE: 1333 + NMS: 0.5 + WEIGHTS: https://dl.fbaipublicfiles.com/detectron/37651887/12_2017_baselines/keypoint_rcnn_R-50-FPN_s1x.yaml.20_01_40.FDjUQ7VX/output/train/keypoints_coco_2014_train:keypoints_coco_2014_valminusminival/generalized_rcnn/model_final.pkl + + # -- Test time augmentation example -- # + BBOX_AUG: + ENABLED: True + SCORE_HEUR: AVG + COORD_HEUR: AVG + H_FLIP: True + SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) + MAX_SIZE: 2000 + SCALE_H_FLIP: True + SCALE_SIZE_DEP: False + AREA_TH_LO: 2500 # 50^2 + AREA_TH_HI: 32400 # 180^2 + KPS_AUG: + ENABLED: True + HEUR: HM_AVG + H_FLIP: True + SCALES: (400, 500, 600, 700, 900, 1000, 1100, 1200) + MAX_SIZE: 2000 + SCALE_H_FLIP: True + SCALE_SIZE_DEP: True + AREA_TH: 22500 # 150^2 + ASPECT_RATIOS: () + ASPECT_RATIO_H_FLIP: False + # -- Test time augmentation example -- # + +OUTPUT_DIR: . 
diff --git a/configs/tools/convert_cityscapes_to_coco.py b/configs/tools/convert_cityscapes_to_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..3583eca1aef66f37dbc9621ff006cde5885233f2 --- /dev/null +++ b/configs/tools/convert_cityscapes_to_coco.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python + +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import h5py +import json +import os +import imageio +import sys + +import cityscapesscripts.evaluation.instances2dict_with_polygons as cs + +import detectron.utils.segms as segms_util +import detectron.utils.boxes as bboxs_util + + +def parse_args(): + parser = argparse.ArgumentParser(description='Convert dataset') + parser.add_argument( + '--dataset', help="cocostuff, cityscapes", default=None, type=str) + parser.add_argument( + '--outdir', help="output dir for json files", default=None, type=str) + parser.add_argument( + '--datadir', help="data dir for annotations to be converted", + default=None, type=str) + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + return parser.parse_args() + + +def convert_coco_stuff_mat(data_dir, out_dir): + """Convert to png and save json with path. 
This currently only contains + the segmentation labels for objects+stuff in cocostuff - if we need to + combine with other labels from original COCO that will be a TODO.""" + sets = ['train', 'val'] + categories = [] + json_name = 'coco_stuff_%s.json' + ann_dict = {} + for data_set in sets: + file_list = os.path.join(data_dir, '%s.txt') + images = [] + with open(file_list % data_set) as f: + for img_id, img_name in enumerate(f): + img_name = img_name.replace('coco', 'COCO').strip('\n') + image = {} + mat_file = os.path.join( + data_dir, 'annotations/%s.mat' % img_name) + data = h5py.File(mat_file, 'r') + labelMap = data.get('S') + if len(categories) == 0: + labelNames = data.get('names') + for idx, n in enumerate(labelNames): + categories.append( + {"id": idx, "name": ''.join(chr(i) for i in data[ + n[0]])}) + ann_dict['categories'] = categories + imageio.imsave( + os.path.join(data_dir, img_name + '.png'), labelMap) + image['width'] = labelMap.shape[0] + image['height'] = labelMap.shape[1] + image['file_name'] = img_name + image['seg_file_name'] = img_name + image['id'] = img_id + images.append(image) + ann_dict['images'] = images + print("Num images: %s" % len(images)) + with open(os.path.join(out_dir, json_name % data_set), 'wb') as outfile: + outfile.write(json.dumps(ann_dict)) + + +# for Cityscapes +def getLabelID(self, instID): + if (instID < 1000): + return instID + else: + return int(instID / 1000) + + +def convert_cityscapes_instance_only( + data_dir, out_dir): + """Convert from cityscapes format to COCO instance seg format - polygons""" + sets = [ + 'gtFine_val', + # 'gtFine_train', + # 'gtFine_test', + + # 'gtCoarse_train', + # 'gtCoarse_val', + # 'gtCoarse_train_extra' + ] + ann_dirs = [ + 'gtFine_trainvaltest/gtFine/val', + # 'gtFine_trainvaltest/gtFine/train', + # 'gtFine_trainvaltest/gtFine/test', + + # 'gtCoarse/train', + # 'gtCoarse/train_extra', + # 'gtCoarse/val' + ] + json_name = 'instancesonly_filtered_%s.json' + ends_in = '%s_polygons.json' + 
img_id = 0 + ann_id = 0 + cat_id = 1 + category_dict = {} + + category_instancesonly = [ + 'person', + 'rider', + 'car', + 'truck', + 'bus', + 'train', + 'motorcycle', + 'bicycle', + ] + + for data_set, ann_dir in zip(sets, ann_dirs): + print('Starting %s' % data_set) + ann_dict = {} + images = [] + annotations = [] + ann_dir = os.path.join(data_dir, ann_dir) + for root, _, files in os.walk(ann_dir): + for filename in files: + if filename.endswith(ends_in % data_set.split('_')[0]): + if len(images) % 50 == 0: + print("Processed %s images, %s annotations" % ( + len(images), len(annotations))) + json_ann = json.load(open(os.path.join(root, filename))) + image = {} + image['id'] = img_id + img_id += 1 + + image['width'] = json_ann['imgWidth'] + image['height'] = json_ann['imgHeight'] + image['file_name'] = filename[:-len( + ends_in % data_set.split('_')[0])] + 'leftImg8bit.png' + image['seg_file_name'] = filename[:-len( + ends_in % data_set.split('_')[0])] + \ + '%s_instanceIds.png' % data_set.split('_')[0] + images.append(image) + + fullname = os.path.join(root, image['seg_file_name']) + objects = cs.instances2dict_with_polygons( + [fullname], verbose=False)[fullname] + + for object_cls in objects: + if object_cls not in category_instancesonly: + continue # skip non-instance categories + + for obj in objects[object_cls]: + if obj['contours'] == []: + print('Warning: empty contours.') + continue # skip non-instance categories + + len_p = [len(p) for p in obj['contours']] + if min(len_p) <= 4: + print('Warning: invalid contours.') + continue # skip non-instance categories + + ann = {} + ann['id'] = ann_id + ann_id += 1 + ann['image_id'] = image['id'] + ann['segmentation'] = obj['contours'] + + if object_cls not in category_dict: + category_dict[object_cls] = cat_id + cat_id += 1 + ann['category_id'] = category_dict[object_cls] + ann['iscrowd'] = 0 + ann['area'] = obj['pixelCount'] + ann['bbox'] = bboxs_util.xyxy_to_xywh( + segms_util.polys_to_boxes( + 
[ann['segmentation']])).tolist()[0] + + annotations.append(ann) + + ann_dict['images'] = images + categories = [{"id": category_dict[name], "name": name} for name in + category_dict] + ann_dict['categories'] = categories + ann_dict['annotations'] = annotations + print("Num categories: %s" % len(categories)) + print("Num images: %s" % len(images)) + print("Num annotations: %s" % len(annotations)) + with open(os.path.join(out_dir, json_name % data_set), 'wb') as outfile: + outfile.write(json.dumps(ann_dict)) + + +if __name__ == '__main__': + args = parse_args() + if args.dataset == "cityscapes_instance_only": + convert_cityscapes_instance_only(args.datadir, args.outdir) + elif args.dataset == "cocostuff": + convert_coco_stuff_mat(args.datadir, args.outdir) + else: + print("Dataset not supported: %s" % args.dataset) diff --git a/configs/tools/convert_coco_model_to_cityscapes.py b/configs/tools/convert_coco_model_to_cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..11dec595af064db202b366351cabc7cffd16125c --- /dev/null +++ b/configs/tools/convert_coco_model_to_cityscapes.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python + +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +# Convert a detection model trained for COCO into a model that can be fine-tuned +# on cityscapes +# +# cityscapes_to_coco + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import numpy as np +import os +import sys + +import detectron.datasets.coco_to_cityscapes_id as cs +from detectron.utils.io import load_object +from detectron.utils.io import save_object + +NUM_CS_CLS = 9 +NUM_COCO_CLS = 81 + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert a COCO pre-trained model for use with Cityscapes') + parser.add_argument( + '--coco_model', dest='coco_model_file_name', + help='Pretrained network weights file path', + default=None, type=str) + parser.add_argument( + '--convert_func', dest='convert_func', + help='Blob conversion function', + default='cityscapes_to_coco', type=str) + parser.add_argument( + '--output', dest='out_file_name', + help='Output file path', + default=None, type=str) + + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + + args = parser.parse_args() + return args + + +def convert_coco_blobs_to_cityscape_blobs(model_dict): + for k, v in model_dict['blobs'].items(): + if v.shape[0] == NUM_COCO_CLS or v.shape[0] == 4 * NUM_COCO_CLS: + coco_blob = model_dict['blobs'][k] + print( + 'Converting COCO blob {} with shape {}'. + format(k, coco_blob.shape) + ) + cs_blob = convert_coco_blob_to_cityscapes_blob( + coco_blob, args.convert_func + ) + print(' -> converted shape {}'.format(cs_blob.shape)) + model_dict['blobs'][k] = cs_blob + + +def convert_coco_blob_to_cityscapes_blob(coco_blob, convert_func): + # coco blob (81, ...) or (81*4, ...) 
+ coco_shape = coco_blob.shape + leading_factor = int(coco_shape[0] / NUM_COCO_CLS) + tail_shape = list(coco_shape[1:]) + assert leading_factor == 1 or leading_factor == 4 + + # Reshape in [num_classes, ...] form for easier manipulations + coco_blob = coco_blob.reshape([NUM_COCO_CLS, -1] + tail_shape) + # Default initialization uses Gaussian with mean and std to match the + # existing parameters + std = coco_blob.std() + mean = coco_blob.mean() + cs_shape = [NUM_CS_CLS] + list(coco_blob.shape[1:]) + cs_blob = (np.random.randn(*cs_shape) * std + mean).astype(np.float32) + + # Replace random parameters with COCO parameters if class mapping exists + for i in range(NUM_CS_CLS): + coco_cls_id = getattr(cs, convert_func)(i) + if coco_cls_id >= 0: # otherwise ignore (rand init) + cs_blob[i] = coco_blob[coco_cls_id] + + cs_shape = [NUM_CS_CLS * leading_factor] + tail_shape + return cs_blob.reshape(cs_shape) + + +def remove_momentum(model_dict): + for k in model_dict['blobs'].keys(): + if k.endswith('_momentum'): + del model_dict['blobs'][k] + + +def load_and_convert_coco_model(args): + model_dict = load_object(args.coco_model_file_name) + remove_momentum(model_dict) + convert_coco_blobs_to_cityscape_blobs(model_dict) + return model_dict + + +if __name__ == '__main__': + args = parse_args() + print(args) + assert os.path.exists(args.coco_model_file_name), \ + 'Weights file does not exist' + weights = load_and_convert_coco_model(args) + + save_object(weights, args.out_file_name) + print('Wrote blobs to {}:'.format(args.out_file_name)) + print(sorted(weights['blobs'].keys())) diff --git a/configs/tools/convert_pkl_to_pb.py b/configs/tools/convert_pkl_to_pb.py new file mode 100644 index 0000000000000000000000000000000000000000..522d83896ed5c075cd4ac1c11c83abe3719d4325 --- /dev/null +++ b/configs/tools/convert_pkl_to_pb.py @@ -0,0 +1,696 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2017-present, Facebook, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Script to convert the model (.yaml and .pkl) trained by train_net to a +standard Caffe2 model in pb format (model.pb and model_init.pb). The converted +model is good for production usage, as it could run independently and efficiently +on CPU, GPU and mobile without depending on the detectron codebase. + +Please see Caffe2 tutorial ( +https://caffe2.ai/docs/tutorial-loading-pre-trained-models.html) for loading +the converted model, and run_model_pb() for running the model for inference. 
+""" + +from __future__ import absolute_import, division, print_function, unicode_literals + +import argparse +import copy +import os +import pprint +import sys + +import caffe2.python.utils as putils +import cv2 # NOQA (Must import before importing caffe2 due to bug in cv2) +import detectron.core.test_engine as test_engine +import detectron.utils.blob as blob_utils +import detectron.utils.c2 as c2_utils +import detectron.utils.model_convert_utils as mutils +import detectron.utils.vis as vis_utils +import numpy as np +from caffe2.caffe2.fb.predictor import predictor_exporter, predictor_py_utils +from caffe2.proto import caffe2_pb2 +from caffe2.python import core, workspace +from caffe2.python.predictor_constants import predictor_constants +from detectron.core.config import ( + assert_and_infer_cfg, + cfg, + merge_cfg_from_file, + merge_cfg_from_list, +) +from detectron.modeling import generate_anchors +from detectron.utils.logging import setup_logging +from detectron.utils.model_convert_utils import convert_op_in_proto, op_filter + + +c2_utils.import_contrib_ops() +c2_utils.import_detectron_ops() + +# OpenCL may be enabled by default in OpenCV3; disable it because it's not +# thread safe and causes unwanted GPU memory allocations. 
+cv2.ocl.setUseOpenCL(False) + +logger = setup_logging(__name__) + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Convert a trained network to pb format" + ) + parser.add_argument( + "--cfg", dest="cfg_file", help="optional config file", default=None, type=str + ) + parser.add_argument( + "--net_name", + dest="net_name", + help="optional name for the net", + default="detectron", + type=str, + ) + parser.add_argument( + "--out_dir", dest="out_dir", help="output dir", default=None, type=str + ) + parser.add_argument( + "--test_img", + dest="test_img", + help="optional test image, used to verify the model conversion", + default=None, + type=str, + ) + parser.add_argument( + "--fuse_af", dest="fuse_af", help="1 to fuse_af", default=1, type=int + ) + parser.add_argument( + "--device", + dest="device", + help="Device to run the model on", + choices=["cpu", "gpu"], + default="cpu", + type=str, + ) + parser.add_argument( + "--net_execution_type", + dest="net_execution_type", + help="caffe2 net execution type", + choices=["simple", "dag"], + default="simple", + type=str, + ) + parser.add_argument( + "--use_nnpack", + dest="use_nnpack", + help="Use nnpack for conv", + default=1, + type=int, + ) + parser.add_argument( + "--logdb", + dest="logdb", + help="output to logfiledb instead of pb files", + default=0, + type=int, + ) + parser.add_argument( + "opts", + help="See detectron/core/config.py for all options", + default=None, + nargs=argparse.REMAINDER, + ) + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + ret = parser.parse_args() + ret.out_dir = os.path.abspath(ret.out_dir) + if ret.device == "gpu" and ret.use_nnpack: + logger.warn("Should not use mobile engine for gpu model.") + ret.use_nnpack = 0 + + return ret + + +def unscope_name(name): + return c2_utils.UnscopeName(name) + + +def reset_names(names): + for i in range(len(names)): + names[i] = unscope_name(names[i]) + + +def convert_collect_and_distribute( + op, + blobs, + 
roi_canonical_scale, + roi_canonical_level, + roi_max_level, + roi_min_level, + rpn_max_level, + rpn_min_level, + rpn_post_nms_topN, +): + print( + "Converting CollectAndDistributeFpnRpnProposals" + " Python -> C++:\n{}".format(op) + ) + assert op.name.startswith( + "CollectAndDistributeFpnRpnProposalsOp" + ), "Not valid CollectAndDistributeFpnRpnProposalsOp" + + inputs = [x for x in op.input] + ret = core.CreateOperator( + "CollectAndDistributeFpnRpnProposals", + inputs, + list(op.output), + roi_canonical_scale=roi_canonical_scale, + roi_canonical_level=roi_canonical_level, + roi_max_level=roi_max_level, + roi_min_level=roi_min_level, + rpn_max_level=rpn_max_level, + rpn_min_level=rpn_min_level, + rpn_post_nms_topN=rpn_post_nms_topN, + ) + return ret + + +def convert_gen_proposals( + op, blobs, rpn_pre_nms_topN, rpn_post_nms_topN, rpn_nms_thresh, rpn_min_size +): + print("Converting GenerateProposals Python -> C++:\n{}".format(op)) + assert op.name.startswith("GenerateProposalsOp"), "Not valid GenerateProposalsOp" + + spatial_scale = mutils.get_op_arg_valf(op, "spatial_scale", None) + assert spatial_scale is not None + + lvl = int(op.input[0][-1]) if op.input[0][-1].isdigit() else None + + inputs = [x for x in op.input] + anchor_name = "anchor{}".format(lvl) if lvl else "anchor" + inputs.append(anchor_name) + anchor_sizes = ( + (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.0 ** (lvl - cfg.FPN.RPN_MIN_LEVEL),) + if lvl + else cfg.RPN.SIZES + ) + blobs[anchor_name] = get_anchors(spatial_scale, anchor_sizes) + print("anchors {}".format(blobs[anchor_name])) + + ret = core.CreateOperator( + "GenerateProposals", + inputs, + list(op.output), + spatial_scale=spatial_scale, + pre_nms_topN=rpn_pre_nms_topN, + post_nms_topN=rpn_post_nms_topN, + nms_thresh=rpn_nms_thresh, + min_size=rpn_min_size, + correct_transform_coords=True, + ) + return ret, anchor_name + + +def get_anchors(spatial_scale, anchor_sizes): + anchors = generate_anchors.generate_anchors( + stride=1.0 / spatial_scale, + 
sizes=anchor_sizes, + aspect_ratios=cfg.RPN.ASPECT_RATIOS, + ).astype(np.float32) + return anchors + + +def reset_blob_names(blobs): + ret = {unscope_name(x): blobs[x] for x in blobs} + blobs.clear() + blobs.update(ret) + + +def convert_net(args, net, blobs): + @op_filter() + def convert_op_name(op): + if args.device != "gpu": + if op.engine != "DEPTHWISE_3x3": + op.engine = "" + op.device_option.CopyFrom(caffe2_pb2.DeviceOption()) + reset_names(op.input) + reset_names(op.output) + return [op] + + @op_filter(type="Python") + def convert_python(op): + if op.name.startswith("GenerateProposalsOp"): + gen_proposals_op, ext_input = convert_gen_proposals( + op, + blobs, + rpn_min_size=float(cfg.TEST.RPN_MIN_SIZE), + rpn_post_nms_topN=cfg.TEST.RPN_POST_NMS_TOP_N, + rpn_pre_nms_topN=cfg.TEST.RPN_PRE_NMS_TOP_N, + rpn_nms_thresh=cfg.TEST.RPN_NMS_THRESH, + ) + net.external_input.extend([ext_input]) + return [gen_proposals_op] + elif op.name.startswith("CollectAndDistributeFpnRpnProposalsOp"): + collect_dist_op = convert_collect_and_distribute( + op, + blobs, + roi_canonical_scale=cfg.FPN.ROI_CANONICAL_SCALE, + roi_canonical_level=cfg.FPN.ROI_CANONICAL_LEVEL, + roi_max_level=cfg.FPN.ROI_MAX_LEVEL, + roi_min_level=cfg.FPN.ROI_MIN_LEVEL, + rpn_max_level=cfg.FPN.RPN_MAX_LEVEL, + rpn_min_level=cfg.FPN.RPN_MIN_LEVEL, + rpn_post_nms_topN=cfg.TEST.RPN_POST_NMS_TOP_N, + ) + return [collect_dist_op] + else: + raise ValueError("Failed to convert Python op {}".format(op.name)) + + # Only convert UpsampleNearest to ResizeNearest when converting to pb so that the existing models is unchanged + # https://github.com/facebookresearch/Detectron/pull/372#issuecomment-410248561 + @op_filter(type="UpsampleNearest") + def convert_upsample_nearest(op): + for arg in op.arg: + if arg.name == "scale": + scale = arg.i + break + else: + raise KeyError('No attribute "scale" in UpsampleNearest op') + resize_nearest_op = core.CreateOperator( + "ResizeNearest", + list(op.input), + list(op.output), + 
name=op.name, + width_scale=float(scale), + height_scale=float(scale), + ) + return resize_nearest_op + + @op_filter() + def convert_rpn_rois(op): + for j in range(len(op.input)): + if op.input[j] == "rois": + print( + "Converting op {} input name: rois -> rpn_rois:\n{}".format( + op.type, op + ) + ) + op.input[j] = "rpn_rois" + for j in range(len(op.output)): + if op.output[j] == "rois": + print( + "Converting op {} output name: rois -> rpn_rois:\n{}".format( + op.type, op + ) + ) + op.output[j] = "rpn_rois" + return [op] + + @op_filter(type_in=["StopGradient", "Alias"]) + def convert_remove_op(op): + print("Removing op {}:\n{}".format(op.type, op)) + return [] + + # We want to apply to all operators, including converted + # so run separately + convert_op_in_proto(net, convert_remove_op) + convert_op_in_proto(net, convert_upsample_nearest) + convert_op_in_proto(net, convert_python) + convert_op_in_proto(net, convert_op_name) + convert_op_in_proto(net, convert_rpn_rois) + + reset_names(net.external_input) + reset_names(net.external_output) + + reset_blob_names(blobs) + + +def add_bbox_ops(args, net, blobs): + new_ops = [] + new_external_outputs = [] + + # Operators for bboxes + op_box = core.CreateOperator( + "BBoxTransform", + ["rpn_rois", "bbox_pred", "im_info"], + ["pred_bbox"], + weights=cfg.MODEL.BBOX_REG_WEIGHTS, + apply_scale=False, + correct_transform_coords=True, + ) + new_ops.extend([op_box]) + + blob_prob = "cls_prob" + blob_box = "pred_bbox" + op_nms = core.CreateOperator( + "BoxWithNMSLimit", + [blob_prob, blob_box], + ["score_nms", "bbox_nms", "class_nms"], + arg=[ + putils.MakeArgument("score_thresh", cfg.TEST.SCORE_THRESH), + putils.MakeArgument("nms", cfg.TEST.NMS), + putils.MakeArgument("detections_per_im", cfg.TEST.DETECTIONS_PER_IM), + putils.MakeArgument("soft_nms_enabled", cfg.TEST.SOFT_NMS.ENABLED), + putils.MakeArgument("soft_nms_method", cfg.TEST.SOFT_NMS.METHOD), + putils.MakeArgument("soft_nms_sigma", cfg.TEST.SOFT_NMS.SIGMA), + ], + ) + 
new_ops.extend([op_nms]) + new_external_outputs.extend(["score_nms", "bbox_nms", "class_nms"]) + + net.Proto().op.extend(new_ops) + net.Proto().external_output.extend(new_external_outputs) + + +def convert_model_gpu(args, net, init_net): + assert args.device == "gpu" + + ret_net = copy.deepcopy(net) + ret_init_net = copy.deepcopy(init_net) + + cdo_cuda = mutils.get_device_option_cuda() + cdo_cpu = mutils.get_device_option_cpu() + + CPU_OPS = [ + ["CollectAndDistributeFpnRpnProposals", None], + ["GenerateProposals", None], + ["BBoxTransform", None], + ["BoxWithNMSLimit", None], + ] + CPU_BLOBS = ["im_info", "anchor"] + + @op_filter() + def convert_op_gpu(op): + for x in CPU_OPS: + if mutils.filter_op(op, type=x[0], inputs=x[1]): + return None + op.device_option.CopyFrom(cdo_cuda) + return [op] + + @op_filter() + def convert_init_op_gpu(op): + if op.output[0] in CPU_BLOBS: + op.device_option.CopyFrom(cdo_cpu) + else: + op.device_option.CopyFrom(cdo_cuda) + return [op] + + convert_op_in_proto(ret_init_net.Proto(), convert_init_op_gpu) + convert_op_in_proto(ret_net.Proto(), convert_op_gpu) + + ret = core.InjectDeviceCopiesAmongNets([ret_init_net, ret_net]) + + return [ret[0][1], ret[0][0]] + + +def gen_init_net(net, blobs, empty_blobs): + blobs = copy.deepcopy(blobs) + for x in empty_blobs: + blobs[x] = np.array([], dtype=np.float32) + init_net = mutils.gen_init_net_from_blobs(blobs, net.external_inputs) + init_net = core.Net(init_net) + return init_net + + +def _save_image_graphs(args, all_net, all_init_net): + print("Saving model graph...") + mutils.save_graph( + all_net.Proto(), os.path.join(args.out_dir, "model_def.png"), op_only=False + ) + print("Model def image saved to {}.".format(args.out_dir)) + + +def _save_models(all_net, all_init_net, args): + print("Writing converted model to {}...".format(args.out_dir)) + fname = "model" + + if not os.path.exists(args.out_dir): + os.makedirs(args.out_dir) + + with open(os.path.join(args.out_dir, fname + ".pb"), "wb") as 
f: + f.write(all_net.Proto().SerializeToString()) + with open(os.path.join(args.out_dir, fname + ".pbtxt"), "wb") as f: + f.write(str(all_net.Proto())) + with open(os.path.join(args.out_dir, fname + "_init.pb"), "wb") as f: + f.write(all_init_net.Proto().SerializeToString()) + + _save_image_graphs(args, all_net, all_init_net) + + +def load_model(args): + model = test_engine.initialize_model_from_cfg(cfg.TEST.WEIGHTS) + blobs = mutils.get_ws_blobs() + + return model, blobs + + +def _get_result_blobs(check_blobs): + ret = {} + for x in check_blobs: + sn = core.ScopedName(x) + if workspace.HasBlob(sn): + ret[x] = workspace.FetchBlob(sn) + else: + ret[x] = None + + return ret + + +def _sort_results(boxes, segms, keypoints, classes): + indices = np.argsort(boxes[:, -1])[::-1] + if boxes is not None: + boxes = boxes[indices, :] + if segms is not None: + segms = [segms[x] for x in indices] + if keypoints is not None: + keypoints = [keypoints[x] for x in indices] + if classes is not None: + if isinstance(classes, list): + classes = [classes[x] for x in indices] + else: + classes = classes[indices] + + return boxes, segms, keypoints, classes + + +def run_model_cfg(args, im, check_blobs): + workspace.ResetWorkspace() + model, _ = load_model(args) + with c2_utils.NamedCudaScope(0): + cls_boxes, cls_segms, cls_keyps = test_engine.im_detect_all( + model, im, None, None + ) + + boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format( + cls_boxes, cls_segms, cls_keyps + ) + + # sort the results based on score for comparision + boxes, segms, keypoints, classes = _sort_results(boxes, segms, keypoints, classes) + + # write final results back to workspace + def _ornone(res): + return np.array(res) if res is not None else np.array([], dtype=np.float32) + + with c2_utils.NamedCudaScope(0): + workspace.FeedBlob(core.ScopedName("result_boxes"), _ornone(boxes)) + workspace.FeedBlob(core.ScopedName("result_segms"), _ornone(segms)) + 
workspace.FeedBlob(core.ScopedName("result_keypoints"), _ornone(keypoints)) + workspace.FeedBlob(core.ScopedName("result_classids"), _ornone(classes)) + + # get result blobs + with c2_utils.NamedCudaScope(0): + ret = _get_result_blobs(check_blobs) + + return ret + + +def _prepare_blobs(im, pixel_means, target_size, max_size): + """ Reference: blob.prep_im_for_blob() """ + + im = im.astype(np.float32, copy=False) + im -= pixel_means + im_shape = im.shape + + im_size_min = np.min(im_shape[0:2]) + im_size_max = np.max(im_shape[0:2]) + im_scale = float(target_size) / float(im_size_min) + if np.round(im_scale * im_size_max) > max_size: + im_scale = float(max_size) / float(im_size_max) + im = cv2.resize( + im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR + ) + + # Reuse code in blob_utils and fit FPN + blob = blob_utils.im_list_to_blob([im]) + + blobs = {} + blobs["data"] = blob + blobs["im_info"] = np.array( + [[blob.shape[2], blob.shape[3], im_scale]], dtype=np.float32 + ) + return blobs + + +def run_model_pb(args, net, init_net, im, check_blobs): + workspace.ResetWorkspace() + workspace.RunNetOnce(init_net) + mutils.create_input_blobs_for_net(net.Proto()) + workspace.CreateNet(net) + + # input_blobs, _ = core_test._get_blobs(im, None) + input_blobs = _prepare_blobs(im, cfg.PIXEL_MEANS, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE) + gpu_blobs = [] + if args.device == "gpu": + gpu_blobs = ["data"] + for k, v in input_blobs.items(): + workspace.FeedBlob( + core.ScopedName(k), + v, + mutils.get_device_option_cuda() + if k in gpu_blobs + else mutils.get_device_option_cpu(), + ) + + try: + workspace.RunNet(net) + scores = workspace.FetchBlob("score_nms") + classids = workspace.FetchBlob("class_nms") + boxes = workspace.FetchBlob("bbox_nms") + except Exception as e: + print("Running pb model failed.\n{}".format(e)) + # may not detect anything at all + R = 0 + scores = np.zeros((R,), dtype=np.float32) + boxes = np.zeros((R, 4), dtype=np.float32) + classids = 
np.zeros((R,), dtype=np.float32) + + boxes = np.column_stack((boxes, scores)) + + # sort the results based on score for comparision + boxes, _, _, classids = _sort_results(boxes, None, None, classids) + + # write final result back to workspace + workspace.FeedBlob("result_boxes", boxes) + workspace.FeedBlob("result_classids", classids) + + ret = _get_result_blobs(check_blobs) + + return ret + + +def verify_model(args, model_pb, test_img_file): + check_blobs = ["result_boxes", "result_classids"] # result + + print("Loading test file {}...".format(test_img_file)) + test_img = cv2.imread(test_img_file) + assert test_img is not None + + def _run_cfg_func(im, blobs): + return run_model_cfg(args, im, check_blobs) + + def _run_pb_func(im, blobs): + return run_model_pb(args, model_pb[0], model_pb[1], im, check_blobs) + + print("Checking models...") + assert mutils.compare_model(_run_cfg_func, _run_pb_func, test_img, check_blobs) + + +def _export_to_logfiledb(args, net, init_net, inputs, out_file, extra_out_tensors=None): + out_tensors = list(net.Proto().external_output) + if extra_out_tensors is not None: + out_tensors += extra_out_tensors + params = list(set(net.Proto().external_input) - set(inputs)) + net_type = None + predictor_export_meta = predictor_exporter.PredictorExportMeta( + predict_net=net, + parameters=params, + inputs=inputs, + outputs=out_tensors, + net_type=net_type, + ) + + logger.info("Exporting Caffe2 model to {}".format(out_file)) + predictor_exporter.save_to_db( + db_type="log_file_db", + db_destination=out_file, + predictor_export_meta=predictor_export_meta, + ) + + +def main(): + workspace.GlobalInit(["caffe2", "--caffe2_log_level=0"]) + args = parse_args() + logger.info("Called with args:") + logger.info(args) + if args.cfg_file is not None: + merge_cfg_from_file(args.cfg_file) + if args.opts is not None: + merge_cfg_from_list(args.opts) + cfg.NUM_GPUS = 1 + assert_and_infer_cfg() + logger.info("Converting model with config:") + 
logger.info(pprint.pformat(cfg)) + + # script will stop when it can't find an operator rather + # than stopping based on these flags + # + # assert not cfg.MODEL.KEYPOINTS_ON, "Keypoint model not supported." + # assert not cfg.MODEL.MASK_ON, "Mask model not supported." + # assert not cfg.FPN.FPN_ON, "FPN not supported." + # assert not cfg.RETINANET.RETINANET_ON, "RetinaNet model not supported." + + # load model from cfg + model, blobs = load_model(args) + + net = core.Net("") + net.Proto().op.extend(copy.deepcopy(model.net.Proto().op)) + net.Proto().external_input.extend(copy.deepcopy(model.net.Proto().external_input)) + net.Proto().external_output.extend(copy.deepcopy(model.net.Proto().external_output)) + net.Proto().type = args.net_execution_type + net.Proto().num_workers = 1 if args.net_execution_type == "simple" else 4 + + # Reset the device_option, change to unscope name and replace python operators + convert_net(args, net.Proto(), blobs) + + # add operators for bbox + add_bbox_ops(args, net, blobs) + + if args.fuse_af: + print("Fusing affine channel...") + net, blobs = mutils.fuse_net_affine(net, blobs) + + if args.use_nnpack: + mutils.update_mobile_engines(net.Proto()) + + # generate init net + empty_blobs = ["data", "im_info"] + init_net = gen_init_net(net, blobs, empty_blobs) + + if args.device == "gpu": + [net, init_net] = convert_model_gpu(args, net, init_net) + + net.Proto().name = args.net_name + init_net.Proto().name = args.net_name + "_init" + + if args.test_img is not None: + verify_model(args, [net, init_net], args.test_img) + + if args.logdb == 1: + output_file = os.path.join(args.out_dir, "model.logfiledb") + _export_to_logfiledb(args, net, init_net, empty_blobs, output_file) + else: + _save_models(net, init_net, args) + +if __name__ == "__main__": + main() diff --git a/configs/tools/convert_selective_search.py b/configs/tools/convert_selective_search.py new file mode 100644 index 
0000000000000000000000000000000000000000..c98ae74a23f8204c0ae9ddb9ec4b1f9c79a9f3ec --- /dev/null +++ b/configs/tools/convert_selective_search.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python + +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Script to convert Selective Search proposal boxes into the Detectron proposal +file format. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np +import scipy.io as sio +import sys + +from detectron.datasets.json_dataset import JsonDataset +from detectron.utils.io import save_object + + +if __name__ == '__main__': + dataset_name = sys.argv[1] + file_in = sys.argv[2] + file_out = sys.argv[3] + + ds = JsonDataset(dataset_name) + roidb = ds.get_roidb() + raw_data = sio.loadmat(file_in)['boxes'].ravel() + assert raw_data.shape[0] == len(roidb) + + boxes = [] + scores = [] + ids = [] + for i in range(raw_data.shape[0]): + if i % 1000 == 0: + print('{}/{}'.format(i + 1, len(roidb))) + # selective search boxes are 1-indexed and (y1, x1, y2, x2) + i_boxes = raw_data[i][:, (1, 0, 3, 2)] - 1 + boxes.append(i_boxes.astype(np.float32)) + scores.append(np.zeros((i_boxes.shape[0]), dtype=np.float32)) + ids.append(roidb[i]['id']) + + save_object(dict(boxes=boxes, scores=scores, indexes=ids), 
file_out) diff --git a/configs/tools/generate_testdev_from_test.py b/configs/tools/generate_testdev_from_test.py new file mode 100644 index 0000000000000000000000000000000000000000..9d4b515cc0be732675ca1ca51d31649c93754bcb --- /dev/null +++ b/configs/tools/generate_testdev_from_test.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python + +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Given a full set of results (boxes, masks, or keypoints) on the 2017 COCO +test set, this script extracts the results subset that corresponds to 2017 +test-dev. The test-dev subset can then be submitted to the COCO evaluation +server. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import json +import os +import sys + +from detectron.datasets.dataset_catalog import get_ann_fn +from detectron.utils.timer import Timer + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + '--json', dest='json_file', + help='detections json file', + default='', type=str) + parser.add_argument( + '--output-dir', dest='output_dir', + help='output directory', + default='/tmp', type=str) + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + args = parser.parse_args() + return args + + +def convert(json_file, output_dir): + print('Reading: {}'.format(json_file)) + with open(json_file, 'r') as fid: + dt = json.load(fid) + print('done!') + + test_image_info = get_ann_fn('coco_2017_test') + with open(test_image_info, 'r') as fid: + info_test = json.load(fid) + image_test = info_test['images'] + image_test_id = [i['id'] for i in image_test] + print('{} has {} images'.format(test_image_info, len(image_test_id))) + + test_dev_image_info = get_ann_fn('coco_2017_test-dev') + with open(test_dev_image_info, 'r') as fid: + info_testdev = json.load(fid) + image_testdev = info_testdev['images'] + image_testdev_id = [i['id'] for i in image_testdev] + print('{} has {} images'.format(test_dev_image_info, len(image_testdev_id))) + + dt_testdev = [] + print('Filtering test-dev from test...') + t = Timer() + t.tic() + for i in range(len(dt)): + if i % 1000 == 0: + print('{}/{}'.format(i, len(dt))) + if dt[i]['image_id'] in image_testdev_id: + dt_testdev.append(dt[i]) + print('Done filtering ({:2}s)!'.format(t.toc())) + + filename, file_extension = os.path.splitext(os.path.basename(json_file)) + filename = filename + '_test-dev' + filename = os.path.join(output_dir, filename + file_extension) + with open(filename, 'w') as fid: + info_test = json.dump(dt_testdev, fid) + 
print('Done writing: {}!'.format(filename)) + + +if __name__ == '__main__': + opts = parse_args() + convert(opts.json_file, opts.output_dir) diff --git a/configs/tools/infer.py b/configs/tools/infer.py new file mode 100644 index 0000000000000000000000000000000000000000..1c0199624a0f7882ef89c28205ebe949d2b30cb5 --- /dev/null +++ b/configs/tools/infer.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python + +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Perform inference on a single image or all images with a certain extension +(e.g., .jpg) in a folder. Allows for using a combination of multiple models. +For example, one model may be used for RPN, another model for Fast R-CNN style +box detection, yet another model to predict masks, and yet another model to +predict keypoints. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import cv2 # NOQA (Must import before importing caffe2 due to bug in cv2) +import logging +import os +import sys + +from caffe2.python import workspace + +from detectron.core.config import assert_and_infer_cfg +from detectron.core.config import cfg +from detectron.core.config import load_cfg +from detectron.core.config import merge_cfg_from_cfg +from detectron.core.config import merge_cfg_from_file +from detectron.utils.io import cache_url +from detectron.utils.logging import setup_logging +import detectron.core.rpn_generator as rpn_engine +import detectron.core.test_engine as model_engine +import detectron.datasets.dummy_datasets as dummy_datasets +import detectron.utils.c2 as c2_utils +import detectron.utils.env as envu +import detectron.utils.vis as vis_utils + +c2_utils.import_detectron_ops() + +# OpenCL may be enabled by default in OpenCV3; disable it because it's not +# thread safe and causes unwanted GPU memory allocations. +cv2.ocl.setUseOpenCL(False) + +# infer.py +# --im [path/to/image.jpg] \ +# --rpn-model [path/to/rpn/model.pkl] \ +# --rpn-cfg [path/to/rpn/config.yaml] \ +# --output-dir [path/to/output/dir] \ +# [model1] [config1] [model2] [config2] ... 
+ + +def parse_args(): + parser = argparse.ArgumentParser(description='Inference on an image') + parser.add_argument( + '--im', dest='im_file', help='input image', default=None, type=str + ) + parser.add_argument( + '--rpn-pkl', + dest='rpn_pkl', + help='rpn model file (pkl)', + default=None, + type=str + ) + parser.add_argument( + '--rpn-cfg', + dest='rpn_cfg', + help='cfg model file (yaml)', + default=None, + type=str + ) + parser.add_argument( + '--output-dir', + dest='output_dir', + help='directory for visualization pdfs (default: /tmp/infer)', + default='/tmp/infer', + type=str + ) + parser.add_argument( + 'models_to_run', + help='pairs of models & configs, listed like so: [pkl1] [yaml1] [pkl2] [yaml2] ...', + default=None, + nargs=argparse.REMAINDER + ) + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + return parser.parse_args() + + +def get_rpn_box_proposals(im, args): + cfg.immutable(False) + merge_cfg_from_file(args.rpn_cfg) + cfg.NUM_GPUS = 1 + cfg.MODEL.RPN_ONLY = True + cfg.TEST.RPN_PRE_NMS_TOP_N = 10000 + cfg.TEST.RPN_POST_NMS_TOP_N = 2000 + assert_and_infer_cfg(cache_urls=False) + + model = model_engine.initialize_model_from_cfg(args.rpn_pkl) + with c2_utils.NamedCudaScope(0): + boxes, scores = rpn_engine.im_proposals(model, im) + return boxes, scores + + +def main(args): + logger = logging.getLogger(__name__) + dummy_coco_dataset = dummy_datasets.get_coco_dataset() + cfg_orig = load_cfg(envu.yaml_dump(cfg)) + im = cv2.imread(args.im_file) + + if args.rpn_pkl is not None: + proposal_boxes, _proposal_scores = get_rpn_box_proposals(im, args) + workspace.ResetWorkspace() + else: + proposal_boxes = None + + cls_boxes, cls_segms, cls_keyps = None, None, None + for i in range(0, len(args.models_to_run), 2): + pkl = args.models_to_run[i] + yml = args.models_to_run[i + 1] + cfg.immutable(False) + merge_cfg_from_cfg(cfg_orig) + merge_cfg_from_file(yml) + if len(pkl) > 0: + weights_file = pkl + else: + weights_file = cfg.TEST.WEIGHTS + cfg.NUM_GPUS 
= 1 + assert_and_infer_cfg(cache_urls=False) + model = model_engine.initialize_model_from_cfg(weights_file) + with c2_utils.NamedCudaScope(0): + cls_boxes_, cls_segms_, cls_keyps_ = \ + model_engine.im_detect_all(model, im, proposal_boxes) + cls_boxes = cls_boxes_ if cls_boxes_ is not None else cls_boxes + cls_segms = cls_segms_ if cls_segms_ is not None else cls_segms + cls_keyps = cls_keyps_ if cls_keyps_ is not None else cls_keyps + workspace.ResetWorkspace() + + out_name = os.path.join( + args.output_dir, '{}'.format(os.path.basename(args.im_file) + '.pdf') + ) + logger.info('Processing {} -> {}'.format(args.im_file, out_name)) + + vis_utils.vis_one_image( + im[:, :, ::-1], + args.im_file, + args.output_dir, + cls_boxes, + cls_segms, + cls_keyps, + dataset=dummy_coco_dataset, + box_alpha=0.3, + show_class=True, + thresh=0.7, + kp_thresh=2 + ) + + +def check_args(args): + assert ( + (args.rpn_pkl is not None and args.rpn_cfg is not None) or + (args.rpn_pkl is None and args.rpn_cfg is None) + ) + if args.rpn_pkl is not None: + args.rpn_pkl = cache_url(args.rpn_pkl, cfg.DOWNLOAD_CACHE) + assert os.path.exists(args.rpn_pkl) + assert os.path.exists(args.rpn_cfg) + if args.models_to_run is not None: + assert len(args.models_to_run) % 2 == 0 + for i, model_file in enumerate(args.models_to_run): + if len(model_file) > 0: + if i % 2 == 0: + model_file = cache_url(model_file, cfg.DOWNLOAD_CACHE) + args.models_to_run[i] = model_file + assert os.path.exists(model_file), \ + '\'{}\' does not exist'.format(model_file) + + +if __name__ == '__main__': + workspace.GlobalInit(['caffe2', '--caffe2_log_level=0']) + setup_logging(__name__) + args = parse_args() + check_args(args) + main(args) diff --git a/configs/tools/infer_simple.py b/configs/tools/infer_simple.py new file mode 100644 index 0000000000000000000000000000000000000000..63506babfaadc22efe4c60245c5f9aacf3737638 --- /dev/null +++ b/configs/tools/infer_simple.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python + +# Copyright (c) 
2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Perform inference on a single image or all images with a certain extension +(e.g., .jpg) in a folder. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from collections import defaultdict +import argparse +import cv2 # NOQA (Must import before importing caffe2 due to bug in cv2) +import glob +import logging +import os +import sys +import time + +from caffe2.python import workspace + +from detectron.core.config import assert_and_infer_cfg +from detectron.core.config import cfg +from detectron.core.config import merge_cfg_from_file +from detectron.utils.io import cache_url +from detectron.utils.logging import setup_logging +from detectron.utils.timer import Timer +import detectron.core.test_engine as infer_engine +import detectron.datasets.dummy_datasets as dummy_datasets +import detectron.utils.c2 as c2_utils +import detectron.utils.vis as vis_utils + +c2_utils.import_detectron_ops() + +# OpenCL may be enabled by default in OpenCV3; disable it because it's not +# thread safe and causes unwanted GPU memory allocations. 
+cv2.ocl.setUseOpenCL(False) + + +def parse_args(): + parser = argparse.ArgumentParser(description='End-to-end inference') + parser.add_argument( + '--cfg', + dest='cfg', + help='cfg model file (/path/to/model_config.yaml)', + default=None, + type=str + ) + parser.add_argument( + '--wts', + dest='weights', + help='weights model file (/path/to/model_weights.pkl)', + default=None, + type=str + ) + parser.add_argument( + '--output-dir', + dest='output_dir', + help='directory for visualization pdfs (default: /tmp/infer_simple)', + default='/tmp/infer_simple', + type=str + ) + parser.add_argument( + '--image-ext', + dest='image_ext', + help='image file name extension (default: jpg)', + default='jpg', + type=str + ) + parser.add_argument( + '--always-out', + dest='out_when_no_box', + help='output image even when no object is found', + action='store_true' + ) + parser.add_argument( + '--output-ext', + dest='output_ext', + help='output image file format (default: pdf)', + default='pdf', + type=str + ) + parser.add_argument( + '--thresh', + dest='thresh', + help='Threshold for visualizing detections', + default=0.7, + type=float + ) + parser.add_argument( + '--kp-thresh', + dest='kp_thresh', + help='Threshold for visualizing keypoints', + default=2.0, + type=float + ) + parser.add_argument( + 'im_or_folder', help='image or folder of images', default=None + ) + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + return parser.parse_args() + + +def main(args): + logger = logging.getLogger(__name__) + + merge_cfg_from_file(args.cfg) + cfg.NUM_GPUS = 1 + args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE) + assert_and_infer_cfg(cache_urls=False) + + assert not cfg.MODEL.RPN_ONLY, \ + 'RPN models are not supported' + assert not cfg.TEST.PRECOMPUTED_PROPOSALS, \ + 'Models that require precomputed proposals are not supported' + + model = infer_engine.initialize_model_from_cfg(args.weights) + dummy_coco_dataset = dummy_datasets.get_coco_dataset() + + if 
os.path.isdir(args.im_or_folder): + im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext) + else: + im_list = [args.im_or_folder] + + for i, im_name in enumerate(im_list): + out_name = os.path.join( + args.output_dir, '{}'.format(os.path.basename(im_name) + '.' + args.output_ext) + ) + logger.info('Processing {} -> {}'.format(im_name, out_name)) + im = cv2.imread(im_name) + timers = defaultdict(Timer) + t = time.time() + with c2_utils.NamedCudaScope(0): + cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all( + model, im, None, timers=timers + ) + logger.info('Inference time: {:.3f}s'.format(time.time() - t)) + for k, v in timers.items(): + logger.info(' | {}: {:.3f}s'.format(k, v.average_time)) + if i == 0: + logger.info( + ' \ Note: inference on the first image will be slower than the ' + 'rest (caches and auto-tuning need to warm up)' + ) + + vis_utils.vis_one_image( + im[:, :, ::-1], # BGR -> RGB for visualization + im_name, + args.output_dir, + cls_boxes, + cls_segms, + cls_keyps, + dataset=dummy_coco_dataset, + box_alpha=0.3, + show_class=True, + thresh=args.thresh, + kp_thresh=args.kp_thresh, + ext=args.output_ext, + out_when_no_box=args.out_when_no_box + ) + + +if __name__ == '__main__': + workspace.GlobalInit(['caffe2', '--caffe2_log_level=0']) + setup_logging(__name__) + args = parse_args() + main(args) diff --git a/configs/tools/pickle_caffe_blobs.py b/configs/tools/pickle_caffe_blobs.py new file mode 100644 index 0000000000000000000000000000000000000000..e8dc238f421bece6c5ef1bcf028f66c34b42ed8f --- /dev/null +++ b/configs/tools/pickle_caffe_blobs.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python + +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Script for converting Caffe (<= 1.0) models into the the simple state dict +format used by Detectron. For example, this script can convert the orignal +ResNet models released by MSRA. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import numpy as np +import os +import sys + +from caffe.proto import caffe_pb2 +from caffe2.proto import caffe2_pb2 +from caffe2.python import caffe_translator +from caffe2.python import utils +from google.protobuf import text_format + +from detectron.utils.io import save_object + +def parse_args(): + parser = argparse.ArgumentParser( + description='Dump weights from a Caffe model' + ) + parser.add_argument( + '--prototxt', + dest='prototxt_file_name', + help='Network definition prototxt file path', + default=None, + type=str + ) + parser.add_argument( + '--caffemodel', + dest='caffemodel_file_name', + help='Pretrained network weights file path', + default=None, + type=str + ) + parser.add_argument( + '--output', + dest='out_file_name', + help='Output file path', + default=None, + type=str + ) + + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + + args = parser.parse_args() + return args + + +def normalize_resnet_name(name): + if name.find('res') == 0 and name.find('res_') == -1: + # E.g., + # res4b11_branch2c -> res4_11_branch2c + # res2a_branch1 -> res2_0_branch1 + chunk = 
name[len('res'):name.find('_')] + name = ( + 'res' + chunk[0] + '_' + str( + int(chunk[2:]) if len(chunk) > 2 # e.g., "b1" -> 1 + else ord(chunk[1]) - ord('a') + ) + # e.g., "a" -> 0 + name[name.find('_'):] + ) + return name + + +def pickle_weights(out_file_name, weights): + blobs = { + normalize_resnet_name(blob.name): utils.Caffe2TensorToNumpyArray(blob) + for blob in weights.protos + } + save_object(blobs, out_file_name) + print('Wrote blobs:') + print(sorted(blobs.keys())) + + +def add_missing_biases(caffenet_weights): + for layer in caffenet_weights.layer: + if layer.type == 'Convolution' and len(layer.blobs) == 1: + num_filters = layer.blobs[0].shape.dim[0] + bias_blob = caffe_pb2.BlobProto() + bias_blob.data.extend(np.zeros(num_filters)) + bias_blob.num, bias_blob.channels, bias_blob.height = 1, 1, 1 + bias_blob.width = num_filters + layer.blobs.extend([bias_blob]) + + +def remove_spatial_bn_layers(caffenet, caffenet_weights): + # Layer types associated with spatial batch norm + remove_types = ['BatchNorm', 'Scale'] + + def _remove_layers(net): + for i in reversed(range(len(net.layer))): + if net.layer[i].type in remove_types: + net.layer.pop(i) + + # First remove layers from caffenet proto + _remove_layers(caffenet) + # We'll return these so we can save the batch norm parameters + bn_layers = [ + layer for layer in caffenet_weights.layer if layer.type in remove_types + ] + _remove_layers(caffenet_weights) + + def _create_tensor(arr, shape, name): + t = caffe2_pb2.TensorProto() + t.name = name + t.data_type = caffe2_pb2.TensorProto.FLOAT + t.dims.extend(shape.dim) + t.float_data.extend(arr) + assert len(t.float_data) == np.prod(t.dims), 'Data size, shape mismatch' + return t + + bn_tensors = [] + for (bn, scl) in zip(bn_layers[0::2], bn_layers[1::2]): + assert bn.name[len('bn'):] == scl.name[len('scale'):], 'Pair mismatch' + blob_out = 'res' + bn.name[len('bn'):] + '_bn' + bn_mean = np.asarray(bn.blobs[0].data) + bn_var = np.asarray(bn.blobs[1].data) + scale 
= np.asarray(scl.blobs[0].data) + bias = np.asarray(scl.blobs[1].data) + std = np.sqrt(bn_var + 1e-5) + new_scale = scale / std + new_bias = bias - bn_mean * scale / std + new_scale_tensor = _create_tensor( + new_scale, bn.blobs[0].shape, blob_out + '_s' + ) + new_bias_tensor = _create_tensor( + new_bias, bn.blobs[0].shape, blob_out + '_b' + ) + bn_tensors.extend([new_scale_tensor, new_bias_tensor]) + return bn_tensors + + +def remove_layers_without_parameters(caffenet, caffenet_weights): + for i in reversed(range(len(caffenet_weights.layer))): + if len(caffenet_weights.layer[i].blobs) == 0: + # Search for the corresponding layer in caffenet and remove it + name = caffenet_weights.layer[i].name + found = False + for j in range(len(caffenet.layer)): + if caffenet.layer[j].name == name: + caffenet.layer.pop(j) + found = True + break + if not found and name[-len('_split'):] != '_split': + print('Warning: layer {} not found in caffenet'.format(name)) + caffenet_weights.layer.pop(i) + + +def normalize_shape(caffenet_weights): + for layer in caffenet_weights.layer: + for blob in layer.blobs: + shape = (blob.num, blob.channels, blob.height, blob.width) + if len(blob.data) != np.prod(shape): + shape = tuple(blob.shape.dim) + if len(shape) == 1: + # Handle biases + shape = (1, 1, 1, shape[0]) + if len(shape) == 2: + # Handle InnerProduct layers + shape = (1, 1, shape[0], shape[1]) + assert len(shape) == 4 + blob.num, blob.channels, blob.height, blob.width = shape + + +def load_and_convert_caffe_model(prototxt_file_name, caffemodel_file_name): + caffenet = caffe_pb2.NetParameter() + caffenet_weights = caffe_pb2.NetParameter() + text_format.Merge(open(prototxt_file_name).read(), caffenet) + caffenet_weights.ParseFromString(open(caffemodel_file_name).read()) + # C2 conv layers current require biases, but they are optional in C1 + # Add zeros as biases is they are missing + add_missing_biases(caffenet_weights) + # We only care about getting parameters, so remove layers w/o 
parameters + remove_layers_without_parameters(caffenet, caffenet_weights) + # BatchNorm is not implemented in the translator *and* we need to fold Scale + # layers into the new C2 SpatialBN op, hence we remove the batch norm layers + # and apply custom translations code + bn_weights = remove_spatial_bn_layers(caffenet, caffenet_weights) + # Set num, channel, height and width for blobs that use shape.dim instead + normalize_shape(caffenet_weights) + # Translate the rest of the model + net, pretrained_weights = caffe_translator.TranslateModel( + caffenet, caffenet_weights + ) + pretrained_weights.protos.extend(bn_weights) + return net, pretrained_weights + + +if __name__ == '__main__': + args = parse_args() + assert os.path.exists(args.prototxt_file_name), \ + 'Prototxt file does not exist' + assert os.path.exists(args.caffemodel_file_name), \ + 'Weights file does not exist' + net, weights = load_and_convert_caffe_model( + args.prototxt_file_name, args.caffemodel_file_name + ) + pickle_weights(args.out_file_name, weights) diff --git a/configs/tools/reval.py b/configs/tools/reval.py new file mode 100644 index 0000000000000000000000000000000000000000..c8138a97db7971d567379ce5db04749389ea9333 --- /dev/null +++ b/configs/tools/reval.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python + +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## +# +# Based on: +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Reval = re-eval. Re-evaluate saved detections.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import os +import sys + +from detectron.core.config import cfg +from detectron.datasets import task_evaluation +from detectron.datasets.json_dataset import JsonDataset +from detectron.utils.io import load_object +from detectron.utils.logging import setup_logging +import detectron.core.config as core_config + + +def parse_args(): + parser = argparse.ArgumentParser(description='Re-evaluate results') + parser.add_argument( + 'output_dir', nargs=1, help='results directory', type=str + ) + parser.add_argument( + '--dataset', + dest='dataset_name', + help='dataset to re-evaluate', + default='voc_2007_test', + type=str + ) + parser.add_argument( + '--matlab', + dest='matlab_eval', + help='use matlab for evaluation', + action='store_true' + ) + parser.add_argument( + '--comp', + dest='comp_mode', + help='competition mode', + action='store_true' + ) + parser.add_argument( + '--cfg', + dest='cfg_file', + help='optional config file', + default=None, + type=str + ) + + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + + args = parser.parse_args() + return args + + +def do_reval(dataset_name, output_dir, args): + dataset = JsonDataset(dataset_name) + dets = load_object(os.path.join(output_dir, 'detections.pkl')) + + # Override config with the one saved in the detections file + if args.cfg_file is not None: + core_config.merge_cfg_from_cfg(core_config.load_cfg(dets['cfg'])) + else: + 
core_config._merge_a_into_b(core_config.load_cfg(dets['cfg']), cfg) + results = task_evaluation.evaluate_all( + dataset, + dets['all_boxes'], + dets['all_segms'], + dets['all_keyps'], + output_dir, + use_matlab=args.matlab_eval + ) + task_evaluation.log_copy_paste_friendly_results(results) + + +if __name__ == '__main__': + setup_logging(__name__) + args = parse_args() + if args.comp_mode: + cfg.TEST.COMPETITION_MODE = True + output_dir = os.path.abspath(args.output_dir[0]) + do_reval(args.dataset_name, output_dir, args) diff --git a/configs/tools/test_net.py b/configs/tools/test_net.py new file mode 100644 index 0000000000000000000000000000000000000000..4afa4c60522c96d29b85af8373de669e4c15dc4e --- /dev/null +++ b/configs/tools/test_net.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python + +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +"""Perform inference on one or more datasets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import cv2 # NOQA (Must import before importing caffe2 due to bug in cv2) +import os +import pprint +import sys +import time + +from caffe2.python import workspace + +from detectron.core.config import assert_and_infer_cfg +from detectron.core.config import cfg +from detectron.core.config import merge_cfg_from_file +from detectron.core.config import merge_cfg_from_list +from detectron.core.test_engine import run_inference +from detectron.utils.logging import setup_logging +import detectron.utils.c2 as c2_utils + +c2_utils.import_detectron_ops() + +# OpenCL may be enabled by default in OpenCV3; disable it because it's not +# thread safe and causes unwanted GPU memory allocations. +cv2.ocl.setUseOpenCL(False) + + +def parse_args(): + parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') + parser.add_argument( + '--cfg', + dest='cfg_file', + help='optional config file', + default=None, + type=str + ) + parser.add_argument( + '--wait', + dest='wait', + help='wait until net file exists', + default=True, + type=bool + ) + parser.add_argument( + '--vis', dest='vis', help='visualize detections', action='store_true' + ) + parser.add_argument( + '--multi-gpu-testing', + dest='multi_gpu_testing', + help='using cfg.NUM_GPUS for inference', + action='store_true' + ) + parser.add_argument( + '--range', + dest='range', + help='start (inclusive) and end (exclusive) indices', + default=None, + type=int, + nargs=2 + ) + parser.add_argument( + 'opts', + help='See detectron/core/config.py for all options', + default=None, + nargs=argparse.REMAINDER + ) + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + return parser.parse_args() + + +if __name__ == 
'__main__': + workspace.GlobalInit(['caffe2', '--caffe2_log_level=0']) + logger = setup_logging(__name__) + args = parse_args() + logger.info('Called with args:') + logger.info(args) + if args.cfg_file is not None: + merge_cfg_from_file(args.cfg_file) + if args.opts is not None: + merge_cfg_from_list(args.opts) + assert_and_infer_cfg() + logger.info('Testing with config:') + logger.info(pprint.pformat(cfg)) + + while not os.path.exists(cfg.TEST.WEIGHTS) and args.wait: + logger.info('Waiting for \'{}\' to exist...'.format(cfg.TEST.WEIGHTS)) + time.sleep(10) + + run_inference( + cfg.TEST.WEIGHTS, + ind_range=args.range, + multi_gpu_testing=args.multi_gpu_testing, + check_expected_results=True, + ) diff --git a/configs/tools/train_net.py b/configs/tools/train_net.py new file mode 100644 index 0000000000000000000000000000000000000000..9e757b599690a9f190c2c9b9e4769abdc079a134 --- /dev/null +++ b/configs/tools/train_net.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python + +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +"""Train a network with Detectron.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import cv2 # NOQA (Must import before importing caffe2 due to bug in cv2) +import logging +import numpy as np +import pprint +import sys + +from caffe2.python import workspace + +from detectron.core.config import assert_and_infer_cfg +from detectron.core.config import cfg +from detectron.core.config import merge_cfg_from_file +from detectron.core.config import merge_cfg_from_list +from detectron.core.test_engine import run_inference +from detectron.utils.logging import setup_logging +import detectron.utils.c2 as c2_utils +import detectron.utils.train + +c2_utils.import_contrib_ops() +c2_utils.import_detectron_ops() + +# OpenCL may be enabled by default in OpenCV3; disable it because it's not +# thread safe and causes unwanted GPU memory allocations. 
+cv2.ocl.setUseOpenCL(False) + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Train a network with Detectron' + ) + parser.add_argument( + '--cfg', + dest='cfg_file', + help='Config file for training (and optionally testing)', + default=None, + type=str + ) + parser.add_argument( + '--multi-gpu-testing', + dest='multi_gpu_testing', + help='Use cfg.NUM_GPUS GPUs for inference', + action='store_true' + ) + parser.add_argument( + '--skip-test', + dest='skip_test', + help='Do not test the final model', + action='store_true' + ) + parser.add_argument( + 'opts', + help='See detectron/core/config.py for all options', + default=None, + nargs=argparse.REMAINDER + ) + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + return parser.parse_args() + + +def main(): + # Initialize C2 + workspace.GlobalInit( + ['caffe2', '--caffe2_log_level=0', '--caffe2_gpu_memory_tracking=1'] + ) + # Set up logging and load config options + logger = setup_logging(__name__) + logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO) + args = parse_args() + logger.info('Called with args:') + logger.info(args) + if args.cfg_file is not None: + merge_cfg_from_file(args.cfg_file) + if args.opts is not None: + merge_cfg_from_list(args.opts) + assert_and_infer_cfg() + smi_output, cuda_ver, cudnn_ver = c2_utils.get_nvidia_info() + logger.info("cuda version : {}".format(cuda_ver)) + logger.info("cudnn version: {}".format(cudnn_ver)) + logger.info("nvidia-smi output:\n{}".format(smi_output)) + logger.info('Training with config:') + logger.info(pprint.pformat(cfg)) + # Note that while we set the numpy random seed network training will not be + # deterministic in general. There are sources of non-determinism that cannot + # be removed with a reasonble execution-speed tradeoff (such as certain + # non-deterministic cudnn functions). 
+ np.random.seed(cfg.RNG_SEED) + # Execute the training run + checkpoints = detectron.utils.train.train_model() + # Test the trained model + if not args.skip_test: + test_model(checkpoints['final'], args.multi_gpu_testing, args.opts) + + +def test_model(model_file, multi_gpu_testing, opts=None): + """Test a model.""" + # Clear memory before inference + workspace.ResetWorkspace() + # Run inference + run_inference( + model_file, multi_gpu_testing=multi_gpu_testing, + check_expected_results=True, + ) + + +if __name__ == '__main__': + main() diff --git a/configs/tools/visualize_results.py b/configs/tools/visualize_results.py new file mode 100644 index 0000000000000000000000000000000000000000..fc83e444ddbe17565cd04602e729bebeda6ec1ea --- /dev/null +++ b/configs/tools/visualize_results.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python + +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +"""Script for visualizing results saved in a detections.pkl file.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import cv2 +import os +import sys + +from detectron.datasets.json_dataset import JsonDataset +from detectron.utils.io import load_object +import detectron.utils.vis as vis_utils + +# OpenCL may be enabled by default in OpenCV3; disable it because it's not +# thread safe and causes unwanted GPU memory allocations. +cv2.ocl.setUseOpenCL(False) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + '--dataset', + dest='dataset', + help='dataset', + default='coco_2014_minival', + type=str + ) + parser.add_argument( + '--detections', + dest='detections', + help='detections pkl file', + default='', + type=str + ) + parser.add_argument( + '--thresh', + dest='thresh', + help='detection prob threshold', + default=0.9, + type=float + ) + parser.add_argument( + '--output-dir', + dest='output_dir', + help='output directory', + default='./tmp/vis-output', + type=str + ) + parser.add_argument( + '--first', + dest='first', + help='only visualize the first k images', + default=0, + type=int + ) + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + args = parser.parse_args() + return args + + +def vis(dataset, detections_pkl, thresh, output_dir, limit=0): + ds = JsonDataset(dataset) + roidb = ds.get_roidb() + + dets = load_object(detections_pkl) + + assert all(k in dets for k in ['all_boxes', 'all_segms', 'all_keyps']), \ + 'Expected detections pkl file in the format used by test_engine.py' + + all_boxes = dets['all_boxes'] + all_segms = dets['all_segms'] + all_keyps = dets['all_keyps'] + + def id_or_index(ix, val): + if len(val) == 0: + return val + else: + return val[ix] + + for ix, entry in enumerate(roidb): + if limit 
> 0 and ix >= limit: + break + if ix % 10 == 0: + print('{:d}/{:d}'.format(ix + 1, len(roidb))) + + im = cv2.imread(entry['image']) + im_name = os.path.splitext(os.path.basename(entry['image']))[0] + + cls_boxes_i = [ + id_or_index(ix, cls_k_boxes) for cls_k_boxes in all_boxes + ] + cls_segms_i = [ + id_or_index(ix, cls_k_segms) for cls_k_segms in all_segms + ] + cls_keyps_i = [ + id_or_index(ix, cls_k_keyps) for cls_k_keyps in all_keyps + ] + + vis_utils.vis_one_image( + im[:, :, ::-1], + '{:d}_{:s}'.format(ix, im_name), + os.path.join(output_dir, 'vis'), + cls_boxes_i, + segms=cls_segms_i, + keypoints=cls_keyps_i, + thresh=thresh, + box_alpha=0.8, + dataset=ds, + show_class=True + ) + + +if __name__ == '__main__': + opts = parse_args() + vis( + opts.dataset, + opts.detections, + opts.thresh, + opts.output_dir, + limit=opts.first + ) diff --git a/demo/15673749081_767a7fa63a_k.jpg b/demo/15673749081_767a7fa63a_k.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0ec90e11f64603f0a2136c142a4ba0089a8ba051 Binary files /dev/null and b/demo/15673749081_767a7fa63a_k.jpg differ diff --git a/demo/16004479832_a748d55f21_k.jpg b/demo/16004479832_a748d55f21_k.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5ef06804cf6bde89396c5892c8b463e1b0fd960b Binary files /dev/null and b/demo/16004479832_a748d55f21_k.jpg differ diff --git a/demo/17790319373_bd19b24cfc_k.jpg b/demo/17790319373_bd19b24cfc_k.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8d7ce5e33b75aa926cce67e34f82e53e43b6b4fc Binary files /dev/null and b/demo/17790319373_bd19b24cfc_k.jpg differ diff --git a/demo/18124840932_e42b3e377c_k.jpg b/demo/18124840932_e42b3e377c_k.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0e20882bbeb16fb6f1e7353e8d66375f678f930d Binary files /dev/null and b/demo/18124840932_e42b3e377c_k.jpg differ diff --git a/demo/19064748793_bb942deea1_k.jpg b/demo/19064748793_bb942deea1_k.jpg new file mode 
100644 index 0000000000000000000000000000000000000000..6269382798382a8bb1e64ae94bb6156d536a8bc0 Binary files /dev/null and b/demo/19064748793_bb942deea1_k.jpg differ diff --git a/demo/24274813513_0cfd2ce6d0_k.jpg b/demo/24274813513_0cfd2ce6d0_k.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2f3271a28622fa442fb594c48611374d1bd73f20 Binary files /dev/null and b/demo/24274813513_0cfd2ce6d0_k.jpg differ diff --git a/demo/33823288584_1d21cf0a26_k.jpg b/demo/33823288584_1d21cf0a26_k.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c218118f94f3c7469fbb7ddb4d3d162b73669660 Binary files /dev/null and b/demo/33823288584_1d21cf0a26_k.jpg differ diff --git a/demo/33887522274_eebd074106_k.jpg b/demo/33887522274_eebd074106_k.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3173f58d1e96a72e97b89e9461665c15f27b9acf Binary files /dev/null and b/demo/33887522274_eebd074106_k.jpg differ diff --git a/demo/34501842524_3c858b3080_k.jpg b/demo/34501842524_3c858b3080_k.jpg new file mode 100644 index 0000000000000000000000000000000000000000..26398dcde42f2fb78c7aa26fbabead880f422cce Binary files /dev/null and b/demo/34501842524_3c858b3080_k.jpg differ diff --git a/demo/NOTICE b/demo/NOTICE new file mode 100644 index 0000000000000000000000000000000000000000..506f76e8699c54694a038bc262db6394af45f584 --- /dev/null +++ b/demo/NOTICE @@ -0,0 +1,32 @@ +The demo images are licensed as United States government work: +https://www.usa.gov/government-works + +The image files were obtained on Jan 13, 2018 from the following +URLs. 
+ +16004479832_a748d55f21_k.jpg +https://www.flickr.com/photos/archivesnews/16004479832 + +18124840932_e42b3e377c_k.jpg +https://www.flickr.com/photos/usnavy/18124840932 + +33887522274_eebd074106_k.jpg +https://www.flickr.com/photos/usaid_pakistan/33887522274 + +15673749081_767a7fa63a_k.jpg +https://www.flickr.com/photos/usnavy/15673749081 + +34501842524_3c858b3080_k.jpg +https://www.flickr.com/photos/departmentofenergy/34501842524 + +24274813513_0cfd2ce6d0_k.jpg +https://www.flickr.com/photos/dhsgov/24274813513 + +19064748793_bb942deea1_k.jpg +https://www.flickr.com/photos/statephotos/19064748793 + +33823288584_1d21cf0a26_k.jpg +https://www.flickr.com/photos/cbpphotos/33823288584 + +17790319373_bd19b24cfc_k.jpg +https://www.flickr.com/photos/secdef/17790319373 diff --git a/demo/output/17790319373_bd19b24cfc_k_example_output.jpg b/demo/output/17790319373_bd19b24cfc_k_example_output.jpg new file mode 100644 index 0000000000000000000000000000000000000000..911626d7a3863a35480f3e6718b9b5897666e79a Binary files /dev/null and b/demo/output/17790319373_bd19b24cfc_k_example_output.jpg differ diff --git a/demo/output/33823288584_1d21cf0a26_k_example_output.jpg b/demo/output/33823288584_1d21cf0a26_k_example_output.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7e806a49c84e8521c8d608a2a06c8f0dfc663f23 Binary files /dev/null and b/demo/output/33823288584_1d21cf0a26_k_example_output.jpg differ diff --git a/detectron/__init__.py b/detectron/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/detectron/core/__init__.py b/detectron/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/detectron/core/config.py b/detectron/core/config.py new file mode 100644 index 0000000000000000000000000000000000000000..7c07ec137c271058ba38ba359aeef1a5a9342c60 --- /dev/null +++ b/detectron/core/config.py 
@@ -0,0 +1,1303 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## +# +# Based on: +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Detectron config system. + +This file specifies default config options for Detectron. You should not +change values in this file. Instead, you should write a config file (in yaml) +and use merge_cfg_from_file(yaml_file) to load it and override the default +options. + +Most tools in the tools directory take a --cfg option to specify an override +file and an optional list of override (key, value) pairs: + - See tools/{train,test}_net.py for example code that uses merge_cfg_from_file + - See configs/*/*.yaml for example config files + +Detectron supports a lot of different model types, each of which has a lot of +different options. The result is a HUGE set of configuration options. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from ast import literal_eval +from future.utils import iteritems +import copy +import io +import logging +import numpy as np +import os +import os.path as osp +import six + +from detectron.utils.collections import AttrDict +from detectron.utils.io import cache_url + +logger = logging.getLogger(__name__) + +__C = AttrDict() +# Consumers can get config by: +# from detectron.core.config import cfg +cfg = __C + +# Random note: avoid using '.ON' as a config key since yaml converts it to True; +# prefer 'ENABLED' instead + +# ---------------------------------------------------------------------------- # +# Training options +# ---------------------------------------------------------------------------- # +__C.TRAIN = AttrDict() + +# Initialize network with weights from this .pkl file +__C.TRAIN.WEIGHTS = '' + +# Datasets to train on +# Available dataset list: detectron.datasets.dataset_catalog.datasets() +# If multiple datasets are listed, the model is trained on their union +__C.TRAIN.DATASETS = () + +# Scales to use during training +# Each scale is the pixel size of an image's shortest side +# If multiple scales are listed, then one is selected uniformly at random for +# each training image (i.e., scale jitter data augmentation) +__C.TRAIN.SCALES = (600, ) + +# Max pixel size of the longest side of a scaled input image +__C.TRAIN.MAX_SIZE = 1000 + +# Images *per GPU* in the training minibatch +# Total images per minibatch = TRAIN.IMS_PER_BATCH * NUM_GPUS +__C.TRAIN.IMS_PER_BATCH = 2 + +# RoI minibatch size *per image* (number of regions of interest [ROIs]) +# Total number of RoIs per training minibatch = +# TRAIN.BATCH_SIZE_PER_IM * TRAIN.IMS_PER_BATCH * NUM_GPUS +# E.g., a common configuration is: 512 * 2 * 8 = 8192 +__C.TRAIN.BATCH_SIZE_PER_IM = 64 + +# Target fraction of RoI minibatch that is labeled 
foreground (i.e. class > 0) +__C.TRAIN.FG_FRACTION = 0.25 + +# Overlap threshold for an RoI to be considered foreground (if >= FG_THRESH) +__C.TRAIN.FG_THRESH = 0.5 + +# Overlap threshold for an RoI to be considered background (class = 0 if +# overlap in [LO, HI)) +__C.TRAIN.BG_THRESH_HI = 0.5 +__C.TRAIN.BG_THRESH_LO = 0.0 + +# Use horizontally-flipped images during training? +__C.TRAIN.USE_FLIPPED = True + +# Overlap required between an RoI and a ground-truth box in order for that +# (RoI, gt box) pair to be used as a bounding-box regression training example +__C.TRAIN.BBOX_THRESH = 0.5 + +# Snapshot (model checkpoint) period +# Divide by NUM_GPUS to determine actual period (e.g., 80000/8 => 10000 iters) +# to allow for linear training schedule scaling +__C.TRAIN.SNAPSHOT_ITERS = 80000 + +# Train using these proposals +# During training, all proposals specified in the file are used (no limit is +# applied) +# Proposal files must be in correspondence with the datasets listed in +# TRAIN.DATASETS +__C.TRAIN.PROPOSAL_FILES = () + +# Make minibatches from images that have similar aspect ratios (i.e. 
both +# tall and thin or both short and wide) +# This feature is critical for saving memory (and makes training slightly +# faster) +__C.TRAIN.ASPECT_GROUPING = True + +# ---------------------------------------------------------------------------- # +# RPN training options +# ---------------------------------------------------------------------------- # + +# Run GenerateProposals on GPU if set to True +__C.TRAIN.GENERATE_PROPOSALS_ON_GPU = False + +# Minimum overlap required between an anchor and ground-truth box for the +# (anchor, gt box) pair to be a positive example (IOU >= thresh ==> positive RPN +# example) +__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7 + +# Maximum overlap allowed between an anchor and ground-truth box for the +# (anchor, gt box) pair to be a negative examples (IOU < thresh ==> negative RPN +# example) +__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 + +# Target fraction of foreground (positive) examples per RPN minibatch +__C.TRAIN.RPN_FG_FRACTION = 0.5 + +# Total number of RPN examples per image +__C.TRAIN.RPN_BATCH_SIZE_PER_IM = 256 + +# NMS threshold used on RPN proposals (used during end-to-end training with RPN) +__C.TRAIN.RPN_NMS_THRESH = 0.7 + +# Number of top scoring RPN proposals to keep before applying NMS +# When FPN is used, this is *per FPN level* (not total) +__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000 + +# Number of top scoring RPN proposals to keep after applying NMS +# This is the total number of RPN proposals produced (for both FPN and non-FPN +# cases) +__C.TRAIN.RPN_POST_NMS_TOP_N = 2000 + +# Remove RPN anchors that go outside the image by RPN_STRADDLE_THRESH pixels +# Set to -1 or a large value, e.g. 
100000, to disable pruning anchors +__C.TRAIN.RPN_STRADDLE_THRESH = 0 + +# Proposal height and width both need to be greater than RPN_MIN_SIZE +# (at orig image scale; not scale used during training or inference) +__C.TRAIN.RPN_MIN_SIZE = 0 + +# Filter proposals that are inside of crowd regions by CROWD_FILTER_THRESH +# "Inside" is measured as: proposal-with-crowd intersection area divided by +# proposal area +__C.TRAIN.CROWD_FILTER_THRESH = 0.7 + +# Ignore ground-truth objects with area < this threshold +__C.TRAIN.GT_MIN_AREA = -1 + +# Freeze the backbone architecture during training if set to True +__C.TRAIN.FREEZE_CONV_BODY = False + +# Training will resume from the latest snapshot (model checkpoint) found in the +# output directory +__C.TRAIN.AUTO_RESUME = True + +# Training will copy TRAIN.WEIGHTS and treat it as a candidate checkpoint +__C.TRAIN.COPY_WEIGHTS = False + +# Add StopGrad at a specified stage so the bottom layers are frozen +__C.TRAIN.FREEZE_AT = 2 + + +# ---------------------------------------------------------------------------- # +# Data loader options (see detectron/roi_data/loader.py for more info) +# ---------------------------------------------------------------------------- # +__C.DATA_LOADER = AttrDict() + +# Number of Python threads to use for the data loader (warning: using too many +# threads can cause GIL-based interference with Python Ops leading to *slower* +# training; 4 seems to be the sweet spot in our experience) +__C.DATA_LOADER.NUM_THREADS = 4 + +# Size of the shared minibatch queue +__C.DATA_LOADER.MINIBATCH_QUEUE_SIZE = 64 + +# Capacity of the per GPU blobs queue +__C.DATA_LOADER.BLOBS_QUEUE_CAPACITY = 8 + + +# ---------------------------------------------------------------------------- # +# Inference ('test') options +# ---------------------------------------------------------------------------- # +__C.TEST = AttrDict() + +# Initialize network with weights from this .pkl file +__C.TEST.WEIGHTS = '' + +# Datasets to test on 
+# Available dataset list: detectron.datasets.dataset_catalog.datasets() +# If multiple datasets are listed, testing is performed on each one sequentially +__C.TEST.DATASETS = () + +# Scale to use during testing +__C.TEST.SCALE = 600 + +# Max pixel size of the longest side of a scaled input image +__C.TEST.MAX_SIZE = 1000 + +# Overlap threshold used for non-maximum suppression (suppress boxes with +# IoU >= this threshold) +__C.TEST.NMS = 0.3 + +# Apply Fast R-CNN style bounding-box regression if True +__C.TEST.BBOX_REG = True + +# Test using these proposal files (must correspond with TEST.DATASETS) +__C.TEST.PROPOSAL_FILES = () + +# Run GenerateProposals on GPU if set to True +__C.TEST.GENERATE_PROPOSALS_ON_GPU = False + +# Limit on the number of proposals per image used during inference +__C.TEST.PROPOSAL_LIMIT = 2000 + +# NMS threshold used on RPN proposals +__C.TEST.RPN_NMS_THRESH = 0.7 + +# Number of top scoring RPN proposals to keep before applying NMS +# When FPN is used, this is *per FPN level* (not total) +__C.TEST.RPN_PRE_NMS_TOP_N = 12000 + +# Number of top scoring RPN proposals to keep after applying NMS +# This is the total number of RPN proposals produced (for both FPN and non-FPN +# cases) +__C.TEST.RPN_POST_NMS_TOP_N = 2000 + +# Proposal height and width both need to be greater than RPN_MIN_SIZE +# (at orig image scale; not scale used during training or inference) +__C.TEST.RPN_MIN_SIZE = 0 + +# Maximum number of detections to return per image (100 is based on the limit +# established for the COCO dataset) +__C.TEST.DETECTIONS_PER_IM = 100 + +# Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to +# balance obtaining high recall with not having too many low precision +# detections that will slow down inference post processing steps (like NMS) +__C.TEST.SCORE_THRESH = 0.05 + +# Save detection results files if True +# If false, results files are cleaned up (they can be large) after local +# evaluation 
+__C.TEST.COMPETITION_MODE = True + +# Evaluate detections with the COCO json dataset eval code even if it's not the +# evaluation code for the dataset (e.g. evaluate PASCAL VOC results using the +# COCO API to get COCO style AP on PASCAL VOC) +__C.TEST.FORCE_JSON_DATASET_EVAL = False + +# [Inferred value; do not set directly in a config] +# Indicates if precomputed proposals are used at test time +# Not set for 1-stage models and 2-stage models with RPN subnetwork enabled +__C.TEST.PRECOMPUTED_PROPOSALS = True + +# Evaluate proposals in class-specific Average Recall (AR). +# It means that one first computes AR within each category and then averages +# over the categories. It is not biased towards the AR of frequent categories +# compared with class-agnostic AR. +__C.TEST.CLASS_SPECIFIC_AR = False + +# ---------------------------------------------------------------------------- # +# Test-time augmentations for bounding box detection +# See configs/test_time_aug/e2e_mask_rcnn_R-50-FPN_2x.yaml for an example +# ---------------------------------------------------------------------------- # +__C.TEST.BBOX_AUG = AttrDict() + +# Enable test-time augmentation for bounding box detection if True +__C.TEST.BBOX_AUG.ENABLED = False + +# Heuristic used to combine predicted box scores +# Valid options: ('ID', 'AVG', 'UNION') +__C.TEST.BBOX_AUG.SCORE_HEUR = 'UNION' + +# Heuristic used to combine predicted box coordinates +# Valid options: ('ID', 'AVG', 'UNION') +__C.TEST.BBOX_AUG.COORD_HEUR = 'UNION' + +# Horizontal flip at the original scale (id transform) +__C.TEST.BBOX_AUG.H_FLIP = False + +# Each scale is the pixel size of an image's shortest side +__C.TEST.BBOX_AUG.SCALES = () + +# Max pixel size of the longer side +__C.TEST.BBOX_AUG.MAX_SIZE = 4000 + +# Horizontal flip at each scale +__C.TEST.BBOX_AUG.SCALE_H_FLIP = False + +# Apply scaling based on object size +__C.TEST.BBOX_AUG.SCALE_SIZE_DEP = False +__C.TEST.BBOX_AUG.AREA_TH_LO = 50**2 +__C.TEST.BBOX_AUG.AREA_TH_HI = 
180**2 + +# Each aspect ratio is relative to image width +__C.TEST.BBOX_AUG.ASPECT_RATIOS = () + +# Horizontal flip at each aspect ratio +__C.TEST.BBOX_AUG.ASPECT_RATIO_H_FLIP = False + +# ---------------------------------------------------------------------------- # +# Test-time augmentations for mask detection +# See configs/test_time_aug/e2e_mask_rcnn_R-50-FPN_2x.yaml for an example +# ---------------------------------------------------------------------------- # +__C.TEST.MASK_AUG = AttrDict() + +# Enable test-time augmentation for instance mask detection if True +__C.TEST.MASK_AUG.ENABLED = False + +# Heuristic used to combine mask predictions +# SOFT prefix indicates that the computation is performed on soft masks +# Valid options: ('SOFT_AVG', 'SOFT_MAX', 'LOGIT_AVG') +__C.TEST.MASK_AUG.HEUR = 'SOFT_AVG' + +# Horizontal flip at the original scale (id transform) +__C.TEST.MASK_AUG.H_FLIP = False + +# Each scale is the pixel size of an image's shortest side +__C.TEST.MASK_AUG.SCALES = () + +# Max pixel size of the longer side +__C.TEST.MASK_AUG.MAX_SIZE = 4000 + +# Horizontal flip at each scale +__C.TEST.MASK_AUG.SCALE_H_FLIP = False + +# Apply scaling based on object size +__C.TEST.MASK_AUG.SCALE_SIZE_DEP = False +__C.TEST.MASK_AUG.AREA_TH = 180**2 + +# Each aspect ratio is relative to image width +__C.TEST.MASK_AUG.ASPECT_RATIOS = () + +# Horizontal flip at each aspect ratio +__C.TEST.MASK_AUG.ASPECT_RATIO_H_FLIP = False + +# ---------------------------------------------------------------------------- # +# Test-augmentations for keypoints detection +# configs/test_time_aug/keypoint_rcnn_R-50-FPN_1x.yaml +# ---------------------------------------------------------------------------- # +__C.TEST.KPS_AUG = AttrDict() + +# Enable test-time augmentation for keypoint detection if True +__C.TEST.KPS_AUG.ENABLED = False + +# Heuristic used to combine keypoint predictions +# Valid options: ('HM_AVG', 'HM_MAX') +__C.TEST.KPS_AUG.HEUR = 'HM_AVG' + +# Horizontal flip at 
the original scale (id transform) +__C.TEST.KPS_AUG.H_FLIP = False + +# Each scale is the pixel size of an image's shortest side +__C.TEST.KPS_AUG.SCALES = () + +# Max pixel size of the longer side +__C.TEST.KPS_AUG.MAX_SIZE = 4000 + +# Horizontal flip at each scale +__C.TEST.KPS_AUG.SCALE_H_FLIP = False + +# Apply scaling based on object size +__C.TEST.KPS_AUG.SCALE_SIZE_DEP = False +__C.TEST.KPS_AUG.AREA_TH = 180**2 + +# Eeach aspect ratio is realtive to image width +__C.TEST.KPS_AUG.ASPECT_RATIOS = () + +# Horizontal flip at each aspect ratio +__C.TEST.KPS_AUG.ASPECT_RATIO_H_FLIP = False + +# ---------------------------------------------------------------------------- # +# Soft NMS +# ---------------------------------------------------------------------------- # +__C.TEST.SOFT_NMS = AttrDict() + +# Use soft NMS instead of standard NMS if set to True +__C.TEST.SOFT_NMS.ENABLED = False +# See soft NMS paper for definition of these options +__C.TEST.SOFT_NMS.METHOD = 'linear' +__C.TEST.SOFT_NMS.SIGMA = 0.5 +# For the soft NMS overlap threshold, we simply use TEST.NMS + +# ---------------------------------------------------------------------------- # +# Bounding box voting (from the Multi-Region CNN paper) +# ---------------------------------------------------------------------------- # +__C.TEST.BBOX_VOTE = AttrDict() + +# Use box voting if set to True +__C.TEST.BBOX_VOTE.ENABLED = False + +# We use TEST.NMS threshold for the NMS step. 
VOTE_TH overlap threshold +# is used to select voting boxes (IoU >= VOTE_TH) for each box that survives NMS +__C.TEST.BBOX_VOTE.VOTE_TH = 0.8 + +# The method used to combine scores when doing bounding box voting +# Valid options include ('ID', 'AVG', 'IOU_AVG', 'GENERALIZED_AVG', 'QUASI_SUM') +__C.TEST.BBOX_VOTE.SCORING_METHOD = 'ID' + +# Hyperparameter used by the scoring method (it has different meanings for +# different methods) +__C.TEST.BBOX_VOTE.SCORING_METHOD_BETA = 1.0 + + +# ---------------------------------------------------------------------------- # +# Model options +# ---------------------------------------------------------------------------- # +__C.MODEL = AttrDict() + +# The type of model to use +# The string must match a function in the modeling.model_builder module +# (e.g., 'generalized_rcnn', 'mask_rcnn', ...) +__C.MODEL.TYPE = '' + +# The backbone conv body to use +# The string must match a function that is imported in modeling.model_builder +# (e.g., 'FPN.add_fpn_ResNet101_conv5_body' to specify a ResNet-101-FPN +# backbone) +__C.MODEL.CONV_BODY = '' + +# Number of classes in the dataset; must be set +# E.g., 81 for COCO (80 foreground + 1 background) +__C.MODEL.NUM_CLASSES = -1 + +# Use a class agnostic bounding box regressor instead of the default per-class +# regressor +__C.MODEL.CLS_AGNOSTIC_BBOX_REG = False + +# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets +# These are empirically chosen to approximately lead to unit variance targets +__C.MODEL.BBOX_REG_WEIGHTS = (10., 10., 5., 5.) + +# The meaning of FASTER_RCNN depends on the context (training vs. inference): +# 1) During training, FASTER_RCNN = True means that end-to-end training will be +# used to jointly train the RPN subnetwork and the Fast R-CNN subnetwork +# (Faster R-CNN = RPN + Fast R-CNN). 
+# 2) During inference, FASTER_RCNN = True means that the model's RPN subnetwork +# will be used to generate proposals rather than relying on precomputed +# proposals. Note that FASTER_RCNN = True can be used at inference time even +# if the Faster R-CNN model was trained with stagewise training (which +# consists of alternating between RPN and Fast R-CNN training in a way that +# finally leads to a single network). +__C.MODEL.FASTER_RCNN = False + +# Indicates the model makes instance mask predictions (as in Mask R-CNN) +__C.MODEL.MASK_ON = False + +# Indicates the model makes keypoint predictions (as in Mask R-CNN for +# keypoints) +__C.MODEL.KEYPOINTS_ON = False + +# Indicates the model's computation terminates with the production of RPN +# proposals (i.e., it outputs proposals ONLY, no actual object detections) +__C.MODEL.RPN_ONLY = False + +# Caffe2 net execution type +# Use 'prof_dag' to get profiling statistics +__C.MODEL.EXECUTION_TYPE = 'dag' + + +# ---------------------------------------------------------------------------- # +# RetinaNet options +# ---------------------------------------------------------------------------- # +__C.RETINANET = AttrDict() + +# RetinaNet is used (instead of Fast/er/Mask R-CNN/R-FCN/RPN) if True +__C.RETINANET.RETINANET_ON = False + +# Anchor aspect ratios to use +__C.RETINANET.ASPECT_RATIOS = (0.5, 1.0, 2.0) + +# Anchor scales per octave +__C.RETINANET.SCALES_PER_OCTAVE = 3 + +# At each FPN level, we generate anchors based on their scale, aspect_ratio, +# stride of the level, and we multiply the resulting anchor by ANCHOR_SCALE +__C.RETINANET.ANCHOR_SCALE = 4 + +# Convolutions to use in the cls and bbox tower +# NOTE: this doesn't include the last conv for logits +__C.RETINANET.NUM_CONVS = 4 + +# Weight for bbox_regression loss +__C.RETINANET.BBOX_REG_WEIGHT = 1.0 + +# Smooth L1 loss beta for bbox regression +__C.RETINANET.BBOX_REG_BETA = 0.11 + +# During inference, #locs to select based on cls score before NMS is performed 
+# per FPN level +__C.RETINANET.PRE_NMS_TOP_N = 1000 + +# IoU overlap ratio for labeling an anchor as positive +# Anchors with >= iou overlap are labeled positive +__C.RETINANET.POSITIVE_OVERLAP = 0.5 + +# IoU overlap ratio for labeling an anchor as negative +# Anchors with < iou overlap are labeled negative +__C.RETINANET.NEGATIVE_OVERLAP = 0.4 + +# Focal loss parameter: alpha +__C.RETINANET.LOSS_ALPHA = 0.25 + +# Focal loss parameter: gamma +__C.RETINANET.LOSS_GAMMA = 2.0 + +# Prior prob for the positives at the beginning of training. This is used to set +# the bias init for the logits layer +__C.RETINANET.PRIOR_PROB = 0.01 + +# Whether classification and bbox branch tower should be shared or not +__C.RETINANET.SHARE_CLS_BBOX_TOWER = False + +# Use class specific bounding box regression instead of the default class +# agnostic regression +__C.RETINANET.CLASS_SPECIFIC_BBOX = False + +# Whether softmax should be used in classification branch training +__C.RETINANET.SOFTMAX = False + +# Inference cls score threshold, anchors with score > INFERENCE_TH are +# considered for inference +__C.RETINANET.INFERENCE_TH = 0.05 + + +# ---------------------------------------------------------------------------- # +# Solver options +# Note: all solver options are used exactly as specified; the implication is +# that if you switch from training on 1 GPU to N GPUs, you MUST adjust the +# solver configuration accordingly. We suggest using gradual warmup and the +# linear learning rate scaling rule as described in +# "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour" Goyal et al. +# https://arxiv.org/abs/1706.02677 +# ---------------------------------------------------------------------------- # +__C.SOLVER = AttrDict() + +# Base learning rate for the specified schedule +__C.SOLVER.BASE_LR = 0.001 + +# Schedule type (see functions in utils.lr_policy for options) +# E.g., 'step', 'steps_with_decay', ... 
+__C.SOLVER.LR_POLICY = 'step' + +# Some LR Policies (by example): +# 'step' +# lr = SOLVER.BASE_LR * SOLVER.GAMMA ** (cur_iter // SOLVER.STEP_SIZE) +# 'steps_with_decay' +# SOLVER.STEPS = [0, 60000, 80000] +# SOLVER.GAMMA = 0.1 +# lr = SOLVER.BASE_LR * SOLVER.GAMMA ** current_step +# iters [0, 59999] are in current_step = 0, iters [60000, 79999] are in +# current_step = 1, and so on +# 'steps_with_lrs' +# SOLVER.STEPS = [0, 60000, 80000] +# SOLVER.LRS = [0.02, 0.002, 0.0002] +# lr = LRS[current_step] +# 'cosine_decay' +# lr = SOLVER.BASE_LR * (cos(PI * cur_iter / SOLVER.MAX_ITER) * 0.5 + 0.5) +# 'exp_decay' +# lr smoothly decays from SOLVER.BASE_LR to SOLVER.GAMMA * SOLVER.BASE_LR +# lr = SOLVER.BASE_LR * exp(np.log(SOLVER.GAMMA) * cur_iter / SOLVER.MAX_ITER) + +# Hyperparameter used by the specified policy +# For 'step', the current LR is multiplied by SOLVER.GAMMA at each step +# For 'exp_decay', SOLVER.GAMMA is the ratio between the final and initial LR. +__C.SOLVER.GAMMA = 0.1 + +# Uniform step size for 'steps' policy +__C.SOLVER.STEP_SIZE = 30000 + +# Non-uniform step iterations for 'steps_with_decay' or 'steps_with_lrs' +# policies +__C.SOLVER.STEPS = [] + +# Learning rates to use with 'steps_with_lrs' policy +__C.SOLVER.LRS = [] + +# Maximum number of SGD iterations +__C.SOLVER.MAX_ITER = 40000 + +# Momentum to use with SGD +__C.SOLVER.MOMENTUM = 0.9 + +# L2 regularization hyperparameter +__C.SOLVER.WEIGHT_DECAY = 0.0005 +# L2 regularization hyperparameter for GroupNorm's parameters +__C.SOLVER.WEIGHT_DECAY_GN = 0.0 + +# Warm up to SOLVER.BASE_LR over this number of SGD iterations +__C.SOLVER.WARM_UP_ITERS = 500 + +# Start the warm up from SOLVER.BASE_LR * SOLVER.WARM_UP_FACTOR +__C.SOLVER.WARM_UP_FACTOR = 1.0 / 3.0 + +# WARM_UP_METHOD can be either 'constant' or 'linear' (i.e., gradual) +__C.SOLVER.WARM_UP_METHOD = 'linear' + +# Scale the momentum update history by new_lr / old_lr when updating the +# learning rate (this is correct given 
MomentumSGDUpdateOp)
+__C.SOLVER.SCALE_MOMENTUM = True
+# Only apply the correction if the relative LR change exceeds this threshold
+# (prevents every change in linear warm up from scaling the momentum by a tiny
+# amount; momentum scaling is only important if the LR change is large)
+__C.SOLVER.SCALE_MOMENTUM_THRESHOLD = 1.1
+
+# Suppress logging of changes to LR unless the relative change exceeds this
+# threshold (prevents linear warm up from spamming the training log)
+__C.SOLVER.LOG_LR_CHANGE_THRESHOLD = 1.1
+
+
+# ---------------------------------------------------------------------------- #
+# Fast R-CNN options
+# ---------------------------------------------------------------------------- #
+__C.FAST_RCNN = AttrDict()
+
+# The type of RoI head to use for bounding box classification and regression
+# The string must match a function that is imported in modeling.model_builder
+# (e.g., 'head_builder.add_roi_2mlp_head' to specify a two hidden layer MLP)
+__C.FAST_RCNN.ROI_BOX_HEAD = ''
+
+# Hidden layer dimension when using an MLP for the RoI box head
+__C.FAST_RCNN.MLP_HEAD_DIM = 1024
+
+# Hidden Conv layer dimension when using Convs for the RoI box head
+__C.FAST_RCNN.CONV_HEAD_DIM = 256
+# Number of stacked Conv layers in the RoI box head
+__C.FAST_RCNN.NUM_STACKED_CONVS = 4
+
+# RoI transformation function (e.g., RoIPool or RoIAlign)
+# (RoIPoolF is the same as RoIPool; ignore the trailing 'F')
+__C.FAST_RCNN.ROI_XFORM_METHOD = 'RoIPoolF'
+
+# Number of grid sampling points in RoIAlign (usually use 2)
+# Only applies to RoIAlign
+__C.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO = 0
+
+# RoI transform output resolution
+# Note: some models may have constraints on what they can use, e.g. 
they use +# pretrained FC layers like in VGG16, and will ignore this option +__C.FAST_RCNN.ROI_XFORM_RESOLUTION = 14 + + +# ---------------------------------------------------------------------------- # +# RPN options +# ---------------------------------------------------------------------------- # +__C.RPN = AttrDict() + +# [Infered value; do not set directly in a config] +# Indicates that the model contains an RPN subnetwork +__C.RPN.RPN_ON = False + +# RPN anchor sizes given in absolute pixels w.r.t. the scaled network input +# Note: these options are *not* used by FPN RPN; see FPN.RPN* options +__C.RPN.SIZES = (64, 128, 256, 512) + +# Stride of the feature map that RPN is attached +__C.RPN.STRIDE = 16 + +# RPN anchor aspect ratios +__C.RPN.ASPECT_RATIOS = (0.5, 1, 2) + + +# ---------------------------------------------------------------------------- # +# FPN options +# ---------------------------------------------------------------------------- # +__C.FPN = AttrDict() + +# FPN is enabled if True +__C.FPN.FPN_ON = False + +# Channel dimension of the FPN feature levels +__C.FPN.DIM = 256 + +# Initialize the lateral connections to output zero if True +__C.FPN.ZERO_INIT_LATERAL = False + +# Stride of the coarsest FPN level +# This is needed so the input can be padded properly +__C.FPN.COARSEST_STRIDE = 32 + +# +# FPN may be used for just RPN, just object detection, or both +# + +# Use FPN for RoI transform for object detection if True +__C.FPN.MULTILEVEL_ROIS = False +# Hyperparameters for the RoI-to-FPN level mapping heuristic +__C.FPN.ROI_CANONICAL_SCALE = 224 # s0 +__C.FPN.ROI_CANONICAL_LEVEL = 4 # k0: where s0 maps to +# Coarsest level of the FPN pyramid +__C.FPN.ROI_MAX_LEVEL = 5 +# Finest level of the FPN pyramid +__C.FPN.ROI_MIN_LEVEL = 2 + +# Use FPN for RPN if True +__C.FPN.MULTILEVEL_RPN = False +# Coarsest level of the FPN pyramid +__C.FPN.RPN_MAX_LEVEL = 6 +# Finest level of the FPN pyramid +__C.FPN.RPN_MIN_LEVEL = 2 +# FPN RPN anchor aspect ratios 
+__C.FPN.RPN_ASPECT_RATIOS = (0.5, 1, 2) +# RPN anchors start at this size on RPN_MIN_LEVEL +# The anchor size doubled each level after that +# With a default of 32 and levels 2 to 6, we get anchor sizes of 32 to 512 +__C.FPN.RPN_ANCHOR_START_SIZE = 32 +# Use extra FPN levels, as done in the RetinaNet paper +__C.FPN.EXTRA_CONV_LEVELS = False +# Use GroupNorm in the FPN-specific layers (lateral, etc.) +__C.FPN.USE_GN = False + + +# ---------------------------------------------------------------------------- # +# Mask R-CNN options ("MRCNN" means Mask R-CNN) +# ---------------------------------------------------------------------------- # +__C.MRCNN = AttrDict() + +# The type of RoI head to use for instance mask prediction +# The string must match a function this is imported in modeling.model_builder +# (e.g., 'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v1up4convs') +__C.MRCNN.ROI_MASK_HEAD = '' + +# Resolution of mask predictions +__C.MRCNN.RESOLUTION = 14 + +# RoI transformation function and associated options +__C.MRCNN.ROI_XFORM_METHOD = 'RoIAlign' + +# RoI transformation function (e.g., RoIPool or RoIAlign) +__C.MRCNN.ROI_XFORM_RESOLUTION = 7 + +# Number of grid sampling points in RoIAlign (usually use 2) +# Only applies to RoIAlign +__C.MRCNN.ROI_XFORM_SAMPLING_RATIO = 0 + +# Number of channels in the mask head +__C.MRCNN.DIM_REDUCED = 256 + +# Use dilated convolution in the mask head +__C.MRCNN.DILATION = 2 + +# Upsample the predicted masks by this factor +__C.MRCNN.UPSAMPLE_RATIO = 1 + +# Use a fully-connected layer to predict the final masks instead of a conv layer +__C.MRCNN.USE_FC_OUTPUT = False + +# Weight initialization method for the mask head and mask output layers +__C.MRCNN.CONV_INIT = 'GaussianFill' + +# Use class specific mask predictions if True (otherwise use class agnostic mask +# predictions) +__C.MRCNN.CLS_SPECIFIC_MASK = True + +# Multi-task loss weight for masks +__C.MRCNN.WEIGHT_LOSS_MASK = 1.0 + +# Binarization threshold for converting soft 
masks to hard masks +__C.MRCNN.THRESH_BINARIZE = 0.5 + + +# ---------------------------------------------------------------------------- # +# Keypoint Mask R-CNN options ("KRCNN" = Mask R-CNN with Keypoint support) +# ---------------------------------------------------------------------------- # +__C.KRCNN = AttrDict() + +# The type of RoI head to use for instance keypoint prediction +# The string must match a function this is imported in modeling.model_builder +# (e.g., 'keypoint_rcnn_heads.add_roi_pose_head_v1convX') +__C.KRCNN.ROI_KEYPOINTS_HEAD = '' + +# Output size (and size loss is computed on), e.g., 56x56 +__C.KRCNN.HEATMAP_SIZE = -1 + +# Use bilinear interpolation to upsample the final heatmap by this factor +__C.KRCNN.UP_SCALE = -1 + +# Apply a ConvTranspose layer to the hidden representation computed by the +# keypoint head prior to predicting the per-keypoint heatmaps +__C.KRCNN.USE_DECONV = False +# Channel dimension of the hidden representation produced by the ConvTranspose +__C.KRCNN.DECONV_DIM = 256 + +# Use a ConvTranspose layer to predict the per-keypoint heatmaps +__C.KRCNN.USE_DECONV_OUTPUT = False + +# Use dilation in the keypoint head +__C.KRCNN.DILATION = 1 + +# Size of the kernels to use in all ConvTranspose operations +__C.KRCNN.DECONV_KERNEL = 4 + +# Number of keypoints in the dataset (e.g., 17 for COCO) +__C.KRCNN.NUM_KEYPOINTS = -1 + +# Number of stacked Conv layers in keypoint head +__C.KRCNN.NUM_STACKED_CONVS = 8 + +# Dimension of the hidden representation output by the keypoint head +__C.KRCNN.CONV_HEAD_DIM = 256 + +# Conv kernel size used in the keypoint head +__C.KRCNN.CONV_HEAD_KERNEL = 3 +# Conv kernel weight filling function +__C.KRCNN.CONV_INIT = 'GaussianFill' + +# Use NMS based on OKS if True +__C.KRCNN.NMS_OKS = False + +# Source of keypoint confidence +# Valid options: ('bbox', 'logit', 'prob') +__C.KRCNN.KEYPOINT_CONFIDENCE = 'bbox' + +# Standard ROI XFORM options (see FAST_RCNN or MRCNN options) +__C.KRCNN.ROI_XFORM_METHOD 
= 'RoIAlign' +__C.KRCNN.ROI_XFORM_RESOLUTION = 7 +__C.KRCNN.ROI_XFORM_SAMPLING_RATIO = 0 + +# Minimum number of labeled keypoints that must exist in a minibatch (otherwise +# the minibatch is discarded) +__C.KRCNN.MIN_KEYPOINT_COUNT_FOR_VALID_MINIBATCH = 20 + +# When infering the keypoint locations from the heatmap, don't scale the heatmap +# below this minimum size +__C.KRCNN.INFERENCE_MIN_SIZE = 0 + +# Multi-task loss weight to use for keypoints +# Recommended values: +# - use 1.0 if KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is True +# - use 4.0 if KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is False +__C.KRCNN.LOSS_WEIGHT = 1.0 + +# Normalize by the total number of visible keypoints in the minibatch if True. +# Otherwise, normalize by the total number of keypoints that could ever exist +# in the minibatch. See comments in modeling.model_builder.add_keypoint_losses +# for detailed discussion. +__C.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS = True + + +# ---------------------------------------------------------------------------- # +# R-FCN options +# ---------------------------------------------------------------------------- # +__C.RFCN = AttrDict() + +# Position-sensitive RoI pooling output grid size (height and width) +__C.RFCN.PS_GRID_SIZE = 3 + + +# ---------------------------------------------------------------------------- # +# ResNets options ("ResNets" = ResNet and ResNeXt) +# ---------------------------------------------------------------------------- # +__C.RESNETS = AttrDict() + +# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt +__C.RESNETS.NUM_GROUPS = 1 + +# Baseline width of each group +__C.RESNETS.WIDTH_PER_GROUP = 64 + +# Place the stride 2 conv on the 1x1 filter +# Use True only for the original MSRA ResNet; use False for C2 and Torch models +__C.RESNETS.STRIDE_1X1 = True + +# Residual transformation function +__C.RESNETS.TRANS_FUNC = 'bottleneck_transformation' +# ResNet's stem function (conv1 and pool1) +__C.RESNETS.STEM_FUNC = 'basic_bn_stem' +# 
ResNet's shortcut function +__C.RESNETS.SHORTCUT_FUNC = 'basic_bn_shortcut' + +# Apply dilation in stage "res5" +__C.RESNETS.RES5_DILATION = 1 + + +# ---------------------------------------------------------------------------- # +# GroupNorm options +# ---------------------------------------------------------------------------- # +__C.GROUP_NORM = AttrDict() +# Number of dimensions per group in GroupNorm (-1 if using NUM_GROUPS) +__C.GROUP_NORM.DIM_PER_GP = -1 +# Number of groups in GroupNorm (-1 if using DIM_PER_GP) +__C.GROUP_NORM.NUM_GROUPS = 32 +# GroupNorm's small constant in the denominator +__C.GROUP_NORM.EPSILON = 1e-5 + + +# ---------------------------------------------------------------------------- # +# Misc options +# ---------------------------------------------------------------------------- # + +# Number of GPUs to use (applies to both training and testing) +__C.NUM_GPUS = 1 + +# Use NCCL for all reduce, otherwise use muji +# Warning: if set to True, you may experience deadlocks +__C.USE_NCCL = False + +# The mapping from image coordinates to feature map coordinates might cause +# some boxes that are distinct in image space to become identical in feature +# coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor +# for identifying duplicate boxes. +# 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16 +__C.DEDUP_BOXES = 1 / 16. + +# Clip bounding box transformation predictions to prevent np.exp from +# overflowing +# Heuristic choice based on that would scale a 16 pixel anchor up to 1000 pixels +__C.BBOX_XFORM_CLIP = np.log(1000. / 16.) 
+
+# Pixel mean values (BGR order) as a (1, 1, 3) array
+# We use the same pixel mean for all networks even though it's not exactly what
+# they were trained with
+# "Fun" fact: the history of where these values come from is lost
+__C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
+
+# For reproducibility...but not really because modern fast GPU libraries use
+# non-deterministic op implementations
+__C.RNG_SEED = 3
+
+# A small number that's used many times
+__C.EPS = 1e-14
+
+# Root directory of project
+__C.ROOT_DIR = os.getcwd()
+
+# Output basedir
+__C.OUTPUT_DIR = '/tmp'
+
+# Name (or path to) the matlab executable
+__C.MATLAB = 'matlab'
+
+# Reduce memory usage with memonger gradient blob sharing
+__C.MEMONGER = True
+
+# Further reduce memory by allowing forward pass activations to be shared when
+# possible. Note that this will cause activation blob inspection (values,
+# shapes, etc.) to be meaningless when activation blobs are reused.
+__C.MEMONGER_SHARE_ACTIVATIONS = False
+
+# Dump detection visualizations
+__C.VIS = False
+
+# Score threshold for visualization
+__C.VIS_TH = 0.9
+
+# Expected results should take the form of a list of expectations, each
+# specified by four elements (dataset, task, metric, expected value). 
For +# example: [['coco_2014_minival', 'box_proposal', 'AR@1000', 0.387]] +__C.EXPECTED_RESULTS = [] +# Absolute and relative tolerance to use when comparing to EXPECTED_RESULTS +__C.EXPECTED_RESULTS_RTOL = 0.1 +__C.EXPECTED_RESULTS_ATOL = 0.005 +# When the expected value specifies a mean and standard deviation, we check +# that the actual value is within mean +/- SIGMA_TOL * std +__C.EXPECTED_RESULTS_SIGMA_TOL = 4 +# Set to send email in case of an EXPECTED_RESULTS failure +__C.EXPECTED_RESULTS_EMAIL = '' + +# Models and proposals referred to by URL are downloaded to a local cache +# specified by DOWNLOAD_CACHE +__C.DOWNLOAD_CACHE = '/tmp/detectron-download-cache' + + +# ---------------------------------------------------------------------------- # +# Cluster options +# ---------------------------------------------------------------------------- # +__C.CLUSTER = AttrDict() + +# Flag to indicate if the code is running in a cluster environment +__C.CLUSTER.ON_CLUSTER = False + + +# ---------------------------------------------------------------------------- # +# Deprecated options +# If an option is removed from the code and you don't want to break existing +# yaml configs, you can add the full config key as a string to the set below. +# ---------------------------------------------------------------------------- # +_DEPRECATED_KEYS = set( + { + 'FINAL_MSG', + 'MODEL.DILATION', + 'ROOT_GPU_ID', + 'RPN.ON', + 'TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED', + 'TRAIN.DROPOUT', + 'USE_GPU_NMS', + 'TEST.NUM_TEST_IMAGES', + } +) + + +# ---------------------------------------------------------------------------- # +# Renamed options +# If you rename a config option, record the mapping from the old name to the new +# name in the dictionary below. Optionally, if the type also changed, you can +# make the value a tuple that specifies first the renamed key and then +# instructions for how to edit the config file. 
+# ---------------------------------------------------------------------------- # +_RENAMED_KEYS = { + 'EXAMPLE.RENAMED.KEY': 'EXAMPLE.KEY', # Dummy example to follow + 'MODEL.PS_GRID_SIZE': 'RFCN.PS_GRID_SIZE', + 'MODEL.ROI_HEAD': 'FAST_RCNN.ROI_BOX_HEAD', + 'MRCNN.MASK_HEAD_NAME': 'MRCNN.ROI_MASK_HEAD', + 'TRAIN.DATASET': ( + 'TRAIN.DATASETS', + "Also convert to a tuple, e.g., " + + "'coco_2014_train' -> ('coco_2014_train',) or " + + "'coco_2014_train:coco_2014_valminusminival' -> " + + "('coco_2014_train', 'coco_2014_valminusminival')" + ), + 'TRAIN.PROPOSAL_FILE': ( + 'TRAIN.PROPOSAL_FILES', + "Also convert to a tuple, e.g., " + + "'path/to/file' -> ('path/to/file',) or " + + "'path/to/file1:path/to/file2' -> " + + "('path/to/file1', 'path/to/file2')" + ), + 'TEST.SCALES': ( + 'TEST.SCALE', + "Also convert from a tuple, e.g. (600, ), " + + "to a integer, e.g. 600." + ), + 'TEST.DATASET': ( + 'TEST.DATASETS', + "Also convert from a string, e.g 'coco_2014_minival', " + + "to a tuple, e.g. ('coco_2014_minival', )." + ), + 'TEST.PROPOSAL_FILE': ( + 'TEST.PROPOSAL_FILES', + "Also convert from a string, e.g. '/path/to/props.pkl', " + + "to a tuple, e.g. ('/path/to/props.pkl', )." + ), +} + + +# ---------------------------------------------------------------------------- # +# Renamed modules +# If a module containing a data structure used in the config (e.g. AttrDict) +# is renamed/moved and you don't want to break loading of existing yaml configs +# (e.g. from weights files) you can specify the renamed module below. +# ---------------------------------------------------------------------------- # +_RENAMED_MODULES = { + 'utils.collections': 'detectron.utils.collections', +} + + +def assert_and_infer_cfg(cache_urls=True, make_immutable=True): + """Call this function in your script after you have finished setting all cfg + values that are necessary (e.g., merging a config from a file, merging + command line config options, etc.). 
By default, this function will also
+    mark the global cfg as immutable to prevent changing the global cfg settings
+    during script execution (which can lead to hard to debug errors or code
+    that's harder to understand than is necessary).
+    """
+    if __C.MODEL.RPN_ONLY or __C.MODEL.FASTER_RCNN:
+        __C.RPN.RPN_ON = True
+    if __C.RPN.RPN_ON or __C.RETINANET.RETINANET_ON:
+        __C.TEST.PRECOMPUTED_PROPOSALS = False
+    if cache_urls:
+        cache_cfg_urls()
+    if make_immutable:
+        cfg.immutable(True)
+
+
+def cache_cfg_urls():
+    """Download URLs in the config, cache them locally, and rewrite cfg to make
+    use of the locally cached file.
+    """
+    __C.TRAIN.WEIGHTS = cache_url(__C.TRAIN.WEIGHTS, __C.DOWNLOAD_CACHE)
+    __C.TEST.WEIGHTS = cache_url(__C.TEST.WEIGHTS, __C.DOWNLOAD_CACHE)
+    __C.TRAIN.PROPOSAL_FILES = tuple(
+        cache_url(f, __C.DOWNLOAD_CACHE) for f in __C.TRAIN.PROPOSAL_FILES
+    )
+    __C.TEST.PROPOSAL_FILES = tuple(
+        cache_url(f, __C.DOWNLOAD_CACHE) for f in __C.TEST.PROPOSAL_FILES
+    )
+
+
+def get_output_dir(datasets, training=True):
+    """Get the output directory determined by the current global config."""
+    assert isinstance(datasets, tuple([tuple, list] + list(six.string_types))), \
+        'datasets argument must be of type tuple, list or string'
+    is_string = isinstance(datasets, six.string_types)
+    dataset_name = datasets if is_string else ':'.join(datasets)
+    tag = 'train' if training else 'test'
+    # Layout: <OUTPUT_DIR>/<train|test>/<dataset_name>/<MODEL.TYPE>
+    outdir = osp.join(__C.OUTPUT_DIR, tag, dataset_name, __C.MODEL.TYPE)
+    if not osp.exists(outdir):
+        os.makedirs(outdir)
+    return outdir
+
+
+def load_cfg(cfg_to_load):
+    """Wrapper around yaml.load used for maintaining backward compatibility"""
+    file_types = [file, io.IOBase] if six.PY2 else [io.IOBase]  # noqa false positive
+    expected_types = tuple(file_types + list(six.string_types))
+    assert isinstance(cfg_to_load, expected_types), \
+        'Expected one of {}, got {}'.format(expected_types, type(cfg_to_load))
+    if isinstance(cfg_to_load, tuple(file_types)):
+        cfg_to_load 
= ''.join(cfg_to_load.readlines()) + for old_module, new_module in iteritems(_RENAMED_MODULES): + # yaml object encoding: !!python/object/new:. + old_module, new_module = 'new:' + old_module, 'new:' + new_module + cfg_to_load = cfg_to_load.replace(old_module, new_module) + # Import inline due to a circular dependency between env.py and config.py + import detectron.utils.env as envu + return envu.yaml_load(cfg_to_load) + + +def merge_cfg_from_file(cfg_filename): + """Load a yaml config file and merge it into the global config.""" + with open(cfg_filename, 'r') as f: + yaml_cfg = AttrDict(load_cfg(f)) + _merge_a_into_b(yaml_cfg, __C) + + +def merge_cfg_from_cfg(cfg_other): + """Merge `cfg_other` into the global config.""" + _merge_a_into_b(cfg_other, __C) + + +def merge_cfg_from_list(cfg_list): + """Merge config keys, values in a list (e.g., from command line) into the + global config. For example, `cfg_list = ['TEST.NMS', 0.5]`. + """ + assert len(cfg_list) % 2 == 0 + for full_key, v in zip(cfg_list[0::2], cfg_list[1::2]): + if _key_is_deprecated(full_key): + continue + if _key_is_renamed(full_key): + _raise_key_rename_error(full_key) + key_list = full_key.split('.') + d = __C + for subkey in key_list[:-1]: + assert subkey in d, 'Non-existent key: {}'.format(full_key) + d = d[subkey] + subkey = key_list[-1] + assert subkey in d, 'Non-existent key: {}'.format(full_key) + value = _decode_cfg_value(v) + value = _check_and_coerce_cfg_value_type( + value, d[subkey], subkey, full_key + ) + d[subkey] = value + + +def _merge_a_into_b(a, b, stack=None): + """Merge config dictionary a into config dictionary b, clobbering the + options in b whenever they are also specified in a. + """ + assert isinstance(a, AttrDict), \ + '`a` (cur type {}) must be an instance of {}'.format(type(a), AttrDict) + assert isinstance(b, AttrDict), \ + '`b` (cur type {}) must be an instance of {}'.format(type(b), AttrDict) + + for k, v_ in a.items(): + full_key = '.'.join(stack) + '.' 
+ k if stack is not None else k + # a must specify keys that are in b + if k not in b: + if _key_is_deprecated(full_key): + continue + elif _key_is_renamed(full_key): + _raise_key_rename_error(full_key) + else: + raise KeyError('Non-existent config key: {}'.format(full_key)) + + v = copy.deepcopy(v_) + v = _decode_cfg_value(v) + v = _check_and_coerce_cfg_value_type(v, b[k], k, full_key) + + # Recursively merge dicts + if isinstance(v, AttrDict): + try: + stack_push = [k] if stack is None else stack + [k] + _merge_a_into_b(v, b[k], stack=stack_push) + except BaseException: + raise + else: + b[k] = v + + +def _key_is_deprecated(full_key): + if full_key in _DEPRECATED_KEYS: + logger.warn( + 'Deprecated config key (ignoring): {}'.format(full_key) + ) + return True + return False + + +def _key_is_renamed(full_key): + return full_key in _RENAMED_KEYS + + +def _raise_key_rename_error(full_key): + new_key = _RENAMED_KEYS[full_key] + if isinstance(new_key, tuple): + msg = ' Note: ' + new_key[1] + new_key = new_key[0] + else: + msg = '' + raise KeyError( + 'Key {} was renamed to {}; please update your config.{}'. + format(full_key, new_key, msg) + ) + + +def _decode_cfg_value(v): + """Decodes a raw config value (e.g., from a yaml config files or command + line argument) into a Python object. + """ + # Configs parsed from raw yaml will contain dictionary keys that need to be + # converted to AttrDict objects + if isinstance(v, dict): + return AttrDict(v) + # All remaining processing is only applied to strings + if not isinstance(v, six.string_types): + return v + # Try to interpret `v` as a: + # string, number, tuple, list, dict, boolean, or None + try: + v = literal_eval(v) + # The following two excepts allow v to pass through when it represents a + # string. + # + # Longer explanation: + # The type of v is always a string (before calling literal_eval), but + # sometimes it *represents* a string and other times a data structure, like + # a list. 
In the case that v represents a string, what we got back from the + # yaml parser is 'foo' *without quotes* (so, not '"foo"'). literal_eval is + # ok with '"foo"', but will raise a ValueError if given 'foo'. In other + # cases, like paths (v = 'foo/bar' and not v = '"foo/bar"'), literal_eval + # will raise a SyntaxError. + except ValueError: + pass + except SyntaxError: + pass + return v + + +def _check_and_coerce_cfg_value_type(value_a, value_b, key, full_key): + """Checks that `value_a`, which is intended to replace `value_b` is of the + right type. The type is correct if it matches exactly or is one of a few + cases in which the type can be easily coerced. + """ + # The types must match (with some exceptions) + type_b = type(value_b) + type_a = type(value_a) + if type_a is type_b: + return value_a + + # Exceptions: numpy arrays, strings, tuple<->list + if isinstance(value_b, np.ndarray): + value_a = np.array(value_a, dtype=value_b.dtype) + elif isinstance(value_b, six.string_types): + value_a = str(value_a) + elif isinstance(value_a, tuple) and isinstance(value_b, list): + value_a = list(value_a) + elif isinstance(value_a, list) and isinstance(value_b, tuple): + value_a = tuple(value_a) + else: + raise ValueError( + 'Type mismatch ({} vs. {}) with values ({} vs. {}) for config ' + 'key: {}'.format(type_b, type_a, value_b, value_a, full_key) + ) + return value_a diff --git a/detectron/core/rpn_generator.py b/detectron/core/rpn_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..d15819f68c354b618cf3e13b9055d247d8aa39fb --- /dev/null +++ b/detectron/core/rpn_generator.py @@ -0,0 +1,279 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
def generate_rpn_on_dataset(
    weights_file,
    dataset_name,
    _proposal_file_ignored,
    output_dir,
    multi_gpu=False,
    gpu_id=0
):
    """Run inference on a dataset."""
    dataset = JsonDataset(dataset_name)
    timer = Timer()
    timer.tic()
    if multi_gpu:
        # Fan out over subprocesses; each one handles a slice of the dataset.
        num_images = len(dataset.get_roidb())
        _boxes, _scores, _ids, rpn_file = multi_gpu_generate_rpn_on_dataset(
            weights_file, dataset_name, _proposal_file_ignored, num_images,
            output_dir
        )
    else:
        # Single-GPU path: process the entire dataset range in this process.
        _boxes, _scores, _ids, rpn_file = generate_rpn_on_range(
            weights_file,
            dataset_name,
            _proposal_file_ignored,
            output_dir,
            gpu_id=gpu_id
        )
    timer.toc()
    logger.info('Total inference time: {:.3f}s'.format(timer.average_time))
    # Evaluate average recall of the written proposal file.
    return evaluate_proposal_file(dataset, rpn_file, output_dir)
+ """ + assert cfg.MODEL.RPN_ONLY or cfg.MODEL.FASTER_RCNN + + roidb, start_ind, end_ind, total_num_images = get_roidb( + dataset_name, ind_range + ) + logger.info( + 'Output will be saved to: {:s}'.format(os.path.abspath(output_dir)) + ) + + model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id) + nu.initialize_gpu_from_weights_file( + model, weights_file, gpu_id=gpu_id, + ) + model_builder.add_inference_inputs(model) + workspace.CreateNet(model.net) + + boxes, scores, ids = generate_proposals_on_roidb( + model, + roidb, + start_ind=start_ind, + end_ind=end_ind, + total_num_images=total_num_images, + gpu_id=gpu_id, + ) + + cfg_yaml = envu.yaml_dump(cfg) + if ind_range is not None: + rpn_name = 'rpn_proposals_range_%s_%s.pkl' % tuple(ind_range) + else: + rpn_name = 'rpn_proposals.pkl' + rpn_file = os.path.join(output_dir, rpn_name) + save_object( + dict(boxes=boxes, scores=scores, ids=ids, cfg=cfg_yaml), rpn_file + ) + logger.info('Wrote RPN proposals to {}'.format(os.path.abspath(rpn_file))) + return boxes, scores, ids, rpn_file + + +def generate_proposals_on_roidb( + model, roidb, start_ind=None, end_ind=None, total_num_images=None, + gpu_id=0, +): + """Generate RPN proposals on all images in an imdb.""" + _t = Timer() + num_images = len(roidb) + roidb_boxes = [[] for _ in range(num_images)] + roidb_scores = [[] for _ in range(num_images)] + roidb_ids = [[] for _ in range(num_images)] + if start_ind is None: + start_ind = 0 + end_ind = num_images + total_num_images = num_images + for i in range(num_images): + roidb_ids[i] = roidb[i]['id'] + im = cv2.imread(roidb[i]['image']) + with c2_utils.NamedCudaScope(gpu_id): + _t.tic() + roidb_boxes[i], roidb_scores[i] = im_proposals(model, im) + _t.toc() + if i % 10 == 0: + ave_time = _t.average_time + eta_seconds = ave_time * (num_images - i - 1) + eta = str(datetime.timedelta(seconds=int(eta_seconds))) + logger.info( + ( + 'rpn_generate: range [{:d}, {:d}] of {:d}: ' + '{:d}/{:d} {:.3f}s (eta: {})' + 
def im_proposals(model, im):
    """Generate RPN proposals on a single image.

    Returns (boxes, scores): boxes is N x 4 in the original image coordinate
    frame; scores is the corresponding 1-D objectness array.
    """
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))
    workspace.RunNet(model.net.Proto().name)

    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # Fetch per-level FPN proposals; blob order is rois first, then scores,
        # so the slicing below by len(rois_names) must stay in sync.
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        rois_names = [
            core.ScopedName('rpn_rois_fpn' + str(l))
            for l in range(k_min, k_max + 1)
        ]
        score_names = [
            core.ScopedName('rpn_roi_probs_fpn' + str(l))
            for l in range(k_min, k_max + 1)
        ]
        blobs = workspace.FetchBlobs(rois_names + score_names)
        # Combine predictions across all levels and retain the top scoring
        boxes = np.concatenate(blobs[:len(rois_names)])
        scores = np.concatenate(blobs[len(rois_names):]).squeeze()
        # Discussion: one could do NMS again after combining predictions from
        # the different FPN levels. Conceptually, it's probably the right thing
        # to do. For arbitrary reasons, the original FPN RPN implementation did
        # not do another round of NMS.
        inds = np.argsort(-scores)[:cfg.TEST.RPN_POST_NMS_TOP_N]
        scores = scores[inds]
        boxes = boxes[inds, :]
    else:
        # Single-level RPN: fetch the combined proposals directly.
        boxes, scores = workspace.FetchBlobs(
            [core.ScopedName('rpn_rois'),
             core.ScopedName('rpn_roi_probs')]
        )
        scores = scores.squeeze()

    # Column 0 is the batch index in the (batch ind, x1, y1, x2, y2) encoding,
    # so we remove it since we just want to return boxes
    # Scale proposals back to the original input image scale
    boxes = boxes[:, 1:] / im_scale
    return boxes, scores
def get_roidb(dataset_name, ind_range):
    """Get the roidb for the dataset specified in the global cfg. Optionally
    restrict it to a range of indices if ind_range is a pair of integers.

    Returns (roidb, start, end, total_num_images); total_num_images always
    refers to the full (unsliced) dataset size.
    """
    roidb = JsonDataset(dataset_name).get_roidb()
    total_num_images = len(roidb)

    if ind_range is None:
        start, end = 0, total_num_images
    else:
        start, end = ind_range
        roidb = roidb[start:end]

    return roidb, start, end, total_num_images
def im_detect_all(model, im, box_proposals, timers=None):
    """Run full inference (boxes, and optionally masks/keypoints) on one image.

    Returns (cls_boxes, cls_segms, cls_keyps); the latter two are None when the
    corresponding heads are disabled or no boxes survive NMS.
    """
    if timers is None:
        timers = defaultdict(Timer)

    # Handle RetinaNet testing separately for now
    if cfg.RETINANET.RETINANET_ON:
        cls_boxes = test_retinanet.im_detect_bbox(model, im, timers)
        return cls_boxes, None, None

    timers['im_detect_bbox'].tic()
    if cfg.TEST.BBOX_AUG.ENABLED:
        scores, boxes, im_scale = im_detect_bbox_aug(model, im, box_proposals)
    else:
        scores, boxes, im_scale = im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=box_proposals
        )
    timers['im_detect_bbox'].toc()

    # score and boxes are from the whole image after score thresholding and nms
    # (they are not separated by class)
    # cls_boxes boxes and scores are separated by class and in the format used
    # for evaluating results
    timers['misc_bbox'].tic()
    scores, boxes, cls_boxes = box_results_with_nms_and_limit(scores, boxes)
    timers['misc_bbox'].toc()

    # Mask head runs only on post-NMS boxes; skipped entirely when empty.
    if cfg.MODEL.MASK_ON and boxes.shape[0] > 0:
        timers['im_detect_mask'].tic()
        if cfg.TEST.MASK_AUG.ENABLED:
            masks = im_detect_mask_aug(model, im, boxes)
        else:
            masks = im_detect_mask(model, im_scale, boxes)
        timers['im_detect_mask'].toc()

        timers['misc_mask'].tic()
        cls_segms = segm_results(
            cls_boxes, masks, boxes, im.shape[0], im.shape[1]
        )
        timers['misc_mask'].toc()
    else:
        cls_segms = None

    # Keypoint head likewise runs only on surviving boxes.
    if cfg.MODEL.KEYPOINTS_ON and boxes.shape[0] > 0:
        timers['im_detect_keypoints'].tic()
        if cfg.TEST.KPS_AUG.ENABLED:
            heatmaps = im_detect_keypoints_aug(model, im, boxes)
        else:
            heatmaps = im_detect_keypoints(model, im_scale, boxes)
        timers['im_detect_keypoints'].toc()

        timers['misc_keypoints'].tic()
        cls_keyps = keypoint_results(cls_boxes, heatmaps, boxes)
        timers['misc_keypoints'].toc()
    else:
        cls_keyps = None

    return cls_boxes, cls_segms, cls_keyps
def im_detect_bbox(model, im, target_scale, target_max_size, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Hash each quantized ROI; collisions identify duplicates.
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True
        )
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
        )
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Broadcast the single class-agnostic box to all K classes.
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scale
def im_detect_bbox_aug(model, im, box_proposals=None):
    """Performs bbox detection with test-time augmentations.
    Function signature is the same as for im_detect_bbox.
    """
    assert not cfg.TEST.BBOX_AUG.SCALE_SIZE_DEP, \
        'Size dependent scaling not implemented'
    assert not cfg.TEST.BBOX_AUG.SCORE_HEUR == 'UNION' or \
        cfg.TEST.BBOX_AUG.COORD_HEUR == 'UNION', \
        'Coord heuristic must be union whenever score heuristic is union'
    assert not cfg.TEST.BBOX_AUG.COORD_HEUR == 'UNION' or \
        cfg.TEST.BBOX_AUG.SCORE_HEUR == 'UNION', \
        'Score heuristic must be union whenever coord heuristic is union'
    assert not cfg.MODEL.FASTER_RCNN or \
        cfg.TEST.BBOX_AUG.SCORE_HEUR == 'UNION', \
        'Union heuristic must be used to combine Faster RCNN predictions'

    # Collect detections computed under different transformations
    scores_ts = []
    boxes_ts = []

    def add_preds_t(scores_t, boxes_t):
        scores_ts.append(scores_t)
        boxes_ts.append(boxes_t)

    # Perform detection on the horizontally flipped image
    if cfg.TEST.BBOX_AUG.H_FLIP:
        scores_hf, boxes_hf, _ = im_detect_bbox_hflip(
            model,
            im,
            cfg.TEST.SCALE,
            cfg.TEST.MAX_SIZE,
            box_proposals=box_proposals
        )
        add_preds_t(scores_hf, boxes_hf)

    # Compute detections at different scales
    for scale in cfg.TEST.BBOX_AUG.SCALES:
        max_size = cfg.TEST.BBOX_AUG.MAX_SIZE
        scores_scl, boxes_scl = im_detect_bbox_scale(
            model, im, scale, max_size, box_proposals
        )
        add_preds_t(scores_scl, boxes_scl)

        if cfg.TEST.BBOX_AUG.SCALE_H_FLIP:
            scores_scl_hf, boxes_scl_hf = im_detect_bbox_scale(
                model, im, scale, max_size, box_proposals, hflip=True
            )
            add_preds_t(scores_scl_hf, boxes_scl_hf)

    # Perform detection at different aspect ratios
    for aspect_ratio in cfg.TEST.BBOX_AUG.ASPECT_RATIOS:
        scores_ar, boxes_ar = im_detect_bbox_aspect_ratio(
            model, im, aspect_ratio, box_proposals
        )
        add_preds_t(scores_ar, boxes_ar)

        if cfg.TEST.BBOX_AUG.ASPECT_RATIO_H_FLIP:
            scores_ar_hf, boxes_ar_hf = im_detect_bbox_aspect_ratio(
                model, im, aspect_ratio, box_proposals, hflip=True
            )
            add_preds_t(scores_ar_hf, boxes_ar_hf)

    # Compute detections for the original image (identity transform) last to
    # ensure that the Caffe2 workspace is populated with blobs corresponding
    # to the original image on return (postcondition of im_detect_bbox)
    scores_i, boxes_i, im_scale_i = im_detect_bbox(
        model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=box_proposals
    )
    add_preds_t(scores_i, boxes_i)

    # Combine the predicted scores
    if cfg.TEST.BBOX_AUG.SCORE_HEUR == 'ID':
        scores_c = scores_i
    elif cfg.TEST.BBOX_AUG.SCORE_HEUR == 'AVG':
        scores_c = np.mean(scores_ts, axis=0)
    elif cfg.TEST.BBOX_AUG.SCORE_HEUR == 'UNION':
        scores_c = np.vstack(scores_ts)
    else:
        raise NotImplementedError(
            'Score heur {} not supported'.format(cfg.TEST.BBOX_AUG.SCORE_HEUR)
        )

    # Combine the predicted boxes
    if cfg.TEST.BBOX_AUG.COORD_HEUR == 'ID':
        boxes_c = boxes_i
    elif cfg.TEST.BBOX_AUG.COORD_HEUR == 'AVG':
        boxes_c = np.mean(boxes_ts, axis=0)
    elif cfg.TEST.BBOX_AUG.COORD_HEUR == 'UNION':
        boxes_c = np.vstack(boxes_ts)
    else:
        raise NotImplementedError(
            'Coord heur {} not supported'.format(cfg.TEST.BBOX_AUG.COORD_HEUR)
        )

    return scores_c, boxes_c, im_scale_i
def im_detect_bbox_hflip(
    model, im, target_scale, target_max_size, box_proposals=None
):
    """Performs bbox detection on the horizontally flipped image.
    Function signature is the same as for im_detect_bbox.
    """
    im_width = im.shape[1]
    flipped = im[:, ::-1, :]

    # Faster R-CNN models generate proposals internally; only precomputed
    # proposals need to be mirrored to match the flipped image.
    proposals_hf = None
    if not cfg.MODEL.FASTER_RCNN:
        proposals_hf = box_utils.flip_boxes(box_proposals, im_width)

    scores_hf, boxes_hf, im_scale = im_detect_bbox(
        model, flipped, target_scale, target_max_size, boxes=proposals_hf
    )

    # Map the flipped-frame detections back into the original frame.
    return scores_hf, box_utils.flip_boxes(boxes_hf, im_width), im_scale
+ """ + # Compute predictions on the transformed image + im_ar = image_utils.aspect_ratio_rel(im, aspect_ratio) + + if not cfg.MODEL.FASTER_RCNN: + box_proposals_ar = box_utils.aspect_ratio(box_proposals, aspect_ratio) + else: + box_proposals_ar = None + + if hflip: + scores_ar, boxes_ar, _ = im_detect_bbox_hflip( + model, + im_ar, + cfg.TEST.SCALE, + cfg.TEST.MAX_SIZE, + box_proposals=box_proposals_ar + ) + else: + scores_ar, boxes_ar, _ = im_detect_bbox( + model, + im_ar, + cfg.TEST.SCALE, + cfg.TEST.MAX_SIZE, + boxes=box_proposals_ar + ) + + # Invert the detected boxes + boxes_inv = box_utils.aspect_ratio(boxes_ar, 1.0 / aspect_ratio) + + return scores_ar, boxes_inv + + +def im_detect_mask(model, im_scale, boxes): + """Infer instance segmentation masks. This function must be called after + im_detect_bbox as it assumes that the Caffe2 workspace is already populated + with the necessary blobs. + + Arguments: + model (DetectionModelHelper): the detection model to use + im_scales (list): image blob scales as returned by im_detect_bbox + boxes (ndarray): R x 4 array of bounding box detections (e.g., as + returned by im_detect_bbox) + + Returns: + pred_masks (ndarray): R x K x M x M array of class specific soft masks + output by the network (must be processed by segm_results to convert + into hard masks in the original image coordinate space) + """ + M = cfg.MRCNN.RESOLUTION + if boxes.shape[0] == 0: + pred_masks = np.zeros((0, M, M), np.float32) + return pred_masks + + inputs = {'mask_rois': _get_rois_blob(boxes, im_scale)} + # Add multi-level rois for FPN + if cfg.FPN.MULTILEVEL_ROIS: + _add_multilevel_rois_for_test(inputs, 'mask_rois') + + for k, v in inputs.items(): + workspace.FeedBlob(core.ScopedName(k), v) + workspace.RunNet(model.mask_net.Proto().name) + + # Fetch masks + pred_masks = workspace.FetchBlob( + core.ScopedName('mask_fcn_probs') + ).squeeze() + + if cfg.MRCNN.CLS_SPECIFIC_MASK: + pred_masks = pred_masks.reshape([-1, cfg.MODEL.NUM_CLASSES, M, M]) + 
def im_detect_mask_aug(model, im, boxes):
    """Performs mask detection with test-time augmentations.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): BGR image to test
        boxes (ndarray): R x 4 array of bounding boxes

    Returns:
        masks (ndarray): R x K x M x M array of class specific soft masks
    """
    assert not cfg.TEST.MASK_AUG.SCALE_SIZE_DEP, \
        'Size dependent scaling not implemented'

    # Collect masks computed under different transformations
    masks_ts = []

    # Compute masks for the original image (identity transform)
    im_scale_i = im_conv_body_only(model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    masks_i = im_detect_mask(model, im_scale_i, boxes)
    masks_ts.append(masks_i)

    # Perform mask detection on the horizontally flipped image
    if cfg.TEST.MASK_AUG.H_FLIP:
        masks_hf = im_detect_mask_hflip(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes
        )
        masks_ts.append(masks_hf)

    # Compute detections at different scales
    for scale in cfg.TEST.MASK_AUG.SCALES:
        max_size = cfg.TEST.MASK_AUG.MAX_SIZE
        masks_scl = im_detect_mask_scale(model, im, scale, max_size, boxes)
        masks_ts.append(masks_scl)

        if cfg.TEST.MASK_AUG.SCALE_H_FLIP:
            masks_scl_hf = im_detect_mask_scale(
                model, im, scale, max_size, boxes, hflip=True
            )
            masks_ts.append(masks_scl_hf)

    # Compute masks at different aspect ratios
    for aspect_ratio in cfg.TEST.MASK_AUG.ASPECT_RATIOS:
        masks_ar = im_detect_mask_aspect_ratio(model, im, aspect_ratio, boxes)
        masks_ts.append(masks_ar)

        if cfg.TEST.MASK_AUG.ASPECT_RATIO_H_FLIP:
            masks_ar_hf = im_detect_mask_aspect_ratio(
                model, im, aspect_ratio, boxes, hflip=True
            )
            masks_ts.append(masks_ar_hf)

    # Combine the predicted soft masks
    if cfg.TEST.MASK_AUG.HEUR == 'SOFT_AVG':
        masks_c = np.mean(masks_ts, axis=0)
    elif cfg.TEST.MASK_AUG.HEUR == 'SOFT_MAX':
        masks_c = np.amax(masks_ts, axis=0)
    elif cfg.TEST.MASK_AUG.HEUR == 'LOGIT_AVG':
        # Average in logit space, then map back through a sigmoid; the
        # 1e-20 floor guards against log(0) / division by zero.
        def logit(y):
            return -1.0 * np.log((1.0 - y) / np.maximum(y, 1e-20))

        logit_masks = [logit(y) for y in masks_ts]
        logit_masks = np.mean(logit_masks, axis=0)
        masks_c = 1.0 / (1.0 + np.exp(-logit_masks))
    else:
        raise NotImplementedError(
            'Heuristic {} not supported'.format(cfg.TEST.MASK_AUG.HEUR)
        )

    return masks_c
def im_detect_mask_hflip(model, im, target_scale, target_max_size, boxes):
    """Performs mask detection on the horizontally flipped image.
    Function signature is the same as for im_detect_mask_aug.
    """
    width = im.shape[1]
    flipped_im = im[:, ::-1, :]
    flipped_boxes = box_utils.flip_boxes(boxes, width)

    # Populate the workspace with conv features for the mirrored image, then
    # run the mask head on the mirrored boxes.
    scale = im_conv_body_only(model, flipped_im, target_scale, target_max_size)
    flipped_masks = im_detect_mask(model, scale, flipped_boxes)

    # Undo the flip along the mask x-axis (last dimension).
    return flipped_masks[:, :, :, ::-1]
def im_detect_keypoints(model, im_scale, boxes):
    """Infer instance keypoint poses. This function must be called after
    im_detect_bbox as it assumes that the Caffe2 workspace is already populated
    with the necessary blobs.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im_scales (list): image blob scales as returned by im_detect_bbox
        boxes (ndarray): R x 4 array of bounding box detections (e.g., as
            returned by im_detect_bbox)

    Returns:
        pred_heatmaps (ndarray): R x J x M x M array of keypoint location
            logits (softmax inputs) for each of the J keypoint types output
            by the network (must be processed by keypoint_results to convert
            into point predictions in the original image coordinate space)
    """
    M = cfg.KRCNN.HEATMAP_SIZE
    if boxes.shape[0] == 0:
        # No boxes: return an empty, correctly-shaped heatmap array.
        pred_heatmaps = np.zeros((0, cfg.KRCNN.NUM_KEYPOINTS, M, M), np.float32)
        return pred_heatmaps

    inputs = {'keypoint_rois': _get_rois_blob(boxes, im_scale)}

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS:
        _add_multilevel_rois_for_test(inputs, 'keypoint_rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.keypoint_net.Proto().name)

    pred_heatmaps = workspace.FetchBlob(core.ScopedName('kps_score')).squeeze()

    # In case of 1 (squeeze() dropped the instance dim; restore it)
    if pred_heatmaps.ndim == 3:
        pred_heatmaps = np.expand_dims(pred_heatmaps, axis=0)

    return pred_heatmaps
def im_detect_keypoints_aug(model, im, boxes):
    """Computes keypoint predictions with test-time augmentations.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): BGR image to test
        boxes (ndarray): R x 4 array of bounding boxes

    Returns:
        heatmaps (ndarray): R x J x M x M array of keypoint location logits
    """

    # Collect heatmaps predicted under different transformations
    heatmaps_ts = []
    # Tag predictions computed under downscaling and upscaling transformations
    ds_ts = []
    us_ts = []

    def add_heatmaps_t(heatmaps_t, ds_t=False, us_t=False):
        heatmaps_ts.append(heatmaps_t)
        ds_ts.append(ds_t)
        us_ts.append(us_t)

    # Compute the heatmaps for the original image (identity transform)
    im_scale = im_conv_body_only(model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    heatmaps_i = im_detect_keypoints(model, im_scale, boxes)
    add_heatmaps_t(heatmaps_i)

    # Perform keypoints detection on the horizontally flipped image
    if cfg.TEST.KPS_AUG.H_FLIP:
        heatmaps_hf = im_detect_keypoints_hflip(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes
        )
        add_heatmaps_t(heatmaps_hf)

    # Compute detections at different scales
    for scale in cfg.TEST.KPS_AUG.SCALES:
        # Mark whether this pass down- or up-scales relative to the base scale
        ds_scl = scale < cfg.TEST.SCALE
        us_scl = scale > cfg.TEST.SCALE
        heatmaps_scl = im_detect_keypoints_scale(
            model, im, scale, cfg.TEST.KPS_AUG.MAX_SIZE, boxes
        )
        add_heatmaps_t(heatmaps_scl, ds_scl, us_scl)

        if cfg.TEST.KPS_AUG.SCALE_H_FLIP:
            heatmaps_scl_hf = im_detect_keypoints_scale(
                model, im, scale, cfg.TEST.KPS_AUG.MAX_SIZE, boxes, hflip=True
            )
            add_heatmaps_t(heatmaps_scl_hf, ds_scl, us_scl)

    # Compute keypoints at different aspect ratios
    for aspect_ratio in cfg.TEST.KPS_AUG.ASPECT_RATIOS:
        heatmaps_ar = im_detect_keypoints_aspect_ratio(
            model, im, aspect_ratio, boxes
        )
        add_heatmaps_t(heatmaps_ar)

        if cfg.TEST.KPS_AUG.ASPECT_RATIO_H_FLIP:
            heatmaps_ar_hf = im_detect_keypoints_aspect_ratio(
                model, im, aspect_ratio, boxes, hflip=True
            )
            add_heatmaps_t(heatmaps_ar_hf)

    # Select the heuristic function for combining the heatmaps
    if cfg.TEST.KPS_AUG.HEUR == 'HM_AVG':
        np_f = np.mean
    elif cfg.TEST.KPS_AUG.HEUR == 'HM_MAX':
        np_f = np.amax
    else:
        raise NotImplementedError(
            'Heuristic {} not supported'.format(cfg.TEST.KPS_AUG.HEUR)
        )

    def heur_f(hms_ts):
        return np_f(hms_ts, axis=0)

    # Combine the heatmaps
    if cfg.TEST.KPS_AUG.SCALE_SIZE_DEP:
        heatmaps_c = combine_heatmaps_size_dep(
            heatmaps_ts, ds_ts, us_ts, boxes, heur_f
        )
    else:
        heatmaps_c = heur_f(heatmaps_ts)

    return heatmaps_c
def im_detect_keypoints_hflip(model, im, target_scale, target_max_size, boxes):
    """Computes keypoint predictions on the horizontally flipped image.
    Function signature is the same as for im_detect_keypoints_aug.
    """
    width = im.shape[1]
    mirrored_im = im[:, ::-1, :]
    mirrored_boxes = box_utils.flip_boxes(boxes, width)

    # Run the conv body and keypoint head on the mirrored inputs.
    scale = im_conv_body_only(model, mirrored_im, target_scale, target_max_size)
    mirrored_heatmaps = im_detect_keypoints(model, scale, mirrored_boxes)

    # Map the heatmaps back to the original (unflipped) orientation.
    return keypoint_utils.flip_heatmaps(mirrored_heatmaps)
def combine_heatmaps_size_dep(hms_ts, ds_ts, us_ts, boxes, heur_f):
    """Combines heatmaps while taking object sizes into account."""
    assert len(hms_ts) == len(ds_ts) and len(ds_ts) == len(us_ts), \
        'All sets of hms must be tagged with downscaling and upscaling flags'

    # Split objects into small+medium vs. large by box area threshold.
    areas = box_utils.boxes_area(boxes)
    small_medium = areas < cfg.TEST.KPS_AUG.AREA_TH
    large = areas >= cfg.TEST.KPS_AUG.AREA_TH

    combined = np.zeros_like(hms_ts[0])
    for idx in range(combined.shape[0]):
        # Keep only the transformations appropriate for this object's size:
        # drop downscaled passes for small/medium objects and upscaled passes
        # for large objects, then merge the rest with the heuristic.
        selected = [
            hms_t[idx]
            for hms_t, ds_t, us_t in zip(hms_ts, ds_ts, us_ts)
            if not (small_medium[idx] and ds_t) and not (large[idx] and us_t)
        ]
        combined[idx] = heur_f(selected)

    return combined
+ """ + num_classes = cfg.MODEL.NUM_CLASSES + cls_boxes = [[] for _ in range(num_classes)] + # Apply threshold on detection probabilities and apply NMS + # Skip j = 0, because it's the background class + for j in range(1, num_classes): + inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0] + scores_j = scores[inds, j] + boxes_j = boxes[inds, j * 4:(j + 1) * 4] + dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype( + np.float32, copy=False + ) + if cfg.TEST.SOFT_NMS.ENABLED: + nms_dets, _ = box_utils.soft_nms( + dets_j, + sigma=cfg.TEST.SOFT_NMS.SIGMA, + overlap_thresh=cfg.TEST.NMS, + score_thresh=0.0001, + method=cfg.TEST.SOFT_NMS.METHOD + ) + else: + keep = box_utils.nms(dets_j, cfg.TEST.NMS) + nms_dets = dets_j[keep, :] + # Refine the post-NMS boxes using bounding-box voting + if cfg.TEST.BBOX_VOTE.ENABLED: + nms_dets = box_utils.box_voting( + nms_dets, + dets_j, + cfg.TEST.BBOX_VOTE.VOTE_TH, + scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD + ) + cls_boxes[j] = nms_dets + + # Limit to max_per_image detections **over all classes** + if cfg.TEST.DETECTIONS_PER_IM > 0: + image_scores = np.hstack( + [cls_boxes[j][:, -1] for j in range(1, num_classes)] + ) + if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM: + image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM] + for j in range(1, num_classes): + keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0] + cls_boxes[j] = cls_boxes[j][keep, :] + + im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)]) + boxes = im_results[:, :-1] + scores = im_results[:, -1] + return scores, boxes, cls_boxes + + +def segm_results(cls_boxes, masks, ref_boxes, im_h, im_w): + num_classes = cfg.MODEL.NUM_CLASSES + cls_segms = [[] for _ in range(num_classes)] + mask_ind = 0 + # To work around an issue with cv2.resize (it seems to automatically pad + # with repeated border values), we manually zero-pad the masks by 1 pixel + # prior to resizing back to the original image resolution. 
This prevents + # "top hat" artifacts. We therefore need to expand the reference boxes by an + # appropriate factor. + M = cfg.MRCNN.RESOLUTION + scale = (M + 2.0) / M + ref_boxes = box_utils.expand_boxes(ref_boxes, scale) + ref_boxes = ref_boxes.astype(np.int32) + padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32) + + # skip j = 0, because it's the background class + for j in range(1, num_classes): + segms = [] + for _ in range(cls_boxes[j].shape[0]): + if cfg.MRCNN.CLS_SPECIFIC_MASK: + padded_mask[1:-1, 1:-1] = masks[mask_ind, j, :, :] + else: + padded_mask[1:-1, 1:-1] = masks[mask_ind, 0, :, :] + + ref_box = ref_boxes[mask_ind, :] + w = ref_box[2] - ref_box[0] + 1 + h = ref_box[3] - ref_box[1] + 1 + w = np.maximum(w, 1) + h = np.maximum(h, 1) + + mask = cv2.resize(padded_mask, (w, h)) + mask = np.array(mask > cfg.MRCNN.THRESH_BINARIZE, dtype=np.uint8) + im_mask = np.zeros((im_h, im_w), dtype=np.uint8) + + x_0 = max(ref_box[0], 0) + x_1 = min(ref_box[2] + 1, im_w) + y_0 = max(ref_box[1], 0) + y_1 = min(ref_box[3] + 1, im_h) + + im_mask[y_0:y_1, x_0:x_1] = mask[ + (y_0 - ref_box[1]):(y_1 - ref_box[1]), + (x_0 - ref_box[0]):(x_1 - ref_box[0]) + ] + + # Get RLE encoding used by the COCO evaluation API + rle = mask_util.encode( + np.array(im_mask[:, :, np.newaxis], order='F') + )[0] + segms.append(rle) + + mask_ind += 1 + + cls_segms[j] = segms + + assert mask_ind == masks.shape[0] + return cls_segms + + +def keypoint_results(cls_boxes, pred_heatmaps, ref_boxes): + num_classes = cfg.MODEL.NUM_CLASSES + cls_keyps = [[] for _ in range(num_classes)] + person_idx = keypoint_utils.get_person_class_index() + xy_preds = keypoint_utils.heatmaps_to_keypoints(pred_heatmaps, ref_boxes) + + # NMS OKS + if cfg.KRCNN.NMS_OKS: + keep = keypoint_utils.nms_oks(xy_preds, ref_boxes, 0.3) + xy_preds = xy_preds[keep, :, :] + ref_boxes = ref_boxes[keep, :] + pred_heatmaps = pred_heatmaps[keep, :, :, :] + cls_boxes[person_idx] = cls_boxes[person_idx][keep, :] + + kps = [xy_preds[i] 
for i in range(xy_preds.shape[0])] + cls_keyps[person_idx] = kps + return cls_keyps + + +def _get_rois_blob(im_rois, im_scale): + """Converts RoIs into network inputs. + + Arguments: + im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates + im_scale_factors (list): scale factors as returned by _get_image_blob + + Returns: + blob (ndarray): R x 5 matrix of RoIs in the image pyramid with columns + [level, x1, y1, x2, y2] + """ + rois, levels = _project_im_rois(im_rois, im_scale) + rois_blob = np.hstack((levels, rois)) + return rois_blob.astype(np.float32, copy=False) + + +def _project_im_rois(im_rois, scales): + """Project image RoIs into the image pyramid built by _get_image_blob. + + Arguments: + im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates + scales (list): scale factors as returned by _get_image_blob + + Returns: + rois (ndarray): R x 4 matrix of projected RoI coordinates + levels (ndarray): image pyramid levels used by each projected RoI + """ + rois = im_rois.astype(np.float, copy=False) * scales + levels = np.zeros((im_rois.shape[0], 1), dtype=np.int) + return rois, levels + + +def _add_multilevel_rois_for_test(blobs, name): + """Distributes a set of RoIs across FPN pyramid levels by creating new level + specific RoI blobs. 
+ + Arguments: + blobs (dict): dictionary of blobs + name (str): a key in 'blobs' identifying the source RoI blob + + Returns: + [by ref] blobs (dict): new keys named by `name + 'fpn' + level` + are added to dict each with a value that's an R_level x 5 ndarray of + RoIs (see _get_rois_blob for format) + """ + lvl_min = cfg.FPN.ROI_MIN_LEVEL + lvl_max = cfg.FPN.ROI_MAX_LEVEL + lvls = fpn.map_rois_to_fpn_levels(blobs[name][:, 1:5], lvl_min, lvl_max) + fpn.add_multilevel_roi_blobs( + blobs, name, blobs[name], lvls, lvl_min, lvl_max + ) + + +def _get_blobs(im, rois, target_scale, target_max_size): + """Convert an image and RoIs within that image into network inputs.""" + blobs = {} + blobs['data'], im_scale, blobs['im_info'] = \ + blob_utils.get_image_blob(im, target_scale, target_max_size) + if rois is not None: + blobs['rois'] = _get_rois_blob(rois, im_scale) + return blobs, im_scale diff --git a/detectron/core/test_engine.py b/detectron/core/test_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..f69bc20dd2d97a46db8ef5fb7617482b054c330b --- /dev/null +++ b/detectron/core/test_engine.py @@ -0,0 +1,395 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +"""Test a Detectron network on an imdb (image database).""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from collections import defaultdict +import cv2 +import datetime +import logging +import numpy as np +import os + +from caffe2.python import workspace + +from detectron.core.config import cfg +from detectron.core.config import get_output_dir +from detectron.core.rpn_generator import generate_rpn_on_dataset +from detectron.core.rpn_generator import generate_rpn_on_range +from detectron.core.test import im_detect_all +from detectron.datasets import task_evaluation +from detectron.datasets.json_dataset import JsonDataset +from detectron.modeling import model_builder +from detectron.utils.io import save_object +from detectron.utils.timer import Timer +import detectron.utils.c2 as c2_utils +import detectron.utils.env as envu +import detectron.utils.net as net_utils +import detectron.utils.subprocess as subprocess_utils +import detectron.utils.vis as vis_utils + +logger = logging.getLogger(__name__) + + +def get_eval_functions(): + # Determine which parent or child function should handle inference + if cfg.MODEL.RPN_ONLY: + child_func = generate_rpn_on_range + parent_func = generate_rpn_on_dataset + else: + # Generic case that handles all network types other than RPN-only nets + # and RetinaNet + child_func = test_net + parent_func = test_net_on_dataset + + return parent_func, child_func + + +def get_inference_dataset(index, is_parent=True): + assert is_parent or len(cfg.TEST.DATASETS) == 1, \ + 'The child inference process can only work on a single dataset' + + dataset_name = cfg.TEST.DATASETS[index] + + if cfg.TEST.PRECOMPUTED_PROPOSALS: + assert is_parent or len(cfg.TEST.PROPOSAL_FILES) == 1, \ + 'The child inference process can only work on a single proposal file' + 
assert len(cfg.TEST.PROPOSAL_FILES) == len(cfg.TEST.DATASETS), \ + 'If proposals are used, one proposal file must be specified for ' \ + 'each dataset' + proposal_file = cfg.TEST.PROPOSAL_FILES[index] + else: + proposal_file = None + + return dataset_name, proposal_file + + +def run_inference( + weights_file, ind_range=None, + multi_gpu_testing=False, gpu_id=0, + check_expected_results=False, +): + parent_func, child_func = get_eval_functions() + is_parent = ind_range is None + + def result_getter(): + if is_parent: + # Parent case: + # In this case we're either running inference on the entire dataset in a + # single process or (if multi_gpu_testing is True) using this process to + # launch subprocesses that each run inference on a range of the dataset + all_results = {} + for i in range(len(cfg.TEST.DATASETS)): + dataset_name, proposal_file = get_inference_dataset(i) + output_dir = get_output_dir(dataset_name, training=False) + results = parent_func( + weights_file, + dataset_name, + proposal_file, + output_dir, + multi_gpu=multi_gpu_testing + ) + all_results.update(results) + + return all_results + else: + # Subprocess child case: + # In this case test_net was called via subprocess.Popen to execute on a + # range of inputs on a single dataset + dataset_name, proposal_file = get_inference_dataset(0, is_parent=False) + output_dir = get_output_dir(dataset_name, training=False) + return child_func( + weights_file, + dataset_name, + proposal_file, + output_dir, + ind_range=ind_range, + gpu_id=gpu_id + ) + + all_results = result_getter() + if check_expected_results and is_parent: + task_evaluation.check_expected_results( + all_results, + atol=cfg.EXPECTED_RESULTS_ATOL, + rtol=cfg.EXPECTED_RESULTS_RTOL + ) + task_evaluation.log_copy_paste_friendly_results(all_results) + + return all_results + + +def test_net_on_dataset( + weights_file, + dataset_name, + proposal_file, + output_dir, + multi_gpu=False, + gpu_id=0 +): + """Run inference on a dataset.""" + dataset = 
JsonDataset(dataset_name) + test_timer = Timer() + test_timer.tic() + if multi_gpu: + num_images = len(dataset.get_roidb()) + all_boxes, all_segms, all_keyps = multi_gpu_test_net_on_dataset( + weights_file, dataset_name, proposal_file, num_images, output_dir + ) + else: + all_boxes, all_segms, all_keyps = test_net( + weights_file, dataset_name, proposal_file, output_dir, gpu_id=gpu_id + ) + test_timer.toc() + logger.info('Total inference time: {:.3f}s'.format(test_timer.average_time)) + results = task_evaluation.evaluate_all( + dataset, all_boxes, all_segms, all_keyps, output_dir + ) + return results + + +def multi_gpu_test_net_on_dataset( + weights_file, dataset_name, proposal_file, num_images, output_dir +): + """Multi-gpu inference on a dataset.""" + binary_dir = envu.get_runtime_dir() + binary_ext = envu.get_py_bin_ext() + binary = os.path.join(binary_dir, 'test_net' + binary_ext) + assert os.path.exists(binary), 'Binary \'{}\' not found'.format(binary) + + # Pass the target dataset and proposal file (if any) via the command line + opts = ['TEST.DATASETS', '("{}",)'.format(dataset_name)] + opts += ['TEST.WEIGHTS', weights_file] + if proposal_file: + opts += ['TEST.PROPOSAL_FILES', '("{}",)'.format(proposal_file)] + + # Run inference in parallel in subprocesses + # Outputs will be a list of outputs from each subprocess, where the output + # of each subprocess is the dictionary saved by test_net(). 
+ outputs = subprocess_utils.process_in_parallel( + 'detection', num_images, binary, output_dir, opts + ) + + # Collate the results from each subprocess + all_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)] + all_segms = [[] for _ in range(cfg.MODEL.NUM_CLASSES)] + all_keyps = [[] for _ in range(cfg.MODEL.NUM_CLASSES)] + for det_data in outputs: + all_boxes_batch = det_data['all_boxes'] + all_segms_batch = det_data['all_segms'] + all_keyps_batch = det_data['all_keyps'] + for cls_idx in range(1, cfg.MODEL.NUM_CLASSES): + all_boxes[cls_idx] += all_boxes_batch[cls_idx] + all_segms[cls_idx] += all_segms_batch[cls_idx] + all_keyps[cls_idx] += all_keyps_batch[cls_idx] + det_file = os.path.join(output_dir, 'detections.pkl') + cfg_yaml = envu.yaml_dump(cfg) + save_object( + dict( + all_boxes=all_boxes, + all_segms=all_segms, + all_keyps=all_keyps, + cfg=cfg_yaml + ), det_file + ) + logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file))) + + return all_boxes, all_segms, all_keyps + + +def test_net( + weights_file, + dataset_name, + proposal_file, + output_dir, + ind_range=None, + gpu_id=0 +): + """Run inference on all images in a dataset or over an index range of images + in a dataset using a single GPU. + """ + assert not cfg.MODEL.RPN_ONLY, \ + 'Use rpn_generate to generate proposals from RPN-only models' + + roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset( + dataset_name, proposal_file, ind_range + ) + model = initialize_model_from_cfg(weights_file, gpu_id=gpu_id) + num_images = len(roidb) + num_classes = cfg.MODEL.NUM_CLASSES + all_boxes, all_segms, all_keyps = empty_results(num_classes, num_images) + timers = defaultdict(Timer) + for i, entry in enumerate(roidb): + if cfg.TEST.PRECOMPUTED_PROPOSALS: + # The roidb may contain ground-truth rois (for example, if the roidb + # comes from the training or val split). We only want to evaluate + # detection on the *non*-ground-truth rois. 
We select only the rois + # that have the gt_classes field set to 0, which means there's no + # ground truth. + box_proposals = entry['boxes'][entry['gt_classes'] == 0] + if len(box_proposals) == 0: + continue + else: + # Faster R-CNN type models generate proposals on-the-fly with an + # in-network RPN; 1-stage models don't require proposals. + box_proposals = None + + im = cv2.imread(entry['image']) + with c2_utils.NamedCudaScope(gpu_id): + cls_boxes_i, cls_segms_i, cls_keyps_i = im_detect_all( + model, im, box_proposals, timers + ) + + extend_results(i, all_boxes, cls_boxes_i) + if cls_segms_i is not None: + extend_results(i, all_segms, cls_segms_i) + if cls_keyps_i is not None: + extend_results(i, all_keyps, cls_keyps_i) + + if i % 10 == 0: # Reduce log file size + ave_total_time = np.sum([t.average_time for t in timers.values()]) + eta_seconds = ave_total_time * (num_images - i - 1) + eta = str(datetime.timedelta(seconds=int(eta_seconds))) + det_time = ( + timers['im_detect_bbox'].average_time + + timers['im_detect_mask'].average_time + + timers['im_detect_keypoints'].average_time + ) + misc_time = ( + timers['misc_bbox'].average_time + + timers['misc_mask'].average_time + + timers['misc_keypoints'].average_time + ) + logger.info( + ( + 'im_detect: range [{:d}, {:d}] of {:d}: ' + '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})' + ).format( + start_ind + 1, end_ind, total_num_images, start_ind + i + 1, + start_ind + num_images, det_time, misc_time, eta + ) + ) + + if cfg.VIS: + im_name = os.path.splitext(os.path.basename(entry['image']))[0] + vis_utils.vis_one_image( + im[:, :, ::-1], + '{:d}_{:s}'.format(i, im_name), + os.path.join(output_dir, 'vis'), + cls_boxes_i, + segms=cls_segms_i, + keypoints=cls_keyps_i, + thresh=cfg.VIS_TH, + box_alpha=0.8, + dataset=dataset, + show_class=True + ) + + cfg_yaml = envu.yaml_dump(cfg) + if ind_range is not None: + det_name = 'detection_range_%s_%s.pkl' % tuple(ind_range) + else: + det_name = 'detections.pkl' + det_file = 
os.path.join(output_dir, det_name) + save_object( + dict( + all_boxes=all_boxes, + all_segms=all_segms, + all_keyps=all_keyps, + cfg=cfg_yaml + ), det_file + ) + logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file))) + return all_boxes, all_segms, all_keyps + + +def initialize_model_from_cfg(weights_file, gpu_id=0): + """Initialize a model from the global cfg. Loads test-time weights and + creates the networks in the Caffe2 workspace. + """ + model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id) + net_utils.initialize_gpu_from_weights_file( + model, weights_file, gpu_id=gpu_id, + ) + model_builder.add_inference_inputs(model) + workspace.CreateNet(model.net) + workspace.CreateNet(model.conv_body_net) + if cfg.MODEL.MASK_ON: + workspace.CreateNet(model.mask_net) + if cfg.MODEL.KEYPOINTS_ON: + workspace.CreateNet(model.keypoint_net) + return model + + +def get_roidb_and_dataset(dataset_name, proposal_file, ind_range): + """Get the roidb for the dataset specified in the global cfg. Optionally + restrict it to a range of indices if ind_range is a pair of integers. + """ + dataset = JsonDataset(dataset_name) + if cfg.TEST.PRECOMPUTED_PROPOSALS: + assert proposal_file, 'No proposal file given' + roidb = dataset.get_roidb( + proposal_file=proposal_file, + proposal_limit=cfg.TEST.PROPOSAL_LIMIT + ) + else: + roidb = dataset.get_roidb() + + if ind_range is not None: + total_num_images = len(roidb) + start, end = ind_range + roidb = roidb[start:end] + else: + start = 0 + end = len(roidb) + total_num_images = end + + return roidb, dataset, start, end, total_num_images + + +def empty_results(num_classes, num_images): + """Return empty results lists for boxes, masks, and keypoints. + Box detections are collected into: + all_boxes[cls][image] = N x 5 array with columns (x1, y1, x2, y2, score) + Instance mask predictions are collected into: + all_segms[cls][image] = [...] 
list of COCO RLE encoded masks that are in + 1:1 correspondence with the boxes in all_boxes[cls][image] + Keypoint predictions are collected into: + all_keyps[cls][image] = [...] list of keypoints results, each encoded as + a 3D array (#rois, 4, #keypoints) with the 4 rows corresponding to + [x, y, logit, prob] (See: utils.keypoints.heatmaps_to_keypoints). + Keypoints are recorded for person (cls = 1); they are in 1:1 + correspondence with the boxes in all_boxes[cls][image]. + """ + # Note: do not be tempted to use [[] * N], which gives N references to the + # *same* empty list. + all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)] + all_segms = [[[] for _ in range(num_images)] for _ in range(num_classes)] + all_keyps = [[[] for _ in range(num_images)] for _ in range(num_classes)] + return all_boxes, all_segms, all_keyps + + +def extend_results(index, all_res, im_res): + """Add results for an image to the set of all results at the specified + index. + """ + # Skip cls_idx 0 (__background__) + for cls_idx in range(1, len(im_res)): + all_res[cls_idx][index] = im_res[cls_idx] diff --git a/detectron/core/test_retinanet.py b/detectron/core/test_retinanet.py new file mode 100644 index 0000000000000000000000000000000000000000..2c4e4f3e87e199338999180051ecd03295379ae4 --- /dev/null +++ b/detectron/core/test_retinanet.py @@ -0,0 +1,200 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +"""Test a RetinaNet network on an image database""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np +import logging +from collections import defaultdict + +from caffe2.python import core, workspace + +from detectron.core.config import cfg +from detectron.modeling.generate_anchors import generate_anchors +from detectron.utils.timer import Timer +import detectron.utils.blob as blob_utils +import detectron.utils.boxes as box_utils + +logger = logging.getLogger(__name__) + + +def _create_cell_anchors(): + """ + Generate all types of anchors for all fpn levels/scales/aspect ratios. + This function is called only once at the beginning of inference. + """ + k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL + scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE + aspect_ratios = cfg.RETINANET.ASPECT_RATIOS + anchor_scale = cfg.RETINANET.ANCHOR_SCALE + A = scales_per_octave * len(aspect_ratios) + anchors = {} + for lvl in range(k_min, k_max + 1): + # create cell anchors array + stride = 2. 
** lvl + cell_anchors = np.zeros((A, 4)) + a = 0 + for octave in range(scales_per_octave): + octave_scale = 2 ** (octave / float(scales_per_octave)) + for aspect in aspect_ratios: + anchor_sizes = (stride * octave_scale * anchor_scale, ) + anchor_aspect_ratios = (aspect, ) + cell_anchors[a, :] = generate_anchors( + stride=stride, sizes=anchor_sizes, + aspect_ratios=anchor_aspect_ratios) + a += 1 + anchors[lvl] = cell_anchors + return anchors + + +def im_detect_bbox(model, im, timers=None): + """Generate RetinaNet detections on a single image.""" + if timers is None: + timers = defaultdict(Timer) + # Although anchors are input independent and could be precomputed, + # recomputing them per image only brings a small overhead + anchors = _create_cell_anchors() + timers['im_detect_bbox'].tic() + k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL + A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS) + inputs = {} + inputs['data'], im_scale, inputs['im_info'] = \ + blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE) + cls_probs, box_preds = [], [] + for lvl in range(k_min, k_max + 1): + suffix = 'fpn{}'.format(lvl) + cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix))) + box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix))) + for k, v in inputs.items(): + workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False)) + + workspace.RunNet(model.net.Proto().name) + cls_probs = workspace.FetchBlobs(cls_probs) + box_preds = workspace.FetchBlobs(box_preds) + + # here the boxes_all are [x0, y0, x1, y1, score] + boxes_all = defaultdict(list) + + cnt = 0 + for lvl in range(k_min, k_max + 1): + # create cell anchors array + stride = 2. 
** lvl + cell_anchors = anchors[lvl] + + # fetch per level probability + cls_prob = cls_probs[cnt] + box_pred = box_preds[cnt] + cls_prob = cls_prob.reshape(( + cls_prob.shape[0], A, int(cls_prob.shape[1] / A), + cls_prob.shape[2], cls_prob.shape[3])) + box_pred = box_pred.reshape(( + box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3])) + cnt += 1 + + if cfg.RETINANET.SOFTMAX: + cls_prob = cls_prob[:, :, 1::, :, :] + + cls_prob_ravel = cls_prob.ravel() + # In some cases [especially for very small img sizes], it's possible that + # candidate_ind is empty if we impose threshold 0.05 at all levels. This + # will lead to errors since no detections are found for this image. Hence, + # for lvl 7 which has small spatial resolution, we take the threshold 0.0 + th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0 + candidate_inds = np.where(cls_prob_ravel > th)[0] + if (len(candidate_inds) == 0): + continue + + pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds)) + inds = np.argpartition( + cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:] + inds = candidate_inds[inds] + + inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose() + classes = inds_5d[:, 2] + anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4] + scores = cls_prob[:, anchor_ids, classes, y, x] + + boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32) + boxes *= stride + boxes += cell_anchors[anchor_ids, :] + + if not cfg.RETINANET.CLASS_SPECIFIC_BBOX: + box_deltas = box_pred[0, anchor_ids, :, y, x] + else: + box_cls_inds = classes * 4 + box_deltas = np.vstack( + [box_pred[0, ind:ind + 4, yi, xi] + for ind, yi, xi in zip(box_cls_inds, y, x)] + ) + pred_boxes = ( + box_utils.bbox_transform(boxes, box_deltas) + if cfg.TEST.BBOX_REG else boxes) + pred_boxes /= im_scale + pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape) + box_scores = np.zeros((pred_boxes.shape[0], 5)) + box_scores[:, 0:4] = pred_boxes + box_scores[:, 4] = 
scores + + for cls in range(1, cfg.MODEL.NUM_CLASSES): + inds = np.where(classes == cls - 1)[0] + if len(inds) > 0: + boxes_all[cls].extend(box_scores[inds, :]) + timers['im_detect_bbox'].toc() + + # Combine predictions across all levels and retain the top scoring by class + timers['misc_bbox'].tic() + detections = [] + for cls, boxes in boxes_all.items(): + cls_dets = np.vstack(boxes).astype(dtype=np.float32) + # do class specific nms here + if cfg.TEST.SOFT_NMS.ENABLED: + cls_dets, keep = box_utils.soft_nms( + cls_dets, + sigma=cfg.TEST.SOFT_NMS.SIGMA, + overlap_thresh=cfg.TEST.NMS, + score_thresh=0.0001, + method=cfg.TEST.SOFT_NMS.METHOD + ) + else: + keep = box_utils.nms(cls_dets, cfg.TEST.NMS) + cls_dets = cls_dets[keep, :] + out = np.zeros((len(keep), 6)) + out[:, 0:5] = cls_dets + out[:, 5].fill(cls) + detections.append(out) + + # detections (N, 6) format: + # detections[:, :4] - boxes + # detections[:, 4] - scores + # detections[:, 5] - classes + detections = np.vstack(detections) + # sort all again + inds = np.argsort(-detections[:, 4]) + detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :] + + # Convert the detections to image cls_ format (see core/test_engine.py) + num_classes = cfg.MODEL.NUM_CLASSES + cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)] + for c in range(1, num_classes): + inds = np.where(detections[:, 5] == c)[0] + cls_boxes[c] = detections[inds, :5] + timers['misc_bbox'].toc() + + return cls_boxes diff --git a/detectron/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m b/detectron/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m new file mode 100644 index 0000000000000000000000000000000000000000..629597a1f1d1f978f0065f7b57b4c9eae1957f87 --- /dev/null +++ b/detectron/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m @@ -0,0 +1,14 @@ +function VOCopts = get_voc_opts(path) + +tmp = pwd; +cd(path); +try + addpath('VOCcode'); + VOCinit; +catch + rmpath('VOCcode'); + cd(tmp); + error(sprintf('VOCcode directory not found under %s', 
path)); +end +rmpath('VOCcode'); +cd(tmp); diff --git a/detectron/datasets/VOCdevkit-matlab-wrapper/voc_eval.m b/detectron/datasets/VOCdevkit-matlab-wrapper/voc_eval.m new file mode 100644 index 0000000000000000000000000000000000000000..1911a0e39b91ba8e2a2c1157b8c93e5e65829141 --- /dev/null +++ b/detectron/datasets/VOCdevkit-matlab-wrapper/voc_eval.m @@ -0,0 +1,56 @@ +function res = voc_eval(path, comp_id, test_set, output_dir) + +VOCopts = get_voc_opts(path); +VOCopts.testset = test_set; + +for i = 1:length(VOCopts.classes) + cls = VOCopts.classes{i}; + res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); +end + +fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); +fprintf('Results:\n'); +aps = [res(:).ap]'; +fprintf('%.1f\n', aps * 100); +fprintf('%.1f\n', mean(aps) * 100); +fprintf('~~~~~~~~~~~~~~~~~~~~\n'); + +function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) + +test_set = VOCopts.testset; +year = VOCopts.dataset(4:end); + +addpath(fullfile(VOCopts.datadir, 'VOCcode')); + +res_fn = sprintf(VOCopts.detrespath, comp_id, cls); + +recall = []; +prec = []; +ap = 0; +ap_auc = 0; + +do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); +if do_eval + % Bug in VOCevaldet requires that tic has been called first + tic; + [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); + ap_auc = xVOCap(recall, prec); + + % force plot limits + ylim([0 1]); + xlim([0 1]); + + print(gcf, '-djpeg', '-r0', ... + [output_dir '/' cls '_pr.jpg']); +end +fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc); + +res.recall = recall; +res.prec = prec; +res.ap = ap; +res.ap_auc = ap_auc; + +save([output_dir '/' cls '_pr.mat'], ... 
+ 'res', 'recall', 'prec', 'ap', 'ap_auc'); + +rmpath(fullfile(VOCopts.datadir, 'VOCcode')); diff --git a/detectron/datasets/VOCdevkit-matlab-wrapper/xVOCap.m b/detectron/datasets/VOCdevkit-matlab-wrapper/xVOCap.m new file mode 100644 index 0000000000000000000000000000000000000000..7e8024fd1400adcddbcffc988bbc99e2399b7781 --- /dev/null +++ b/detectron/datasets/VOCdevkit-matlab-wrapper/xVOCap.m @@ -0,0 +1,10 @@ +function ap = xVOCap(rec,prec) +% From the PASCAL VOC 2011 devkit + +mrec=[0 ; rec ; 1]; +mpre=[0 ; prec ; 0]; +for i=numel(mpre)-1:-1:1 + mpre(i)=max(mpre(i),mpre(i+1)); +end +i=find(mrec(2:end)~=mrec(1:end-1))+1; +ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); diff --git a/detectron/datasets/__init__.py b/detectron/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/detectron/datasets/cityscapes_json_dataset_evaluator.py b/detectron/datasets/cityscapes_json_dataset_evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..af7dbccb023b206b7adde1f3459a9418b923fa3c --- /dev/null +++ b/detectron/datasets/cityscapes_json_dataset_evaluator.py @@ -0,0 +1,95 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +"""Functions for evaluating results on Cityscapes.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import cv2 +import logging +import os +import uuid + +import pycocotools.mask as mask_util + +from detectron.core.config import cfg +from detectron.datasets.dataset_catalog import get_raw_dir + +logger = logging.getLogger(__name__) + + +def evaluate_masks( + json_dataset, + all_boxes, + all_segms, + output_dir, + use_salt=True, + cleanup=False +): + if cfg.CLUSTER.ON_CLUSTER: + # On the cluster avoid saving these files in the job directory + output_dir = '/tmp' + res_file = os.path.join( + output_dir, 'segmentations_' + json_dataset.name + '_results') + if use_salt: + res_file += '_{}'.format(str(uuid.uuid4())) + res_file += '.json' + + results_dir = os.path.join(output_dir, 'results') + if not os.path.exists(results_dir): + os.mkdir(results_dir) + + os.environ['CITYSCAPES_DATASET'] = get_raw_dir(json_dataset.name) + os.environ['CITYSCAPES_RESULTS'] = output_dir + + # Load the Cityscapes eval script *after* setting the required env vars, + # since the script reads their values into global variables (at load time). 
+ import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling \ + as cityscapes_eval + + roidb = json_dataset.get_roidb() + for i, entry in enumerate(roidb): + im_name = entry['image'] + + basename = os.path.splitext(os.path.basename(im_name))[0] + txtname = os.path.join(output_dir, basename + 'pred.txt') + with open(txtname, 'w') as fid_txt: + if i % 10 == 0: + logger.info('i: {}: {}'.format(i, basename)) + for j in range(1, len(all_segms)): + clss = json_dataset.classes[j] + clss_id = cityscapes_eval.name2label[clss].id + segms = all_segms[j][i] + boxes = all_boxes[j][i] + if segms == []: + continue + masks = mask_util.decode(segms) + + for k in range(boxes.shape[0]): + score = boxes[k, -1] + mask = masks[:, :, k] + pngname = os.path.join( + 'results', + basename + '_' + clss + '_{}.png'.format(k)) + # write txt + fid_txt.write('{} {} {}\n'.format(pngname, clss_id, score)) + # save mask + cv2.imwrite(os.path.join(output_dir, pngname), mask * 255) + logger.info('Evaluating...') + cityscapes_eval.main([]) + return None diff --git a/detectron/datasets/coco_to_cityscapes_id.py b/detectron/datasets/coco_to_cityscapes_id.py new file mode 100644 index 0000000000000000000000000000000000000000..7bf56184f2b07adf3b22b08abfea1c985cd1c75a --- /dev/null +++ b/detectron/datasets/coco_to_cityscapes_id.py @@ -0,0 +1,95 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +# mapping coco categories to cityscapes (our converted json) id +# cityscapes +# INFO roidb.py: 220: 1 bicycle: 7286 +# INFO roidb.py: 220: 2 car: 53684 +# INFO roidb.py: 220: 3 person: 35704 +# INFO roidb.py: 220: 4 train: 336 +# INFO roidb.py: 220: 5 truck: 964 +# INFO roidb.py: 220: 6 motorcycle: 1468 +# INFO roidb.py: 220: 7 bus: 758 +# INFO roidb.py: 220: 8 rider: 3504 + +# coco (val5k) +# INFO roidb.py: 220: 1 person: 21296 +# INFO roidb.py: 220: 2 bicycle: 628 +# INFO roidb.py: 220: 3 car: 3818 +# INFO roidb.py: 220: 4 motorcycle: 732 +# INFO roidb.py: 220: 5 airplane: 286 <------ irrelevant +# INFO roidb.py: 220: 6 bus: 564 +# INFO roidb.py: 220: 7 train: 380 +# INFO roidb.py: 220: 8 truck: 828 + + +def cityscapes_to_coco(cityscapes_id): + lookup = { + 0: 0, # ... background + 1: 2, # bicycle + 2: 3, # car + 3: 1, # person + 4: 7, # train + 5: 8, # truck + 6: 4, # motorcycle + 7: 6, # bus + 8: -1, # rider (-1 means rand init) + } + return lookup[cityscapes_id] + + +def cityscapes_to_coco_with_rider(cityscapes_id): + lookup = { + 0: 0, # ... background + 1: 2, # bicycle + 2: 3, # car + 3: 1, # person + 4: 7, # train + 5: 8, # truck + 6: 4, # motorcycle + 7: 6, # bus + 8: 1, # rider ("person", *rider has human right!*) + } + return lookup[cityscapes_id] + + +def cityscapes_to_coco_without_person_rider(cityscapes_id): + lookup = { + 0: 0, # ... background + 1: 2, # bicycle + 2: 3, # car + 3: -1, # person (ignore) + 4: 7, # train + 5: 8, # truck + 6: 4, # motorcycle + 7: 6, # bus + 8: -1, # rider (ignore) + } + return lookup[cityscapes_id] + + +def cityscapes_to_coco_all_random(cityscapes_id): + lookup = { + 0: -1, # ... 
background + 1: -1, # bicycle + 2: -1, # car + 3: -1, # person (ignore) + 4: -1, # train + 5: -1, # truck + 6: -1, # motorcycle + 7: -1, # bus + 8: -1, # rider (ignore) + } + return lookup[cityscapes_id] diff --git a/detectron/datasets/data/README.md b/detectron/datasets/data/README.md new file mode 100644 index 0000000000000000000000000000000000000000..64b201a81457d4d2a9e1d91ef553616f507ea3a3 --- /dev/null +++ b/detectron/datasets/data/README.md @@ -0,0 +1,103 @@ +# Setting Up Datasets + +This directory contains symlinks to data locations. + +## Creating Symlinks for COCO + +Symlink the COCO dataset: + +``` +ln -s /path/to/coco $DETECTRON/detectron/datasets/data/coco +``` + +We assume that your local COCO dataset copy at `/path/to/coco` has the following directory structure: + +``` +coco +|_ coco_train2014 +| |_ .jpg +| |_ ... +| |_ .jpg +|_ coco_val2014 +|_ ... +|_ annotations + |_ instances_train2014.json + |_ ... +``` + +If that is not the case, you may need to do something similar to: + +``` +mkdir -p $DETECTRON/detectron/datasets/data/coco +ln -s /path/to/coco_train2014 $DETECTRON/detectron/datasets/data/coco/coco_train2014 +ln -s /path/to/coco_val2014 $DETECTRON/detectron/datasets/data/coco/coco_val2014 +ln -s /path/to/json/annotations $DETECTRON/detectron/datasets/data/coco/annotations +``` + +### COCO Minival Annotations + +Our custom `minival` and `valminusminival` annotations are available for download [here](https://dl.fbaipublicfiles.com/detectron/coco/coco_annotations_minival.tgz). +Please note that `minival` is exactly equivalent to the recently defined 2017 `val` set. +Similarly, the union of `valminusminival` and the 2014 `train` is exactly equivalent to the 2017 `train` set. To complete installation of the COCO dataset, you will need to copy the `minival` and `valminusminival` json annotation files to the `coco/annotations` directory referenced above. 
+ +## Creating Symlinks for PASCAL VOC + +We assume that your symlinked `detectron/datasets/data/VOC` directory has the following structure: + +``` +VOC +|_ JPEGImages +| |_ .jpg +| |_ ... +| |_ .jpg +|_ annotations +| |_ voc__train.json +| |_ voc__val.json +| |_ ... +|_ VOCdevkit +``` + +Create symlinks for `VOC`: + +``` +mkdir -p $DETECTRON/detectron/datasets/data/VOC +ln -s /path/to/VOC/JPEGImages $DETECTRON/detectron/datasets/data/VOC/JPEGImages +ln -s /path/to/VOC/json/annotations $DETECTRON/detectron/datasets/data/VOC/annotations +ln -s /path/to/VOC/devkit $DETECTRON/detectron/datasets/data/VOC/VOCdevkit +``` + +### PASCAL VOC Annotations in COCO Format + +We expect PASCAL VOC annotations converted to COCO json format, which are available for download [here](https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip ). + +## Creating Symlinks for Cityscapes: + +We assume that your symlinked `detectron/datasets/data/cityscapes` directory has the following structure: + +``` +cityscapes +|_ images +| |_ .jpg +| |_ ... +| |_ .jpg +|_ annotations +| |_ instanceonly_gtFile_train.json +| |_ ... +|_ raw + |_ gtFine + |_ ... + |_ README.md +``` + +Create symlinks for `cityscapes`: + +``` +mkdir -p $DETECTRON/detectron/datasets/data/cityscapes +ln -s /path/to/cityscapes/images $DETECTRON/detectron/datasets/data/cityscapes/images +ln -s /path/to/cityscapes/json/annotations $DETECTRON/detectron/datasets/data/cityscapes/annotations +ln -s /path/to/cityscapes/root $DETECTRON/detectron/datasets/data/cityscapes/raw +``` + +### Cityscapes Annotations in COCO Format + +We expect Cityscapes annotations converted to COCO json format, which we will make available for download soon. 
diff --git a/detectron/datasets/dataset_catalog.py b/detectron/datasets/dataset_catalog.py new file mode 100644 index 0000000000000000000000000000000000000000..b92487e6ebf83f7de0d4fdddf163d2c99adc67cc --- /dev/null +++ b/detectron/datasets/dataset_catalog.py @@ -0,0 +1,240 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Collection of available datasets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import os + + +# Path to data dir +_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data') + +# Required dataset entry keys +_IM_DIR = 'image_directory' +_ANN_FN = 'annotation_file' + +# Optional dataset entry keys +_IM_PREFIX = 'image_prefix' +_DEVKIT_DIR = 'devkit_directory' +_RAW_DIR = 'raw_dir' + +# Available datasets +_DATASETS = { + 'cityscapes_fine_instanceonly_seg_train': { + _IM_DIR: + _DATA_DIR + '/cityscapes/images', + _ANN_FN: + _DATA_DIR + '/cityscapes/annotations/instancesonly_gtFine_train.json', + _RAW_DIR: + _DATA_DIR + '/cityscapes/raw' + }, + 'cityscapes_fine_instanceonly_seg_val': { + _IM_DIR: + _DATA_DIR + '/cityscapes/images', + # use filtered validation as there is an issue converting contours + _ANN_FN: + _DATA_DIR + '/cityscapes/annotations/instancesonly_filtered_gtFine_val.json', + _RAW_DIR: + 
_DATA_DIR + '/cityscapes/raw' + }, + 'cityscapes_fine_instanceonly_seg_test': { + _IM_DIR: + _DATA_DIR + '/cityscapes/images', + _ANN_FN: + _DATA_DIR + '/cityscapes/annotations/instancesonly_gtFine_test.json', + _RAW_DIR: + _DATA_DIR + '/cityscapes/raw' + }, + 'coco_2014_train': { + _IM_DIR: + _DATA_DIR + '/coco/coco_train2014', + _ANN_FN: + _DATA_DIR + '/coco/annotations/instances_train2014.json' + }, + 'coco_2014_val': { + _IM_DIR: + _DATA_DIR + '/coco/coco_val2014', + _ANN_FN: + _DATA_DIR + '/coco/annotations/instances_val2014.json' + }, + 'coco_2014_minival': { + _IM_DIR: + _DATA_DIR + '/coco/coco_val2014', + _ANN_FN: + _DATA_DIR + '/coco/annotations/instances_minival2014.json' + }, + 'coco_2014_valminusminival': { + _IM_DIR: + _DATA_DIR + '/coco/coco_val2014', + _ANN_FN: + _DATA_DIR + '/coco/annotations/instances_valminusminival2014.json' + }, + 'coco_2015_test': { + _IM_DIR: + _DATA_DIR + '/coco/coco_test2015', + _ANN_FN: + _DATA_DIR + '/coco/annotations/image_info_test2015.json' + }, + 'coco_2015_test-dev': { + _IM_DIR: + _DATA_DIR + '/coco/coco_test2015', + _ANN_FN: + _DATA_DIR + '/coco/annotations/image_info_test-dev2015.json' + }, + 'coco_2017_test': { # 2017 test uses 2015 test images + _IM_DIR: + _DATA_DIR + '/coco/coco_test2015', + _ANN_FN: + _DATA_DIR + '/coco/annotations/image_info_test2017.json', + _IM_PREFIX: + 'COCO_test2015_' + }, + 'coco_2017_test-dev': { # 2017 test-dev uses 2015 test images + _IM_DIR: + _DATA_DIR + '/coco/coco_test2015', + _ANN_FN: + _DATA_DIR + '/coco/annotations/image_info_test-dev2017.json', + _IM_PREFIX: + 'COCO_test2015_' + }, + 'coco_stuff_train': { + _IM_DIR: + _DATA_DIR + '/coco/coco_train2014', + _ANN_FN: + _DATA_DIR + '/coco/annotations/coco_stuff_train.json' + }, + 'coco_stuff_val': { + _IM_DIR: + _DATA_DIR + '/coco/coco_val2014', + _ANN_FN: + _DATA_DIR + '/coco/annotations/coco_stuff_val.json' + }, + 'keypoints_coco_2014_train': { + _IM_DIR: + _DATA_DIR + '/coco/coco_train2014', + _ANN_FN: + _DATA_DIR + 
'/coco/annotations/person_keypoints_train2014.json' + }, + 'keypoints_coco_2014_val': { + _IM_DIR: + _DATA_DIR + '/coco/coco_val2014', + _ANN_FN: + _DATA_DIR + '/coco/annotations/person_keypoints_val2014.json' + }, + 'keypoints_coco_2014_minival': { + _IM_DIR: + _DATA_DIR + '/coco/coco_val2014', + _ANN_FN: + _DATA_DIR + '/coco/annotations/person_keypoints_minival2014.json' + }, + 'keypoints_coco_2014_valminusminival': { + _IM_DIR: + _DATA_DIR + '/coco/coco_val2014', + _ANN_FN: + _DATA_DIR + '/coco/annotations/person_keypoints_valminusminival2014.json' + }, + 'keypoints_coco_2015_test': { + _IM_DIR: + _DATA_DIR + '/coco/coco_test2015', + _ANN_FN: + _DATA_DIR + '/coco/annotations/image_info_test2015.json' + }, + 'keypoints_coco_2015_test-dev': { + _IM_DIR: + _DATA_DIR + '/coco/coco_test2015', + _ANN_FN: + _DATA_DIR + '/coco/annotations/image_info_test-dev2015.json' + }, + 'voc_2007_train': { + _IM_DIR: + _DATA_DIR + '/VOC2007/JPEGImages', + _ANN_FN: + _DATA_DIR + '/VOC2007/annotations/voc_2007_train.json', + _DEVKIT_DIR: + _DATA_DIR + '/VOC2007/VOCdevkit2007' + }, + 'voc_2007_val': { + _IM_DIR: + _DATA_DIR + '/VOC2007/JPEGImages', + _ANN_FN: + _DATA_DIR + '/VOC2007/annotations/voc_2007_val.json', + _DEVKIT_DIR: + _DATA_DIR + '/VOC2007/VOCdevkit2007' + }, + 'voc_2007_test': { + _IM_DIR: + _DATA_DIR + '/VOC2007/JPEGImages', + _ANN_FN: + _DATA_DIR + '/VOC2007/annotations/voc_2007_test.json', + _DEVKIT_DIR: + _DATA_DIR + '/VOC2007/VOCdevkit2007' + }, + 'voc_2012_train': { + _IM_DIR: + _DATA_DIR + '/VOC2012/JPEGImages', + _ANN_FN: + _DATA_DIR + '/VOC2012/annotations/voc_2012_train.json', + _DEVKIT_DIR: + _DATA_DIR + '/VOC2012/VOCdevkit2012' + }, + 'voc_2012_val': { + _IM_DIR: + _DATA_DIR + '/VOC2012/JPEGImages', + _ANN_FN: + _DATA_DIR + '/VOC2012/annotations/voc_2012_val.json', + _DEVKIT_DIR: + _DATA_DIR + '/VOC2012/VOCdevkit2012' + } +} + + +def datasets(): + """Retrieve the list of available dataset names.""" + return _DATASETS.keys() + + +def contains(name): + 
"""Determine if the dataset is in the catalog.""" + return name in _DATASETS.keys() + + +def get_im_dir(name): + """Retrieve the image directory for the dataset.""" + return _DATASETS[name][_IM_DIR] + + +def get_ann_fn(name): + """Retrieve the annotation file for the dataset.""" + return _DATASETS[name][_ANN_FN] + + +def get_im_prefix(name): + """Retrieve the image prefix for the dataset.""" + return _DATASETS[name][_IM_PREFIX] if _IM_PREFIX in _DATASETS[name] else '' + + +def get_devkit_dir(name): + """Retrieve the devkit dir for the dataset.""" + return _DATASETS[name][_DEVKIT_DIR] + + +def get_raw_dir(name): + """Retrieve the raw dir for the dataset.""" + return _DATASETS[name][_RAW_DIR] diff --git a/detectron/datasets/dummy_datasets.py b/detectron/datasets/dummy_datasets.py new file mode 100644 index 0000000000000000000000000000000000000000..2f96b8ad3b1060f48dda02a435b221321e537e4a --- /dev/null +++ b/detectron/datasets/dummy_datasets.py @@ -0,0 +1,47 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## +"""Provide stub objects that can act as stand-in "dummy" datasets for simple use +cases, like getting all classes in a dataset. This exists so that demos can be +run without requiring users to download/install datasets first. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from detectron.utils.collections import AttrDict + + +def get_coco_dataset(): + """A dummy COCO dataset that includes only the 'classes' field.""" + ds = AttrDict() + classes = [ + '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', + 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', + 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', + 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', + 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', + 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', + 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', + 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', + 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', + 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', + 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', + 'scissors', 'teddy bear', 'hair drier', 'toothbrush' + ] + ds.classes = {i: name for i, name in enumerate(classes)} + return ds diff --git a/detectron/datasets/json_dataset.py b/detectron/datasets/json_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..71aacc1e5ed1abf0c0f64e5e7517661bdcd8e24d --- /dev/null +++ b/detectron/datasets/json_dataset.py @@ -0,0 +1,465 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Representation of the standard COCO json dataset format. + +When working with a new dataset, we strongly suggest to convert the dataset into +the COCO json format and use the existing code; it is not recommended to write +code to support new dataset formats. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import copy +import logging +import numpy as np +import os +import scipy.sparse + +# Must happen before importing COCO API (which imports matplotlib) +import detectron.utils.env as envu +envu.set_up_matplotlib() +# COCO API +from pycocotools import mask as COCOmask +from pycocotools.coco import COCO + +from detectron.core.config import cfg +from detectron.utils.timer import Timer +import detectron.datasets.dataset_catalog as dataset_catalog +import detectron.utils.boxes as box_utils +from detectron.utils.io import load_object +import detectron.utils.segms as segm_utils + +logger = logging.getLogger(__name__) + + +class JsonDataset(object): + """A class representing a COCO json dataset.""" + + def __init__(self, name): + assert dataset_catalog.contains(name), \ + 'Unknown dataset name: {}'.format(name) + assert os.path.exists(dataset_catalog.get_im_dir(name)), \ + 'Im dir \'{}\' not found'.format(dataset_catalog.get_im_dir(name)) + assert os.path.exists(dataset_catalog.get_ann_fn(name)), \ + 'Ann fn \'{}\' not found'.format(dataset_catalog.get_ann_fn(name)) + 
logger.debug('Creating: {}'.format(name)) + self.name = name + self.image_directory = dataset_catalog.get_im_dir(name) + self.image_prefix = dataset_catalog.get_im_prefix(name) + self.COCO = COCO(dataset_catalog.get_ann_fn(name)) + self.debug_timer = Timer() + # Set up dataset classes + category_ids = self.COCO.getCatIds() + categories = [c['name'] for c in self.COCO.loadCats(category_ids)] + self.category_to_id_map = dict(zip(categories, category_ids)) + self.classes = ['__background__'] + categories + self.num_classes = len(self.classes) + self.json_category_id_to_contiguous_id = { + v: i + 1 + for i, v in enumerate(self.COCO.getCatIds()) + } + self.contiguous_category_id_to_json_id = { + v: k + for k, v in self.json_category_id_to_contiguous_id.items() + } + self._init_keypoints() + + def get_roidb( + self, + gt=False, + proposal_file=None, + min_proposal_size=2, + proposal_limit=-1, + crowd_filter_thresh=0 + ): + """Return an roidb corresponding to the json dataset. Optionally: + - include ground truth boxes in the roidb + - add proposals specified in a proposals file + - filter proposals based on a minimum side length + - filter proposals that intersect with crowd regions + """ + assert gt is True or crowd_filter_thresh == 0, \ + 'Crowd filter threshold must be 0 if ground-truth annotations ' \ + 'are not included.' + image_ids = self.COCO.getImgIds() + image_ids.sort() + roidb = copy.deepcopy(self.COCO.loadImgs(image_ids)) + for entry in roidb: + self._prep_roidb_entry(entry) + if gt: + # Include ground-truth object annotations + self.debug_timer.tic() + for entry in roidb: + self._add_gt_annotations(entry) + logger.debug( + '_add_gt_annotations took {:.3f}s'. 
+ format(self.debug_timer.toc(average=False)) + ) + if proposal_file is not None: + # Include proposals from a file + self.debug_timer.tic() + self._add_proposals_from_file( + roidb, proposal_file, min_proposal_size, proposal_limit, + crowd_filter_thresh + ) + logger.debug( + '_add_proposals_from_file took {:.3f}s'. + format(self.debug_timer.toc(average=False)) + ) + _add_class_assignments(roidb) + return roidb + + def _prep_roidb_entry(self, entry): + """Adds empty metadata fields to an roidb entry.""" + # Reference back to the parent dataset + entry['dataset'] = self + # Make file_name an abs path + im_path = os.path.join( + self.image_directory, self.image_prefix + entry['file_name'] + ) + assert os.path.exists(im_path), 'Image \'{}\' not found'.format(im_path) + entry['image'] = im_path + entry['flipped'] = False + entry['has_visible_keypoints'] = False + # Empty placeholders + entry['boxes'] = np.empty((0, 4), dtype=np.float32) + entry['segms'] = [] + entry['gt_classes'] = np.empty((0), dtype=np.int32) + entry['seg_areas'] = np.empty((0), dtype=np.float32) + entry['gt_overlaps'] = scipy.sparse.csr_matrix( + np.empty((0, self.num_classes), dtype=np.float32) + ) + entry['is_crowd'] = np.empty((0), dtype=np.bool) + # 'box_to_gt_ind_map': Shape is (#rois). 
Maps from each roi to the index + # in the list of rois that satisfy np.where(entry['gt_classes'] > 0) + entry['box_to_gt_ind_map'] = np.empty((0), dtype=np.int32) + if self.keypoints is not None: + entry['gt_keypoints'] = np.empty( + (0, 3, self.num_keypoints), dtype=np.int32 + ) + # Remove unwanted fields that come from the json file (if they exist) + for k in ['date_captured', 'url', 'license', 'file_name']: + if k in entry: + del entry[k] + + def _add_gt_annotations(self, entry): + """Add ground truth annotation metadata to an roidb entry.""" + ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None) + objs = self.COCO.loadAnns(ann_ids) + # Sanitize bboxes -- some are invalid + valid_objs = [] + valid_segms = [] + width = entry['width'] + height = entry['height'] + for obj in objs: + # crowd regions are RLE encoded + if segm_utils.is_poly(obj['segmentation']): + # Valid polygons have >= 3 points, so require >= 6 coordinates + obj['segmentation'] = [ + p for p in obj['segmentation'] if len(p) >= 6 + ] + if obj['area'] < cfg.TRAIN.GT_MIN_AREA: + continue + if 'ignore' in obj and obj['ignore'] == 1: + continue + # Convert form (x1, y1, w, h) to (x1, y1, x2, y2) + x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox']) + x1, y1, x2, y2 = box_utils.clip_xyxy_to_image( + x1, y1, x2, y2, height, width + ) + # Require non-zero seg area and more than 1x1 box size + if obj['area'] > 0 and x2 > x1 and y2 > y1: + obj['clean_bbox'] = [x1, y1, x2, y2] + valid_objs.append(obj) + valid_segms.append(obj['segmentation']) + num_valid_objs = len(valid_objs) + + boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype) + gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype) + gt_overlaps = np.zeros( + (num_valid_objs, self.num_classes), + dtype=entry['gt_overlaps'].dtype + ) + seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype) + is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype) + box_to_gt_ind_map = np.zeros( + 
(num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype + ) + if self.keypoints is not None: + gt_keypoints = np.zeros( + (num_valid_objs, 3, self.num_keypoints), + dtype=entry['gt_keypoints'].dtype + ) + + im_has_visible_keypoints = False + for ix, obj in enumerate(valid_objs): + cls = self.json_category_id_to_contiguous_id[obj['category_id']] + boxes[ix, :] = obj['clean_bbox'] + gt_classes[ix] = cls + seg_areas[ix] = obj['area'] + is_crowd[ix] = obj['iscrowd'] + box_to_gt_ind_map[ix] = ix + if self.keypoints is not None: + gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj) + if np.sum(gt_keypoints[ix, 2, :]) > 0: + im_has_visible_keypoints = True + if obj['iscrowd']: + # Set overlap to -1 for all classes for crowd objects + # so they will be excluded during training + gt_overlaps[ix, :] = -1.0 + else: + gt_overlaps[ix, cls] = 1.0 + entry['boxes'] = np.append(entry['boxes'], boxes, axis=0) + entry['segms'].extend(valid_segms) + # To match the original implementation: + # entry['boxes'] = np.append( + # entry['boxes'], boxes.astype(np.int).astype(np.float), axis=0) + entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes) + entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas) + entry['gt_overlaps'] = np.append( + entry['gt_overlaps'].toarray(), gt_overlaps, axis=0 + ) + entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps']) + entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd) + entry['box_to_gt_ind_map'] = np.append( + entry['box_to_gt_ind_map'], box_to_gt_ind_map + ) + if self.keypoints is not None: + entry['gt_keypoints'] = np.append( + entry['gt_keypoints'], gt_keypoints, axis=0 + ) + entry['has_visible_keypoints'] = im_has_visible_keypoints + + def _add_proposals_from_file( + self, roidb, proposal_file, min_proposal_size, top_k, crowd_thresh + ): + """Add proposals from a proposals file to an roidb.""" + logger.info('Loading proposals from: {}'.format(proposal_file)) + proposals = load_object(proposal_file) + + 
id_field = 'indexes' if 'indexes' in proposals else 'ids' # compat fix + + _remove_proposals_not_in_roidb(proposals, roidb, id_field) + _sort_proposals(proposals, id_field) + box_list = [] + for i, entry in enumerate(roidb): + if i % 2500 == 0: + logger.info(' {:d}/{:d}'.format(i + 1, len(roidb))) + boxes = proposals['boxes'][i] + # Sanity check that these boxes are for the correct image id + assert entry['id'] == proposals[id_field][i] + # Remove duplicate boxes and very small boxes and then take top k + boxes = box_utils.clip_boxes_to_image( + boxes, entry['height'], entry['width'] + ) + keep = box_utils.unique_boxes(boxes) + boxes = boxes[keep, :] + keep = box_utils.filter_small_boxes(boxes, min_proposal_size) + boxes = boxes[keep, :] + if top_k > 0: + boxes = boxes[:top_k, :] + box_list.append(boxes) + _merge_proposal_boxes_into_roidb(roidb, box_list) + if crowd_thresh > 0: + _filter_crowd_proposals(roidb, crowd_thresh) + + def _init_keypoints(self): + """Initialize COCO keypoint information.""" + self.keypoints = None + self.keypoint_flip_map = None + self.keypoints_to_id_map = None + self.num_keypoints = 0 + # Thus far only the 'person' category has keypoints + if 'person' in self.category_to_id_map: + cat_info = self.COCO.loadCats([self.category_to_id_map['person']]) + else: + return + + # Check if the annotations contain keypoint data or not + if 'keypoints' in cat_info[0]: + keypoints = cat_info[0]['keypoints'] + self.keypoints_to_id_map = dict( + zip(keypoints, range(len(keypoints)))) + self.keypoints = keypoints + self.num_keypoints = len(keypoints) + self.keypoint_flip_map = { + 'left_eye': 'right_eye', + 'left_ear': 'right_ear', + 'left_shoulder': 'right_shoulder', + 'left_elbow': 'right_elbow', + 'left_wrist': 'right_wrist', + 'left_hip': 'right_hip', + 'left_knee': 'right_knee', + 'left_ankle': 'right_ankle'} + + def _get_gt_keypoints(self, obj): + """Return ground truth keypoints.""" + if 'keypoints' not in obj: + return None + kp = 
np.array(obj['keypoints']) + x = kp[0::3] # 0-indexed x coordinates + y = kp[1::3] # 0-indexed y coordinates + # 0: not labeled; 1: labeled, not inside mask; + # 2: labeled and inside mask + v = kp[2::3] + num_keypoints = len(obj['keypoints']) / 3 + assert num_keypoints == self.num_keypoints + gt_kps = np.ones((3, self.num_keypoints), dtype=np.int32) + for i in range(self.num_keypoints): + gt_kps[0, i] = x[i] + gt_kps[1, i] = y[i] + gt_kps[2, i] = v[i] + return gt_kps + + +def add_proposals(roidb, rois, scales, crowd_thresh): + """Add proposal boxes (rois) to an roidb that has ground-truth annotations + but no proposals. If the proposals are not at the original image scale, + specify the scale factor that separate them in scales. + """ + box_list = [] + for i in range(len(roidb)): + inv_im_scale = 1. / scales[i] + idx = np.where(rois[:, 0] == i)[0] + box_list.append(rois[idx, 1:] * inv_im_scale) + _merge_proposal_boxes_into_roidb(roidb, box_list) + if crowd_thresh > 0: + _filter_crowd_proposals(roidb, crowd_thresh) + _add_class_assignments(roidb) + + +def _merge_proposal_boxes_into_roidb(roidb, box_list): + """Add proposal boxes to each roidb entry.""" + assert len(box_list) == len(roidb) + for i, entry in enumerate(roidb): + boxes = box_list[i] + num_boxes = boxes.shape[0] + gt_overlaps = np.zeros( + (num_boxes, entry['gt_overlaps'].shape[1]), + dtype=entry['gt_overlaps'].dtype + ) + box_to_gt_ind_map = -np.ones( + (num_boxes), dtype=entry['box_to_gt_ind_map'].dtype + ) + + # Note: unlike in other places, here we intentionally include all gt + # rois, even ones marked as crowd. Boxes that overlap with crowds will + # be filtered out later (see: _filter_crowd_proposals). 
+ gt_inds = np.where(entry['gt_classes'] > 0)[0] + if len(gt_inds) > 0: + gt_boxes = entry['boxes'][gt_inds, :] + gt_classes = entry['gt_classes'][gt_inds] + proposal_to_gt_overlaps = box_utils.bbox_overlaps( + boxes.astype(dtype=np.float32, copy=False), + gt_boxes.astype(dtype=np.float32, copy=False) + ) + # Gt box that overlaps each input box the most + # (ties are broken arbitrarily by class order) + argmaxes = proposal_to_gt_overlaps.argmax(axis=1) + # Amount of that overlap + maxes = proposal_to_gt_overlaps.max(axis=1) + # Those boxes with non-zero overlap with gt boxes + I = np.where(maxes > 0)[0] + # Record max overlaps with the class of the appropriate gt box + gt_overlaps[I, gt_classes[argmaxes[I]]] = maxes[I] + box_to_gt_ind_map[I] = gt_inds[argmaxes[I]] + entry['boxes'] = np.append( + entry['boxes'], + boxes.astype(entry['boxes'].dtype, copy=False), + axis=0 + ) + entry['gt_classes'] = np.append( + entry['gt_classes'], + np.zeros((num_boxes), dtype=entry['gt_classes'].dtype) + ) + entry['seg_areas'] = np.append( + entry['seg_areas'], + np.zeros((num_boxes), dtype=entry['seg_areas'].dtype) + ) + entry['gt_overlaps'] = np.append( + entry['gt_overlaps'].toarray(), gt_overlaps, axis=0 + ) + entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps']) + entry['is_crowd'] = np.append( + entry['is_crowd'], + np.zeros((num_boxes), dtype=entry['is_crowd'].dtype) + ) + entry['box_to_gt_ind_map'] = np.append( + entry['box_to_gt_ind_map'], + box_to_gt_ind_map.astype( + entry['box_to_gt_ind_map'].dtype, copy=False + ) + ) + + +def _filter_crowd_proposals(roidb, crowd_thresh): + """Finds proposals that are inside crowd regions and marks them as + overlap = -1 with each ground-truth rois, which means they will be excluded + from training. 
+ """ + for entry in roidb: + gt_overlaps = entry['gt_overlaps'].toarray() + crowd_inds = np.where(entry['is_crowd'] == 1)[0] + non_gt_inds = np.where(entry['gt_classes'] == 0)[0] + if len(crowd_inds) == 0 or len(non_gt_inds) == 0: + continue + crowd_boxes = box_utils.xyxy_to_xywh(entry['boxes'][crowd_inds, :]) + non_gt_boxes = box_utils.xyxy_to_xywh(entry['boxes'][non_gt_inds, :]) + iscrowd_flags = [int(True)] * len(crowd_inds) + ious = COCOmask.iou(non_gt_boxes, crowd_boxes, iscrowd_flags) + bad_inds = np.where(ious.max(axis=1) > crowd_thresh)[0] + gt_overlaps[non_gt_inds[bad_inds], :] = -1 + entry['gt_overlaps'] = scipy.sparse.csr_matrix(gt_overlaps) + + +def _add_class_assignments(roidb): + """Compute object category assignment for each box associated with each + roidb entry. + """ + for entry in roidb: + gt_overlaps = entry['gt_overlaps'].toarray() + # max overlap with gt over classes (columns) + max_overlaps = gt_overlaps.max(axis=1) + # gt class that had the max overlap + max_classes = gt_overlaps.argmax(axis=1) + entry['max_classes'] = max_classes + entry['max_overlaps'] = max_overlaps + # sanity checks + # if max overlap is 0, the class must be background (class 0) + zero_inds = np.where(max_overlaps == 0)[0] + assert all(max_classes[zero_inds] == 0) + # if max overlap > 0, the class must be a fg class (not class 0) + nonzero_inds = np.where(max_overlaps > 0)[0] + assert all(max_classes[nonzero_inds] != 0) + + +def _sort_proposals(proposals, id_field): + """Sort proposals by the specified id field.""" + order = np.argsort(proposals[id_field]) + fields_to_sort = ['boxes', id_field, 'scores'] + for k in fields_to_sort: + proposals[k] = [proposals[k][i] for i in order] + + +def _remove_proposals_not_in_roidb(proposals, roidb, id_field): + # fix proposals so they don't contain entries for images not in the roidb + roidb_ids = set({entry["id"] for entry in roidb}) + keep = [i for i, id in enumerate(proposals[id_field]) if id in roidb_ids] + for f in ['boxes', 
id_field, 'scores']: + proposals[f] = [proposals[f][i] for i in keep] diff --git a/detectron/datasets/json_dataset_evaluator.py b/detectron/datasets/json_dataset_evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..5b47f0bd9da98463b13b6db635c5c14b7d630d89 --- /dev/null +++ b/detectron/datasets/json_dataset_evaluator.py @@ -0,0 +1,471 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Functions for evaluating results computed for a json dataset.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import json +import logging +import numpy as np +import os +import six +import uuid + +from pycocotools.cocoeval import COCOeval + +from detectron.core.config import cfg +from detectron.utils.io import save_object +import detectron.utils.boxes as box_utils + +logger = logging.getLogger(__name__) + + +def evaluate_masks( + json_dataset, + all_boxes, + all_segms, + output_dir, + use_salt=True, + cleanup=False +): + res_file = os.path.join( + output_dir, 'segmentations_' + json_dataset.name + '_results' + ) + if use_salt: + res_file += '_{}'.format(str(uuid.uuid4())) + res_file += '.json' + _write_coco_segms_results_file( + json_dataset, all_boxes, all_segms, res_file) + # Only do evaluation on non-test sets 
(annotations are undisclosed on test) + if json_dataset.name.find('test') == -1: + coco_eval = _do_segmentation_eval(json_dataset, res_file, output_dir) + else: + logger.warning( + '{} eval ignored as annotations are undisclosed on test: {} ignored' + .format("Segmentation", json_dataset.name) + ) + coco_eval = None + # Optionally cleanup results json file + if cleanup: + os.remove(res_file) + return coco_eval + + +def _write_coco_segms_results_file( + json_dataset, all_boxes, all_segms, res_file +): + # [{"image_id": 42, + # "category_id": 18, + # "segmentation": [...], + # "score": 0.236}, ...] + results = [] + for cls_ind, cls in enumerate(json_dataset.classes): + if cls == '__background__': + continue + if cls_ind >= len(all_boxes): + break + cat_id = json_dataset.category_to_id_map[cls] + results.extend(_coco_segms_results_one_category( + json_dataset, all_boxes[cls_ind], all_segms[cls_ind], cat_id)) + logger.info( + 'Writing segmentation results json to: {}'.format( + os.path.abspath(res_file))) + with open(res_file, 'w') as fid: + # "counts" is an array encoded by mask_util as a byte-stream. Python3's + # json writer which /always produces strings/ cannot serialize a bytestream + # unless you decode it. Thankfully, utf-8 works out (which is also what + # the pycocotools/_mask.pyx does. 
+ if six.PY3: + for r in results: + rle = r['segmentation'] + if 'counts' in rle: + rle['counts'] = rle['counts'].decode("utf8") + + json.dump(results, fid) + + +def _coco_segms_results_one_category(json_dataset, boxes, segms, cat_id): + results = [] + image_ids = json_dataset.COCO.getImgIds() + image_ids.sort() + assert len(boxes) == len(image_ids) + assert len(segms) == len(image_ids) + for i, image_id in enumerate(image_ids): + dets = boxes[i] + rles = segms[i] + + if isinstance(dets, list) and len(dets) == 0: + continue + + dets = dets.astype(np.float) + scores = dets[:, -1] + + results.extend( + [{'image_id': image_id, + 'category_id': cat_id, + 'segmentation': rles[k], + 'score': scores[k]} + for k in range(dets.shape[0])]) + + return results + + +def _do_segmentation_eval(json_dataset, res_file, output_dir): + coco_dt = json_dataset.COCO.loadRes(str(res_file)) + coco_eval = COCOeval(json_dataset.COCO, coco_dt, 'segm') + coco_eval.evaluate() + coco_eval.accumulate() + _log_detection_eval_metrics(json_dataset, coco_eval) + eval_file = os.path.join(output_dir, 'segmentation_results.pkl') + save_object(coco_eval, eval_file) + logger.info('Wrote json eval results to: {}'.format(eval_file)) + return coco_eval + + +def evaluate_boxes( + json_dataset, all_boxes, output_dir, use_salt=True, cleanup=False +): + res_file = os.path.join( + output_dir, 'bbox_' + json_dataset.name + '_results' + ) + if use_salt: + res_file += '_{}'.format(str(uuid.uuid4())) + res_file += '.json' + _write_coco_bbox_results_file(json_dataset, all_boxes, res_file) + # Only do evaluation on non-test sets (annotations are undisclosed on test) + if json_dataset.name.find('test') == -1: + coco_eval = _do_detection_eval(json_dataset, res_file, output_dir) + else: + logger.warning( + '{} eval ignored as annotations are undisclosed on test: {} ignored' + .format("Bbox", json_dataset.name) + ) + coco_eval = None + # Optionally cleanup results json file + if cleanup: + os.remove(res_file) + return 
coco_eval + + +def _write_coco_bbox_results_file(json_dataset, all_boxes, res_file): + # [{"image_id": 42, + # "category_id": 18, + # "bbox": [258.15,41.29,348.26,243.78], + # "score": 0.236}, ...] + results = [] + for cls_ind, cls in enumerate(json_dataset.classes): + if cls == '__background__': + continue + if cls_ind >= len(all_boxes): + break + cat_id = json_dataset.category_to_id_map[cls] + results.extend(_coco_bbox_results_one_category( + json_dataset, all_boxes[cls_ind], cat_id)) + logger.info( + 'Writing bbox results json to: {}'.format(os.path.abspath(res_file))) + with open(res_file, 'w') as fid: + json.dump(results, fid) + + +def _coco_bbox_results_one_category(json_dataset, boxes, cat_id): + results = [] + image_ids = json_dataset.COCO.getImgIds() + image_ids.sort() + assert len(boxes) == len(image_ids) + for i, image_id in enumerate(image_ids): + dets = boxes[i] + if isinstance(dets, list) and len(dets) == 0: + continue + dets = dets.astype(np.float) + scores = dets[:, -1] + xywh_dets = box_utils.xyxy_to_xywh(dets[:, 0:4]) + xs = xywh_dets[:, 0] + ys = xywh_dets[:, 1] + ws = xywh_dets[:, 2] + hs = xywh_dets[:, 3] + results.extend( + [{'image_id': image_id, + 'category_id': cat_id, + 'bbox': [xs[k], ys[k], ws[k], hs[k]], + 'score': scores[k]} for k in range(dets.shape[0])]) + return results + + +def _do_detection_eval(json_dataset, res_file, output_dir): + coco_dt = json_dataset.COCO.loadRes(str(res_file)) + coco_eval = COCOeval(json_dataset.COCO, coco_dt, 'bbox') + coco_eval.evaluate() + coco_eval.accumulate() + _log_detection_eval_metrics(json_dataset, coco_eval) + eval_file = os.path.join(output_dir, 'detection_results.pkl') + save_object(coco_eval, eval_file) + logger.info('Wrote json eval results to: {}'.format(eval_file)) + return coco_eval + + +def _log_detection_eval_metrics(json_dataset, coco_eval): + def _get_thr_ind(coco_eval, thr): + ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) & + (coco_eval.params.iouThrs < thr + 1e-5))[0][0] + 
iou_thr = coco_eval.params.iouThrs[ind] + assert np.isclose(iou_thr, thr) + return ind + + IoU_lo_thresh = 0.5 + IoU_hi_thresh = 0.95 + ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh) + ind_hi = _get_thr_ind(coco_eval, IoU_hi_thresh) + # precision has dims (iou, recall, cls, area range, max dets) + # area range index 0: all area ranges + # max dets index 2: 100 per image + precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2] + ap_default = np.mean(precision[precision > -1]) + logger.info( + '~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] ~~~~'.format( + IoU_lo_thresh, IoU_hi_thresh)) + logger.info('{:.1f}'.format(100 * ap_default)) + for cls_ind, cls in enumerate(json_dataset.classes): + if cls == '__background__': + continue + # minus 1 because of __background__ + precision = coco_eval.eval['precision'][ + ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2] + ap = np.mean(precision[precision > -1]) + logger.info('{:.1f}'.format(100 * ap)) + logger.info('~~~~ Summary metrics ~~~~') + coco_eval.summarize() + + +def evaluate_box_proposals( + json_dataset, roidb, thresholds=None, area='all', limit=None, class_specific=False +): + """Evaluate detection proposal recall metrics. This function is a much + faster alternative to the official COCO API recall evaluation code. However, + it produces slightly different results. 
+ """ + # Record max overlap value for each gt box + # Return vector of overlap values + areas = { + 'all': 0, + 'small': 1, + 'medium': 2, + 'large': 3, + '96-128': 4, + '128-256': 5, + '256-512': 6, + '512-inf': 7} + area_ranges = [ + [0**2, 1e5**2], # all + [0**2, 32**2], # small + [32**2, 96**2], # medium + [96**2, 1e5**2], # large + [96**2, 128**2], # 96-128 + [128**2, 256**2], # 128-256 + [256**2, 512**2], # 256-512 + [512**2, 1e5**2]] # 512-inf + assert area in areas, 'Unknown area range: {}'.format(area) + area_range = area_ranges[areas[area]] + gt_overlaps = np.zeros(0) + gt_classes = np.zeros(0) + num_pos = 0 + for entry in roidb: + gt_inds = np.where( + (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0] + gt_boxes = entry['boxes'][gt_inds, :] + gt_areas = entry['seg_areas'][gt_inds] + valid_gt_inds = np.where( + (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]))[0] + gt_boxes = gt_boxes[valid_gt_inds, :] + _gt_classes = entry["gt_classes"][valid_gt_inds] + assert gt_boxes.shape[0] == _gt_classes.shape[0] + gt_classes = np.hstack((gt_classes, _gt_classes)) + num_pos += len(valid_gt_inds) + non_gt_inds = np.where(entry['gt_classes'] == 0)[0] + boxes = entry['boxes'][non_gt_inds, :] + if boxes.shape[0] == 0: + continue + if limit is not None and boxes.shape[0] > limit: + boxes = boxes[:limit, :] + overlaps = box_utils.bbox_overlaps( + boxes.astype(dtype=np.float32, copy=False), + gt_boxes.astype(dtype=np.float32, copy=False)) + _gt_overlaps = np.zeros((gt_boxes.shape[0])) + for j in range(min(boxes.shape[0], gt_boxes.shape[0])): + # find which proposal box maximally covers each gt box + argmax_overlaps = overlaps.argmax(axis=0) + # and get the iou amount of coverage for each gt box + max_overlaps = overlaps.max(axis=0) + # find which gt box is 'best' covered (i.e. 
'best' = most iou) + gt_ind = max_overlaps.argmax() + gt_ovr = max_overlaps.max() + assert gt_ovr >= 0 + # find the proposal box that covers the best covered gt box + box_ind = argmax_overlaps[gt_ind] + # record the iou coverage of this gt box + _gt_overlaps[j] = overlaps[box_ind, gt_ind] + assert _gt_overlaps[j] == gt_ovr + # mark the proposal box and the gt box as used + overlaps[box_ind, :] = -1 + overlaps[:, gt_ind] = -1 + # append recorded iou coverage level + gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps)) + + if thresholds is None: + step = 0.05 + thresholds = np.arange(0.5, 0.95 + 1e-5, step) + + if not class_specific: + gt_overlaps = np.sort(gt_overlaps) + recalls = np.zeros_like(thresholds) + # compute recall for each iou threshold + for i, t in enumerate(thresholds): + recalls[i] = (gt_overlaps >= t).sum() / float(num_pos) + ar = recalls.mean() + return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds, + 'gt_overlaps': gt_overlaps, 'num_pos': num_pos} + else: + gt_classes_unique = np.unique(gt_classes) + recalls = np.zeros((gt_classes_unique.shape[0], thresholds.shape[0])) + # compute recall for each category and each iou threshold + for i, category_id in enumerate(gt_classes_unique): + inds = (gt_classes == category_id) + num_pos_per_category = float(inds.sum()) + for j, thresh in enumerate(thresholds): + recalls[i][j] = ( + gt_overlaps[inds] >= thresh + ).sum() / num_pos_per_category + ar = recalls.mean(axis=1).mean() + return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds, + 'gt_overlaps': gt_overlaps, 'num_pos': num_pos} + +def evaluate_keypoints( + json_dataset, + all_boxes, + all_keypoints, + output_dir, + use_salt=True, + cleanup=False +): + res_file = os.path.join( + output_dir, 'keypoints_' + json_dataset.name + '_results' + ) + if use_salt: + res_file += '_{}'.format(str(uuid.uuid4())) + res_file += '.json' + _write_coco_keypoint_results_file( + json_dataset, all_boxes, all_keypoints, res_file) + # Only do evaluation on 
non-test sets (annotations are undisclosed on test) + if json_dataset.name.find('test') == -1: + coco_eval = _do_keypoint_eval(json_dataset, res_file, output_dir) + else: + logger.warning( + '{} eval ignored as annotations are undisclosed on test: {} ignored' + .format("Keypoints", json_dataset.name) + ) + coco_eval = None + # Optionally cleanup results json file + if cleanup: + os.remove(res_file) + return coco_eval + + +def _write_coco_keypoint_results_file( + json_dataset, all_boxes, all_keypoints, res_file +): + results = [] + for cls_ind, cls in enumerate(json_dataset.classes): + if cls == '__background__': + continue + if cls_ind >= len(all_keypoints): + break + logger.info( + 'Collecting {} results ({:d}/{:d})'.format( + cls, cls_ind, len(all_keypoints) - 1)) + cat_id = json_dataset.category_to_id_map[cls] + results.extend(_coco_kp_results_one_category( + json_dataset, all_boxes[cls_ind], all_keypoints[cls_ind], cat_id)) + logger.info( + 'Writing keypoint results json to: {}'.format( + os.path.abspath(res_file))) + with open(res_file, 'w') as fid: + json.dump(results, fid) + + +def _coco_kp_results_one_category(json_dataset, boxes, kps, cat_id): + results = [] + image_ids = json_dataset.COCO.getImgIds() + image_ids.sort() + assert len(kps) == len(image_ids) + assert len(boxes) == len(image_ids) + use_box_score = False + if cfg.KRCNN.KEYPOINT_CONFIDENCE == 'logit': + # This is ugly; see utils.keypoints.heatmap_to_keypoints for the magic + # indexes + score_index = 2 + elif cfg.KRCNN.KEYPOINT_CONFIDENCE == 'prob': + score_index = 3 + elif cfg.KRCNN.KEYPOINT_CONFIDENCE == 'bbox': + use_box_score = True + else: + raise ValueError( + 'KRCNN.KEYPOINT_CONFIDENCE must be "logit", "prob", or "bbox"') + for i, image_id in enumerate(image_ids): + if len(boxes[i]) == 0: + continue + kps_dets = kps[i] + scores = boxes[i][:, -1].astype(np.float) + if len(kps_dets) == 0: + continue + for j in range(len(kps_dets)): + xy = [] + + kps_score = 0 + for k in 
range(kps_dets[j].shape[1]): + xy.append(float(kps_dets[j][0, k])) + xy.append(float(kps_dets[j][1, k])) + xy.append(1) + if not use_box_score: + kps_score += kps_dets[j][score_index, k] + + if use_box_score: + kps_score = scores[j] + else: + kps_score /= kps_dets[j].shape[1] + + results.extend([{'image_id': image_id, + 'category_id': cat_id, + 'keypoints': xy, + 'score': kps_score}]) + return results + + +def _do_keypoint_eval(json_dataset, res_file, output_dir): + ann_type = 'keypoints' + imgIds = json_dataset.COCO.getImgIds() + imgIds.sort() + coco_dt = json_dataset.COCO.loadRes(res_file) + coco_eval = COCOeval(json_dataset.COCO, coco_dt, ann_type) + coco_eval.params.imgIds = imgIds + coco_eval.evaluate() + coco_eval.accumulate() + eval_file = os.path.join(output_dir, 'keypoint_results.pkl') + save_object(coco_eval, eval_file) + logger.info('Wrote json eval results to: {}'.format(eval_file)) + coco_eval.summarize() + return coco_eval diff --git a/detectron/datasets/roidb.py b/detectron/datasets/roidb.py new file mode 100644 index 0000000000000000000000000000000000000000..57b6e9cfeedc12d822e0b9ec46e58ff9a4bbb288 --- /dev/null +++ b/detectron/datasets/roidb.py @@ -0,0 +1,199 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +"""Functions for common roidb manipulations.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from past.builtins import basestring +import logging +import numpy as np + +from detectron.core.config import cfg +from detectron.datasets.json_dataset import JsonDataset +import detectron.utils.boxes as box_utils +import detectron.utils.keypoints as keypoint_utils +import detectron.utils.segms as segm_utils + +logger = logging.getLogger(__name__) + + +def combined_roidb_for_training(dataset_names, proposal_files): + """Load and concatenate roidbs for one or more datasets, along with optional + object proposals. The roidb entries are then prepared for use in training, + which involves caching certain types of metadata for each roidb entry. + """ + def get_roidb(dataset_name, proposal_file): + ds = JsonDataset(dataset_name) + roidb = ds.get_roidb( + gt=True, + proposal_file=proposal_file, + crowd_filter_thresh=cfg.TRAIN.CROWD_FILTER_THRESH + ) + if cfg.TRAIN.USE_FLIPPED: + logger.info('Appending horizontally-flipped training examples...') + extend_with_flipped_entries(roidb, ds) + logger.info('Loaded dataset: {:s}'.format(ds.name)) + return roidb + + if isinstance(dataset_names, basestring): + dataset_names = (dataset_names, ) + if isinstance(proposal_files, basestring): + proposal_files = (proposal_files, ) + if len(proposal_files) == 0: + proposal_files = (None, ) * len(dataset_names) + assert len(dataset_names) == len(proposal_files) + roidbs = [get_roidb(*args) for args in zip(dataset_names, proposal_files)] + roidb = roidbs[0] + for r in roidbs[1:]: + roidb.extend(r) + roidb = filter_for_training(roidb) + + logger.info('Computing bounding-box regression targets...') + add_bbox_regression_targets(roidb) + logger.info('done') + + _compute_and_log_stats(roidb) + + return roidb + + 
+def extend_with_flipped_entries(roidb, dataset): + """Flip each entry in the given roidb and return a new roidb that is the + concatenation of the original roidb and the flipped entries. + + "Flipping" an entry means that that image and associated metadata (e.g., + ground truth boxes and object proposals) are horizontally flipped. + """ + flipped_roidb = [] + for entry in roidb: + width = entry['width'] + boxes = entry['boxes'].copy() + oldx1 = boxes[:, 0].copy() + oldx2 = boxes[:, 2].copy() + boxes[:, 0] = width - oldx2 - 1 + boxes[:, 2] = width - oldx1 - 1 + assert (boxes[:, 2] >= boxes[:, 0]).all() + flipped_entry = {} + dont_copy = ('boxes', 'segms', 'gt_keypoints', 'flipped') + for k, v in entry.items(): + if k not in dont_copy: + flipped_entry[k] = v + flipped_entry['boxes'] = boxes + flipped_entry['segms'] = segm_utils.flip_segms( + entry['segms'], entry['height'], entry['width'] + ) + if dataset.keypoints is not None: + flipped_entry['gt_keypoints'] = keypoint_utils.flip_keypoints( + dataset.keypoints, dataset.keypoint_flip_map, + entry['gt_keypoints'], entry['width'] + ) + flipped_entry['flipped'] = True + flipped_roidb.append(flipped_entry) + roidb.extend(flipped_roidb) + + +def filter_for_training(roidb): + """Remove roidb entries that have no usable RoIs based on config settings. 
+ """ + def is_valid(entry): + # Valid images have: + # (1) At least one foreground RoI OR + # (2) At least one background RoI + overlaps = entry['max_overlaps'] + # find boxes with sufficient overlap + fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] + # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) + bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & + (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] + # image is only valid if such boxes exist + valid = len(fg_inds) > 0 or len(bg_inds) > 0 + if cfg.MODEL.KEYPOINTS_ON: + # If we're training for keypoints, exclude images with no keypoints + valid = valid and entry['has_visible_keypoints'] + return valid + + num = len(roidb) + filtered_roidb = [entry for entry in roidb if is_valid(entry)] + num_after = len(filtered_roidb) + logger.info('Filtered {} roidb entries: {} -> {}'. + format(num - num_after, num, num_after)) + return filtered_roidb + + +def add_bbox_regression_targets(roidb): + """Add information needed to train bounding-box regressors.""" + for entry in roidb: + entry['bbox_targets'] = compute_bbox_regression_targets(entry) + + +def compute_bbox_regression_targets(entry): + """Compute bounding-box regression targets for an image.""" + # Indices of ground-truth ROIs + rois = entry['boxes'] + overlaps = entry['max_overlaps'] + labels = entry['max_classes'] + gt_inds = np.where((entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0] + # Targets has format (class, tx, ty, tw, th) + targets = np.zeros((rois.shape[0], 5), dtype=np.float32) + if len(gt_inds) == 0: + # Bail if the image has no ground-truth ROIs + return targets + + # Indices of examples for which we try to make predictions + ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] + + # Get IoU overlap between each ex ROI and gt ROI + ex_gt_overlaps = box_utils.bbox_overlaps( + rois[ex_inds, :].astype(dtype=np.float32, copy=False), + rois[gt_inds, :].astype(dtype=np.float32, copy=False)) + + # Find which gt ROI each ex 
ROI has max overlap with: + # this will be the ex ROI's gt target + gt_assignment = ex_gt_overlaps.argmax(axis=1) + gt_rois = rois[gt_inds[gt_assignment], :] + ex_rois = rois[ex_inds, :] + # Use class "1" for all boxes if using class_agnostic_bbox_reg + targets[ex_inds, 0] = ( + 1 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else labels[ex_inds]) + targets[ex_inds, 1:] = box_utils.bbox_transform_inv( + ex_rois, gt_rois, cfg.MODEL.BBOX_REG_WEIGHTS) + return targets + + +def _compute_and_log_stats(roidb): + classes = roidb[0]['dataset'].classes + char_len = np.max([len(c) for c in classes]) + hist_bins = np.arange(len(classes) + 1) + + # Histogram of ground-truth objects + gt_hist = np.zeros((len(classes)), dtype=np.int) + for entry in roidb: + gt_inds = np.where( + (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0] + gt_classes = entry['gt_classes'][gt_inds] + gt_hist += np.histogram(gt_classes, bins=hist_bins)[0] + logger.debug('Ground-truth class histogram:') + for i, v in enumerate(gt_hist): + logger.debug( + '{:d}{:s}: {:d}'.format( + i, classes[i].rjust(char_len), v)) + logger.debug('-' * char_len) + logger.debug( + '{:s}: {:d}'.format( + 'total'.rjust(char_len), np.sum(gt_hist))) diff --git a/detectron/datasets/task_evaluation.py b/detectron/datasets/task_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..9caefe559b8de2cbb2aefdd82d3f567695633056 --- /dev/null +++ b/detectron/datasets/task_evaluation.py @@ -0,0 +1,411 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Evaluation interface for supported tasks (box detection, instance +segmentation, keypoint detection, ...). + + +Results are stored in an OrderedDict with the following nested structure: + +: + : + : + + is any valid dataset (e.g., 'coco_2014_minival') + is in ['box', 'mask', 'keypoint', 'box_proposal'] + can be ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR@1000', + 'ARs@1000', 'ARm@1000', 'ARl@1000', ...] + is a floating point number +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from collections import OrderedDict +import logging +import os +import pprint + +from detectron.core.config import cfg +from detectron.utils.logging import send_email +import detectron.datasets.cityscapes_json_dataset_evaluator \ + as cs_json_dataset_evaluator +import detectron.datasets.json_dataset_evaluator as json_dataset_evaluator +import detectron.datasets.voc_dataset_evaluator as voc_dataset_evaluator + +logger = logging.getLogger(__name__) + + +def evaluate_all( + dataset, all_boxes, all_segms, all_keyps, output_dir, use_matlab=False +): + """Evaluate "all" tasks, where "all" includes box detection, instance + segmentation, and keypoint detection. 
+ """ + all_results = evaluate_boxes( + dataset, all_boxes, output_dir, use_matlab=use_matlab + ) + logger.info('Evaluating bounding boxes is done!') + if cfg.MODEL.MASK_ON: + results = evaluate_masks(dataset, all_boxes, all_segms, output_dir) + all_results[dataset.name].update(results[dataset.name]) + logger.info('Evaluating segmentations is done!') + if cfg.MODEL.KEYPOINTS_ON: + results = evaluate_keypoints(dataset, all_boxes, all_keyps, output_dir) + all_results[dataset.name].update(results[dataset.name]) + logger.info('Evaluating keypoints is done!') + return all_results + + +def evaluate_boxes(dataset, all_boxes, output_dir, use_matlab=False): + """Evaluate bounding box detection.""" + logger.info('Evaluating detections') + not_comp = not cfg.TEST.COMPETITION_MODE + if _use_json_dataset_evaluator(dataset): + coco_eval = json_dataset_evaluator.evaluate_boxes( + dataset, all_boxes, output_dir, use_salt=not_comp, cleanup=not_comp + ) + box_results = _coco_eval_to_box_results(coco_eval) + elif _use_cityscapes_evaluator(dataset): + logger.warn('Cityscapes bbox evaluated using COCO metrics/conversions') + coco_eval = json_dataset_evaluator.evaluate_boxes( + dataset, all_boxes, output_dir, use_salt=not_comp, cleanup=not_comp + ) + box_results = _coco_eval_to_box_results(coco_eval) + elif _use_voc_evaluator(dataset): + # For VOC, always use salt and always cleanup because results are + # written to the shared VOCdevkit results directory + voc_eval = voc_dataset_evaluator.evaluate_boxes( + dataset, all_boxes, output_dir, use_matlab=use_matlab + ) + box_results = _voc_eval_to_box_results(voc_eval) + else: + raise NotImplementedError( + 'No evaluator for dataset: {}'.format(dataset.name) + ) + return OrderedDict([(dataset.name, box_results)]) + + +def evaluate_masks(dataset, all_boxes, all_segms, output_dir): + """Evaluate instance segmentation.""" + logger.info('Evaluating segmentations') + not_comp = not cfg.TEST.COMPETITION_MODE + if 
_use_json_dataset_evaluator(dataset): + coco_eval = json_dataset_evaluator.evaluate_masks( + dataset, + all_boxes, + all_segms, + output_dir, + use_salt=not_comp, + cleanup=not_comp + ) + mask_results = _coco_eval_to_mask_results(coco_eval) + elif _use_cityscapes_evaluator(dataset): + cs_eval = cs_json_dataset_evaluator.evaluate_masks( + dataset, + all_boxes, + all_segms, + output_dir, + use_salt=not_comp, + cleanup=not_comp + ) + mask_results = _cs_eval_to_mask_results(cs_eval) + else: + raise NotImplementedError( + 'No evaluator for dataset: {}'.format(dataset.name) + ) + return OrderedDict([(dataset.name, mask_results)]) + + +def evaluate_keypoints(dataset, all_boxes, all_keyps, output_dir): + """Evaluate human keypoint detection (i.e., 2D pose estimation).""" + logger.info('Evaluating detections') + not_comp = not cfg.TEST.COMPETITION_MODE + assert dataset.name.startswith('keypoints_coco_'), \ + 'Only COCO keypoints are currently supported' + coco_eval = json_dataset_evaluator.evaluate_keypoints( + dataset, + all_boxes, + all_keyps, + output_dir, + use_salt=not_comp, + cleanup=not_comp + ) + keypoint_results = _coco_eval_to_keypoint_results(coco_eval) + return OrderedDict([(dataset.name, keypoint_results)]) + + +def evaluate_box_proposals(dataset, roidb): + """Evaluate bounding box object proposals.""" + res = _empty_box_proposal_results() + areas = {'all': '', 'small': 's', 'medium': 'm', 'large': 'l'} + for limit in [100, 1000]: + for area, suffix in areas.items(): + stats = json_dataset_evaluator.evaluate_box_proposals( + dataset, + roidb, + area=area, + limit=limit, + class_specific=cfg.TEST.CLASS_SPECIFIC_AR + ) + key = 'AR{}@{:d}'.format(suffix, limit) + res['box_proposal'][key] = stats['ar'] + return OrderedDict([(dataset.name, res)]) + + +def log_box_proposal_results(results): + """Log bounding box proposal results.""" + for dataset in results.keys(): + keys = results[dataset]['box_proposal'].keys() + pad = max([len(k) for k in keys]) + 
logger.info(dataset) + for k, v in results[dataset]['box_proposal'].items(): + logger.info('{}: {:.3f}'.format(k.ljust(pad), v)) + + +def log_copy_paste_friendly_results(results): + """Log results in a format that makes it easy to copy-and-paste in a + spreadsheet. Lines are prefixed with 'copypaste: ' to make grepping easy. + """ + for dataset in results.keys(): + logger.info('copypaste: Dataset: {}'.format(dataset)) + for task, metrics in results[dataset].items(): + logger.info('copypaste: Task: {}'.format(task)) + metric_names = metrics.keys() + metric_vals = ['{:.4f}'.format(v) for v in metrics.values()] + logger.info('copypaste: ' + ','.join(metric_names)) + logger.info('copypaste: ' + ','.join(metric_vals)) + + +def check_expected_results(results, atol=0.005, rtol=0.1): + """Check actual results against expected results stored in + cfg.EXPECTED_RESULTS. Optionally email if the match exceeds the specified + tolerance. + + Expected results should take the form of a list of expectations, each + specified by four elements: [dataset, task, metric, expected value]. For + example: [['coco_2014_minival', 'box_proposal', 'AR@1000', 0.387], ...]. + + The expected value may also be formatted as a list [mean, std] providing + an empirical mean and standard deviation from which a valid range is computed + using cfg.EXPECTED_RESULTS_SIGMA_TOL. For example: + [['coco_2014_minival', 'box_proposal', 'AR@1000', [0.387, 0.001]], ...] 
+ """ + # cfg contains a reference set of results that we want to check against + if len(cfg.EXPECTED_RESULTS) == 0: + return + + for dataset, task, metric, expected_val in cfg.EXPECTED_RESULTS: + assert dataset in results, 'Dataset {} not in results'.format(dataset) + assert task in results[dataset], 'Task {} not in results'.format(task) + assert metric in results[dataset][task], \ + 'Metric {} not in results'.format(metric) + actual_val = results[dataset][task][metric] + ok = False + if isinstance(expected_val, list): + assert len(expected_val) == 2, ( + 'Expected result must be in (mean, std) format' + ) + mean, std = expected_val + lo = mean - cfg.EXPECTED_RESULTS_SIGMA_TOL * std + hi = mean + cfg.EXPECTED_RESULTS_SIGMA_TOL * std + ok = (lo < actual_val) and (actual_val < hi) + msg = ( + '{} > {} > {} sanity check (actual vs. expected): ' + '{:.3f} vs. mean={:.4f}, std={:.4}, range=({:.4f}, {:.4f})' + ).format(dataset, task, metric, actual_val, mean, std, lo, hi) + else: + err = abs(actual_val - expected_val) + tol = atol + rtol * abs(expected_val) + ok = (err > tol) + msg = ( + '{} > {} > {} sanity check (actual vs. expected): ' + '{:.3f} vs. 
{:.3f}, err={:.3f}, tol={:.3f}' + ).format(dataset, task, metric, actual_val, expected_val, err, tol) + if not ok: + msg = 'FAIL: ' + msg + logger.error(msg) + if cfg.EXPECTED_RESULTS_EMAIL != '': + subject = 'Detectron end-to-end test failure' + job_name = os.environ[ + 'DETECTRON_JOB_NAME' + ] if 'DETECTRON_JOB_NAME' in os.environ else '' + job_id = os.environ[ + 'WORKFLOW_RUN_ID' + ] if 'WORKFLOW_RUN_ID' in os.environ else '' + body = [ + 'Name:', + job_name, + 'Run ID:', + job_id, + 'Failure:', + msg, + 'Config:', + pprint.pformat(cfg), + 'Env:', + pprint.pformat(dict(os.environ)), + ] + send_email( + subject, '\n\n'.join(body), cfg.EXPECTED_RESULTS_EMAIL + ) + else: + msg = 'PASS: ' + msg + logger.info(msg) + + +def _use_json_dataset_evaluator(dataset): + """Check if the dataset uses the general json dataset evaluator.""" + return dataset.name.find('coco_') > -1 or cfg.TEST.FORCE_JSON_DATASET_EVAL + + +def _use_cityscapes_evaluator(dataset): + """Check if the dataset uses the Cityscapes dataset evaluator.""" + return dataset.name.find('cityscapes_') > -1 + + +def _use_voc_evaluator(dataset): + """Check if the dataset uses the PASCAL VOC dataset evaluator.""" + return dataset.name[:4] == 'voc_' + + +# Indices in the stats array for COCO boxes and masks +COCO_AP = 0 +COCO_AP50 = 1 +COCO_AP75 = 2 +COCO_APS = 3 +COCO_APM = 4 +COCO_APL = 5 +# Slight difference for keypoints +COCO_KPS_APM = 3 +COCO_KPS_APL = 4 + + +# ---------------------------------------------------------------------------- # +# Helper functions for producing properly formatted results. 
+# ---------------------------------------------------------------------------- # + +def _coco_eval_to_box_results(coco_eval): + res = _empty_box_results() + if coco_eval is not None: + s = coco_eval.stats + res['box']['AP'] = s[COCO_AP] + res['box']['AP50'] = s[COCO_AP50] + res['box']['AP75'] = s[COCO_AP75] + res['box']['APs'] = s[COCO_APS] + res['box']['APm'] = s[COCO_APM] + res['box']['APl'] = s[COCO_APL] + return res + + +def _coco_eval_to_mask_results(coco_eval): + res = _empty_mask_results() + if coco_eval is not None: + s = coco_eval.stats + res['mask']['AP'] = s[COCO_AP] + res['mask']['AP50'] = s[COCO_AP50] + res['mask']['AP75'] = s[COCO_AP75] + res['mask']['APs'] = s[COCO_APS] + res['mask']['APm'] = s[COCO_APM] + res['mask']['APl'] = s[COCO_APL] + return res + + +def _coco_eval_to_keypoint_results(coco_eval): + res = _empty_keypoint_results() + if coco_eval is not None: + s = coco_eval.stats + res['keypoint']['AP'] = s[COCO_AP] + res['keypoint']['AP50'] = s[COCO_AP50] + res['keypoint']['AP75'] = s[COCO_AP75] + res['keypoint']['APm'] = s[COCO_KPS_APM] + res['keypoint']['APl'] = s[COCO_KPS_APL] + return res + + +def _voc_eval_to_box_results(voc_eval): + # Not supported (return empty results) + return _empty_box_results() + + +def _cs_eval_to_mask_results(cs_eval): + # Not supported (return empty results) + return _empty_mask_results() + + +def _empty_box_results(): + return OrderedDict({ + 'box': + OrderedDict( + [ + ('AP', -1), + ('AP50', -1), + ('AP75', -1), + ('APs', -1), + ('APm', -1), + ('APl', -1), + ] + ) + }) + + +def _empty_mask_results(): + return OrderedDict({ + 'mask': + OrderedDict( + [ + ('AP', -1), + ('AP50', -1), + ('AP75', -1), + ('APs', -1), + ('APm', -1), + ('APl', -1), + ] + ) + }) + + +def _empty_keypoint_results(): + return OrderedDict({ + 'keypoint': + OrderedDict( + [ + ('AP', -1), + ('AP50', -1), + ('AP75', -1), + ('APm', -1), + ('APl', -1), + ] + ) + }) + + +def _empty_box_proposal_results(): + return OrderedDict({ + 'box_proposal': 
+ OrderedDict( + [ + ('AR@100', -1), + ('ARs@100', -1), + ('ARm@100', -1), + ('ARl@100', -1), + ('AR@1000', -1), + ('ARs@1000', -1), + ('ARm@1000', -1), + ('ARl@1000', -1), + ] + ) + }) diff --git a/detectron/datasets/voc_dataset_evaluator.py b/detectron/datasets/voc_dataset_evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..4426fc3d298f0dd597218b4179f6c4f7d3b07922 --- /dev/null +++ b/detectron/datasets/voc_dataset_evaluator.py @@ -0,0 +1,178 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +"""PASCAL VOC dataset evaluation interface.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import logging +import numpy as np +import os +import shutil +import uuid + +from detectron.core.config import cfg +from detectron.datasets.dataset_catalog import get_devkit_dir +from detectron.datasets.voc_eval import voc_eval +from detectron.utils.io import save_object + +logger = logging.getLogger(__name__) + + +def evaluate_boxes( + json_dataset, + all_boxes, + output_dir, + use_salt=True, + cleanup=True, + use_matlab=False +): + salt = '_{}'.format(str(uuid.uuid4())) if use_salt else '' + filenames = _write_voc_results_files(json_dataset, all_boxes, salt) + _do_python_eval(json_dataset, salt, output_dir) + if use_matlab: + _do_matlab_eval(json_dataset, salt, output_dir) + if cleanup: + for filename in filenames: + shutil.copy(filename, output_dir) + os.remove(filename) + return None + + +def _write_voc_results_files(json_dataset, all_boxes, salt): + filenames = [] + image_set_path = voc_info(json_dataset)['image_set_path'] + assert os.path.exists(image_set_path), \ + 'Image set path does not exist: {}'.format(image_set_path) + with open(image_set_path, 'r') as f: + image_index = [x.strip() for x in f.readlines()] + # Sanity check that order of images in json dataset matches order in the + # image set + roidb = json_dataset.get_roidb() + for i, entry in enumerate(roidb): + index = os.path.splitext(os.path.split(entry['image'])[1])[0] + assert index == image_index[i] + for cls_ind, cls in enumerate(json_dataset.classes): + if cls == '__background__': + continue + logger.info('Writing VOC results for: {}'.format(cls)) + filename = _get_voc_results_file_template(json_dataset, + salt).format(cls) + filenames.append(filename) + assert len(all_boxes[cls_ind]) == len(image_index) + 
with open(filename, 'wt') as f: + for im_ind, index in enumerate(image_index): + dets = all_boxes[cls_ind][im_ind] + if type(dets) == list: + assert len(dets) == 0, \ + 'dets should be numpy.ndarray or empty list' + continue + # the VOCdevkit expects 1-based indices + for k in range(dets.shape[0]): + f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. + format(index, dets[k, -1], + dets[k, 0] + 1, dets[k, 1] + 1, + dets[k, 2] + 1, dets[k, 3] + 1)) + return filenames + + +def _get_voc_results_file_template(json_dataset, salt): + info = voc_info(json_dataset) + year = info['year'] + image_set = info['image_set'] + devkit_path = info['devkit_path'] + # VOCdevkit/results/VOC2007/Main/_det_test_aeroplane.txt + filename = 'comp4' + salt + '_det_' + image_set + '_{:s}.txt' + return os.path.join(devkit_path, 'results', 'VOC' + year, 'Main', filename) + + +def _do_python_eval(json_dataset, salt, output_dir='output'): + info = voc_info(json_dataset) + year = info['year'] + anno_path = info['anno_path'] + image_set_path = info['image_set_path'] + devkit_path = info['devkit_path'] + cachedir = os.path.join(devkit_path, 'annotations_cache') + aps = [] + # The PASCAL VOC metric changed in 2010 + use_07_metric = True if int(year) < 2010 else False + logger.info('VOC07 metric? 
' + ('Yes' if use_07_metric else 'No')) + if not os.path.isdir(output_dir): + os.mkdir(output_dir) + for _, cls in enumerate(json_dataset.classes): + if cls == '__background__': + continue + filename = _get_voc_results_file_template( + json_dataset, salt).format(cls) + rec, prec, ap = voc_eval( + filename, anno_path, image_set_path, cls, cachedir, ovthresh=0.5, + use_07_metric=use_07_metric) + aps += [ap] + logger.info('AP for {} = {:.4f}'.format(cls, ap)) + res_file = os.path.join(output_dir, cls + '_pr.pkl') + save_object({'rec': rec, 'prec': prec, 'ap': ap}, res_file) + logger.info('Mean AP = {:.4f}'.format(np.mean(aps))) + logger.info('~~~~~~~~') + logger.info('Results:') + for ap in aps: + logger.info('{:.3f}'.format(ap)) + logger.info('{:.3f}'.format(np.mean(aps))) + logger.info('~~~~~~~~') + logger.info('') + logger.info('----------------------------------------------------------') + logger.info('Results computed with the **unofficial** Python eval code.') + logger.info('Results should be very close to the official MATLAB code.') + logger.info('Use `./tools/reval.py --matlab ...` for your paper.') + logger.info('-- Thanks, The Management') + logger.info('----------------------------------------------------------') + + +def _do_matlab_eval(json_dataset, salt, output_dir='output'): + import subprocess + logger.info('-----------------------------------------------------') + logger.info('Computing results with the official MATLAB eval code.') + logger.info('-----------------------------------------------------') + info = voc_info(json_dataset) + path = os.path.join( + cfg.ROOT_DIR, 'detectron', 'datasets', 'VOCdevkit-matlab-wrapper') + cmd = 'cd {} && '.format(path) + cmd += '{:s} -nodisplay -nodesktop '.format(cfg.MATLAB) + cmd += '-r "dbstop if error; ' + cmd += 'voc_eval(\'{:s}\',\'{:s}\',\'{:s}\',\'{:s}\'); quit;"' \ + .format(info['devkit_path'], 'comp4' + salt, info['image_set'], + output_dir) + logger.info('Running:\n{}'.format(cmd)) + 
subprocess.call(cmd, shell=True) + + +def voc_info(json_dataset): + year = json_dataset.name[4:8] + image_set = json_dataset.name[9:] + devkit_path = get_devkit_dir(json_dataset.name) + assert os.path.exists(devkit_path), \ + 'Devkit directory {} not found'.format(devkit_path) + anno_path = os.path.join( + devkit_path, 'VOC' + year, 'Annotations', '{:s}.xml') + image_set_path = os.path.join( + devkit_path, 'VOC' + year, 'ImageSets', 'Main', image_set + '.txt') + return dict( + year=year, + image_set=image_set, + devkit_path=devkit_path, + anno_path=anno_path, + image_set_path=image_set_path) diff --git a/detectron/datasets/voc_eval.py b/detectron/datasets/voc_eval.py new file mode 100644 index 0000000000000000000000000000000000000000..1497305de86d3cc800fd2fa24d4da16855f85bf7 --- /dev/null +++ b/detectron/datasets/voc_eval.py @@ -0,0 +1,222 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast/er R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Bharath Hariharan
# --------------------------------------------------------

"""Python implementation of the PASCAL VOC devkit's AP evaluation code."""

import logging
import numpy as np
import os
import xml.etree.ElementTree as ET

from detectron.utils.io import load_object
from detectron.utils.io import save_object

logger = logging.getLogger(__name__)


def parse_rec(filename):
    """Parse a PASCAL VOC xml annotation file into a list of object dicts
    with keys: name, pose, truncated, difficult, bbox ([xmin, ymin, xmax,
    ymax] in the devkit's 1-based coordinates)."""
    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        obj_struct = {}
        obj_struct['name'] = obj.find('name').text
        obj_struct['pose'] = obj.find('pose').text
        obj_struct['truncated'] = int(obj.find('truncated').text)
        obj_struct['difficult'] = int(obj.find('difficult').text)
        bbox = obj.find('bndbox')
        obj_struct['bbox'] = [int(bbox.find('xmin').text),
                              int(bbox.find('ymin').text),
                              int(bbox.find('xmax').text),
                              int(bbox.find('ymax').text)]
        objects.append(obj_struct)

    return objects


def voc_ap(rec, prec, use_07_metric=False):
    """Compute VOC AP given precision and recall. If use_07_metric is true,
    uses the VOC 07 11-point method (default:False).
    """
    if use_07_metric:
        # 11 point metric: mean of the max precision at recall >= t,
        # for t in {0, 0.1, ..., 1.0}
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            if np.sum(rec >= t) == 0:
                p = 0
            else:
                p = np.max(prec[rec >= t])
            ap = ap + p / 11.
    else:
        # correct AP calculation (area under the PR curve)
        # first append sentinel values at the end
        mrec = np.concatenate(([0.], rec, [1.]))
        mpre = np.concatenate(([0.], prec, [0.]))

        # compute the precision envelope
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

        # to calculate area under PR curve, look for points
        # where X axis (recall) changes value
        i = np.where(mrec[1:] != mrec[:-1])[0]

        # and sum (\Delta recall) * prec
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap


def voc_eval(detpath,
             annopath,
             imagesetfile,
             classname,
             cachedir,
             ovthresh=0.5,
             use_07_metric=False):
    """rec, prec, ap = voc_eval(detpath,
                                annopath,
                                imagesetfile,
                                classname,
                                [ovthresh],
                                [use_07_metric])

    Top level function that does the PASCAL VOC evaluation.

    detpath: Path to detections
        detpath.format(classname) should produce the detection results file.
    annopath: Path to annotations
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name (duh)
    cachedir: Directory for caching the annotations
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11 point AP computation
        (default False)
    """
    # assumes detections are in detpath.format(classname)
    # assumes annotations are in annopath.format(imagename)
    # assumes imagesetfile is a text file with each line an image name
    # cachedir caches the annotations in a pickle file

    # first load gt
    if not os.path.isdir(cachedir):
        os.mkdir(cachedir)
    imageset = os.path.splitext(os.path.basename(imagesetfile))[0]
    cachefile = os.path.join(cachedir, imageset + '_annots.pkl')
    # read list of images
    with open(imagesetfile, 'r') as f:
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]

    if not os.path.isfile(cachefile):
        # load annots
        recs = {}
        for i, imagename in enumerate(imagenames):
            recs[imagename] = parse_rec(annopath.format(imagename))
            if i % 100 == 0:
                logger.info(
                    'Reading annotation for {:d}/{:d}'.format(
                        i + 1, len(imagenames)))
        # save
        logger.info('Saving cached annotations to {:s}'.format(cachefile))
        save_object(recs, cachefile)
    else:
        recs = load_object(cachefile)

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj['name'] == classname]
        bbox = np.array([x['bbox'] for x in R])
        # NB: np.bool was removed in NumPy 1.24; use the builtin bool
        difficult = np.array([x['difficult'] for x in R]).astype(bool)
        det = [False] * len(R)
        # difficult objects are excluded from the positive count
        npos = npos + sum(~difficult)
        class_recs[imagename] = {'bbox': bbox,
                                 'difficult': difficult,
                                 'det': det}

    # read dets
    detfile = detpath.format(classname)
    with open(detfile, 'r') as f:
        lines = f.readlines()

    splitlines = [x.strip().split(' ') for x in lines]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    # sort by confidence; guard against the no-detections case, where BB is
    # an empty 1-D array and 2-D indexing would raise
    if BB.shape[0] > 0:
        sorted_ind = np.argsort(-confidence)
        BB = BB[sorted_ind, :]
        image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    # first match of this gt box: true positive
                    tp[d] = 1.
                    R['det'][jmax] = 1
                else:
                    # duplicate detection of an already-matched gt box
                    fp[d] = 1.
        else:
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap
# diff --git a/detectron/modeling/FPN.py b/detectron/modeling/FPN.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..46881e49708ff6b5a4f5657897a49e4ebb53052e
# --- /dev/null
# +++ b/detectron/modeling/FPN.py
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Functions for using a Feature Pyramid Network (FPN).""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import collections +import numpy as np + +from detectron.core.config import cfg +from detectron.modeling.generate_anchors import generate_anchors +from detectron.utils.c2 import const_fill +from detectron.utils.c2 import gauss_fill +from detectron.utils.net import get_group_gn +import detectron.modeling.ResNet as ResNet +import detectron.utils.blob as blob_utils +import detectron.utils.boxes as box_utils + +# Lowest and highest pyramid levels in the backbone network. For FPN, we assume +# that all networks have 5 spatial reductions, each by a factor of 2. Level 1 +# would correspond to the input image, hence it does not make sense to use it. 
LOWEST_BACKBONE_LVL = 2   # E.g., "conv2"-like level
HIGHEST_BACKBONE_LVL = 5  # E.g., "conv5"-like level


# ---------------------------------------------------------------------------- #
# FPN with ResNet
# ---------------------------------------------------------------------------- #

def add_fpn_ResNet50_conv5_body(model):
    """FPN on a ResNet-50 conv5 backbone (all pyramid levels)."""
    return add_fpn_onto_conv_body(
        model, ResNet.add_ResNet50_conv5_body, fpn_level_info_ResNet50_conv5
    )


def add_fpn_ResNet50_conv5_P2only_body(model):
    """FPN on ResNet-50 conv5, returning only the finest (P2) level."""
    return add_fpn_onto_conv_body(
        model,
        ResNet.add_ResNet50_conv5_body,
        fpn_level_info_ResNet50_conv5,
        P2only=True
    )


def add_fpn_ResNet101_conv5_body(model):
    """FPN on a ResNet-101 conv5 backbone (all pyramid levels)."""
    return add_fpn_onto_conv_body(
        model, ResNet.add_ResNet101_conv5_body, fpn_level_info_ResNet101_conv5
    )


def add_fpn_ResNet101_conv5_P2only_body(model):
    """FPN on ResNet-101 conv5, returning only the finest (P2) level."""
    return add_fpn_onto_conv_body(
        model,
        ResNet.add_ResNet101_conv5_body,
        fpn_level_info_ResNet101_conv5,
        P2only=True
    )


def add_fpn_ResNet152_conv5_body(model):
    """FPN on a ResNet-152 conv5 backbone (all pyramid levels)."""
    return add_fpn_onto_conv_body(
        model, ResNet.add_ResNet152_conv5_body, fpn_level_info_ResNet152_conv5
    )


def add_fpn_ResNet152_conv5_P2only_body(model):
    """FPN on ResNet-152 conv5, returning only the finest (P2) level."""
    return add_fpn_onto_conv_body(
        model,
        ResNet.add_ResNet152_conv5_body,
        fpn_level_info_ResNet152_conv5,
        P2only=True
    )


# ---------------------------------------------------------------------------- #
# Functions for bolting FPN onto a backbone architectures
# ---------------------------------------------------------------------------- #

def add_fpn_onto_conv_body(
    model, conv_body_func, fpn_level_info_func, P2only=False
):
    """Add the specified conv body to the model and then add FPN levels to it.
    """
    # Note: blobs_conv is in reversed order: [fpn5, fpn4, fpn3, fpn2]
    # similarly for dims_conv: [2048, 1024, 512, 256]
    # similarly for spatial_scales_fpn: [1/32, 1/16, 1/8, 1/4]

    conv_body_func(model)
    blobs_fpn, dim_fpn, spatial_scales_fpn = add_fpn(
        model, fpn_level_info_func()
    )

    if P2only:
        # use only the finest level
        return blobs_fpn[-1], dim_fpn, spatial_scales_fpn[-1]
    else:
        # use all levels
        return blobs_fpn, dim_fpn, spatial_scales_fpn


def add_fpn(model, fpn_level_info):
    """Add FPN connections based on the model described in the FPN paper."""
    # FPN levels are built starting from the highest/coarest level of the
    # backbone (usually "conv5"). First we build down, recursively constructing
    # lower/finer resolution FPN levels. Then we build up, constructing levels
    # that are even higher/coarser than the starting level.
    fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()
    # Count the number of backbone stages that we will generate FPN levels for
    # starting from the coarest backbone stage (usually the "conv5"-like level)
    # E.g., if the backbone level info defines stages 4 stages: "conv5",
    # "conv4", ... "conv2" and min_level=2, then we end up with 4 - (2 - 2) = 4
    # backbone stages to add FPN to.
    num_backbone_stages = (
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
    )

    lateral_input_blobs = fpn_level_info.blobs[:num_backbone_stages]
    output_blobs = [
        'fpn_inner_{}'.format(s)
        for s in fpn_level_info.blobs[:num_backbone_stages]
    ]
    fpn_dim_lateral = fpn_level_info.dims
    xavier_fill = ('XavierFill', {})

    # For the coarsest backbone level: 1x1 conv only seeds recursion
    if cfg.FPN.USE_GN:
        # use GroupNorm
        c = model.ConvGN(
            lateral_input_blobs[0],
            output_blobs[0],  # note: this is a prefix
            dim_in=fpn_dim_lateral[0],
            dim_out=fpn_dim,
            group_gn=get_group_gn(fpn_dim),
            kernel=1,
            pad=0,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )
        output_blobs[0] = c  # rename it
    else:
        model.Conv(
            lateral_input_blobs[0],
            output_blobs[0],
            dim_in=fpn_dim_lateral[0],
            dim_out=fpn_dim,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )

    #
    # Step 1: recursively build down starting from the coarsest backbone level
    #

    # For other levels add top-down and lateral connections
    for i in range(num_backbone_stages - 1):
        add_topdown_lateral_module(
            model,
            output_blobs[i],             # top-down blob
            lateral_input_blobs[i + 1],  # lateral blob
            output_blobs[i + 1],         # next output blob
            fpn_dim,                     # output dimension
            fpn_dim_lateral[i + 1]       # lateral input dimension
        )

    # Post-hoc scale-specific 3x3 convs
    blobs_fpn = []
    spatial_scales = []
    for i in range(num_backbone_stages):
        if cfg.FPN.USE_GN:
            # use GroupNorm
            fpn_blob = model.ConvGN(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim,
                dim_out=fpn_dim,
                group_gn=get_group_gn(fpn_dim),
                kernel=3,
                pad=1,
                stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        else:
            fpn_blob = model.Conv(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim,
                dim_out=fpn_dim,
                kernel=3,
                pad=1,
                stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        blobs_fpn += [fpn_blob]
        spatial_scales += [fpn_level_info.spatial_scales[i]]

    #
    # Step 2: build up starting from the coarsest backbone level
    #

    # Check if we need the P6 feature map
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        # Original FPN P6 level implementation from our CVPR'17 FPN paper
        P6_blob_in = blobs_fpn[0]
        P6_name = P6_blob_in + '_subsampled_2x'
        # Use max pooling to simulate stride 2 subsampling
        P6_blob = model.MaxPool(P6_blob_in, P6_name, kernel=1, pad=0, stride=2)
        blobs_fpn.insert(0, P6_blob)
        spatial_scales.insert(0, spatial_scales[0] * 0.5)

    # Coarser FPN levels introduced for RetinaNet
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        fpn_blob = fpn_level_info.blobs[0]
        dim_in = fpn_level_info.dims[0]
        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            fpn_blob_in = fpn_blob
            if i > HIGHEST_BACKBONE_LVL + 1:
                # ReLU between the extra stride-2 convs (but not before P6)
                fpn_blob_in = model.Relu(fpn_blob, fpn_blob + '_relu')
            fpn_blob = model.Conv(
                fpn_blob_in,
                'fpn_' + str(i),
                dim_in=dim_in,
                dim_out=fpn_dim,
                kernel=3,
                pad=1,
                stride=2,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
            dim_in = fpn_dim
            blobs_fpn.insert(0, fpn_blob)
            spatial_scales.insert(0, spatial_scales[0] * 0.5)

    return blobs_fpn, fpn_dim, spatial_scales


def add_topdown_lateral_module(
    model, fpn_top, fpn_lateral, fpn_bottom, dim_top, dim_lateral
):
    """Add a top-down lateral module (1x1 lateral conv + 2x nearest-neighbor
    upsampled top-down blob, summed into fpn_bottom)."""
    # Lateral 1x1 conv
    if cfg.FPN.USE_GN:
        # use GroupNorm
        lat = model.ConvGN(
            fpn_lateral,
            fpn_bottom + '_lateral',
            dim_in=dim_lateral,
            dim_out=dim_top,
            group_gn=get_group_gn(dim_top),
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(
                const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL
                else ('XavierFill', {})),
            bias_init=const_fill(0.0)
        )
    else:
        lat = model.Conv(
            fpn_lateral,
            fpn_bottom + '_lateral',
            dim_in=dim_lateral,
            dim_out=dim_top,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(
                const_fill(0.0)
                if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {})
            ),
            bias_init=const_fill(0.0)
        )
    # Top-down 2x upsampling
    td = model.net.UpsampleNearest(fpn_top, fpn_bottom + '_topdown', scale=2)
    # Sum lateral and top-down
    model.net.Sum([lat, td], fpn_bottom)


def get_min_max_levels():
    """The min and max FPN levels required for supporting RPN and/or RoI
    transform operations on multiple FPN levels.
    """
    min_level = LOWEST_BACKBONE_LVL
    max_level = HIGHEST_BACKBONE_LVL
    if cfg.FPN.MULTILEVEL_RPN and not cfg.FPN.MULTILEVEL_ROIS:
        max_level = cfg.FPN.RPN_MAX_LEVEL
        min_level = cfg.FPN.RPN_MIN_LEVEL
    if not cfg.FPN.MULTILEVEL_RPN and cfg.FPN.MULTILEVEL_ROIS:
        max_level = cfg.FPN.ROI_MAX_LEVEL
        min_level = cfg.FPN.ROI_MIN_LEVEL
    if cfg.FPN.MULTILEVEL_RPN and cfg.FPN.MULTILEVEL_ROIS:
        # need the union of both ranges
        max_level = max(cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.ROI_MAX_LEVEL)
        min_level = min(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.ROI_MIN_LEVEL)
    return min_level, max_level


# ---------------------------------------------------------------------------- #
# RPN with an FPN backbone
# ---------------------------------------------------------------------------- #

def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN on FPN specific outputs."""
    num_anchors = len(cfg.FPN.RPN_ASPECT_RATIOS)
    dim_out = dim_in

    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid
    assert len(blobs_in) == k_max - k_min + 1
    for lvl in range(k_min, k_max + 1):
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
        sc = spatial_scales[k_max - lvl]  # in reversed order
        slvl = str(lvl)

        if lvl == k_min:
            # Create conv ops with randomly initialized weights and
            # zeroed biases for the first FPN level; these will be shared by
            # all other FPN levels
            # RPN hidden representation
            conv_rpn_fpn = model.Conv(
                bl_in,
                'conv_rpn_fpn' + slvl,
                dim_in,
                dim_out,
                kernel=3,
                pad=1,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.Conv(
                conv_rpn_fpn,
                'rpn_cls_logits_fpn' + slvl,
                dim_in,
                num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.Conv(
                conv_rpn_fpn,
                'rpn_bbox_pred_fpn' + slvl,
                dim_in,
                4 * num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
        else:
            # Share weights and biases
            sk_min = str(k_min)
            # RPN hidden representation
            conv_rpn_fpn = model.ConvShared(
                bl_in,
                'conv_rpn_fpn' + slvl,
                dim_in,
                dim_out,
                kernel=3,
                pad=1,
                stride=1,
                weight='conv_rpn_fpn' + sk_min + '_w',
                bias='conv_rpn_fpn' + sk_min + '_b'
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.ConvShared(
                conv_rpn_fpn,
                'rpn_cls_logits_fpn' + slvl,
                dim_in,
                num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_cls_logits_fpn' + sk_min + '_w',
                bias='rpn_cls_logits_fpn' + sk_min + '_b'
            )
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.ConvShared(
                conv_rpn_fpn,
                'rpn_bbox_pred_fpn' + slvl,
                dim_in,
                4 * num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_bbox_pred_fpn' + sk_min + '_w',
                bias='rpn_bbox_pred_fpn' + sk_min + '_b'
            )

        if not model.train or cfg.MODEL.FASTER_RCNN:
            # Proposals are needed during:
            # 1) inference (== not model.train) for RPN only and Faster R-CNN
            # OR
            # 2) training for Faster R-CNN
            # Otherwise (== training for RPN only), proposals are not needed
            lvl_anchors = generate_anchors(
                stride=2.**lvl,
                sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),
                aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS
            )
            rpn_cls_probs_fpn = model.net.Sigmoid(
                rpn_cls_logits_fpn, 'rpn_cls_probs_fpn' + slvl
            )
            model.GenerateProposals(
                [rpn_cls_probs_fpn, rpn_bbox_pred_fpn, 'im_info'],
                ['rpn_rois_fpn' + slvl, 'rpn_roi_probs_fpn' + slvl],
                anchors=lvl_anchors,
                spatial_scale=sc
            )


def add_fpn_rpn_losses(model):
    """Add RPN on FPN specific losses."""
    loss_gradients = {}
    for lvl in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1):
        slvl = str(lvl)
        # Spatially narrow the full-sized RPN label arrays to match the
        # feature map shape
        model.net.SpatialNarrowAs(
            ['rpn_labels_int32_wide_fpn' + slvl, 'rpn_cls_logits_fpn' + slvl],
            'rpn_labels_int32_fpn' + slvl
        )
        for key in ('targets', 'inside_weights', 'outside_weights'):
            model.net.SpatialNarrowAs(
                [
                    'rpn_bbox_' + key + '_wide_fpn' + slvl,
                    'rpn_bbox_pred_fpn' + slvl
                ],
                'rpn_bbox_' + key + '_fpn' + slvl
            )
        loss_rpn_cls_fpn = model.net.SigmoidCrossEntropyLoss(
            ['rpn_cls_logits_fpn' + slvl, 'rpn_labels_int32_fpn' + slvl],
            'loss_rpn_cls_fpn' + slvl,
            normalize=0,
            scale=(
                model.GetLossScale() / cfg.TRAIN.RPN_BATCH_SIZE_PER_IM /
                cfg.TRAIN.IMS_PER_BATCH
            )
        )
        # Normalization by (1) RPN_BATCH_SIZE_PER_IM and (2) IMS_PER_BATCH is
        # handled by (1) setting bbox outside weights and (2) SmoothL1Loss
        # normalizes by IMS_PER_BATCH
        loss_rpn_bbox_fpn = model.net.SmoothL1Loss(
            [
                'rpn_bbox_pred_fpn' + slvl, 'rpn_bbox_targets_fpn' + slvl,
                'rpn_bbox_inside_weights_fpn' + slvl,
                'rpn_bbox_outside_weights_fpn' + slvl
            ],
            'loss_rpn_bbox_fpn' + slvl,
            beta=1. / 9.,
            scale=model.GetLossScale(),
        )
        loss_gradients.update(
            blob_utils.
            get_loss_gradients(model, [loss_rpn_cls_fpn, loss_rpn_bbox_fpn])
        )
        model.AddLosses(['loss_rpn_cls_fpn' + slvl, 'loss_rpn_bbox_fpn' + slvl])
    return loss_gradients


# ---------------------------------------------------------------------------- #
# Helper functions for working with multilevel FPN RoIs
# ---------------------------------------------------------------------------- #

def map_rois_to_fpn_levels(rois, k_min, k_max):
    """Determine which FPN level each RoI in a set of RoIs should map to based
    on the heuristic in the FPN paper.
    """
    # Compute level ids
    s = np.sqrt(box_utils.boxes_area(rois))
    s0 = cfg.FPN.ROI_CANONICAL_SCALE  # default: 224
    lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL  # default: 4

    # Eqn.(1) in FPN paper
    target_lvls = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
    target_lvls = np.clip(target_lvls, k_min, k_max)
    return target_lvls


def add_multilevel_roi_blobs(
    blobs, blob_prefix, rois, target_lvls, lvl_min, lvl_max
):
    """Add RoI blobs for multiple FPN levels to the blobs dict.

    blobs: a dict mapping from blob name to numpy ndarray
    blob_prefix: name prefix to use for the FPN blobs
    rois: the source rois as a 2D numpy array of shape (N, 5) where each row is
        an roi and the columns encode (batch_idx, x1, y1, x2, y2)
    target_lvls: numpy array of shape (N, ) indicating which FPN level each roi
        in rois should be assigned to
    lvl_min: the finest (highest resolution) FPN level (e.g., 2)
    lvl_max: the coarest (lowest resolution) FPN level (e.g., 6)
    """
    rois_idx_order = np.empty((0, ))
    rois_stacked = np.zeros((0, 5), dtype=np.float32)  # for assert
    for lvl in range(lvl_min, lvl_max + 1):
        idx_lvl = np.where(target_lvls == lvl)[0]
        blobs[blob_prefix + '_fpn' + str(lvl)] = rois[idx_lvl, :]
        rois_idx_order = np.concatenate((rois_idx_order, idx_lvl))
        rois_stacked = np.vstack(
            [rois_stacked, blobs[blob_prefix + '_fpn' + str(lvl)]]
        )
    rois_idx_restore = np.argsort(rois_idx_order).astype(np.int32, copy=False)
    blobs[blob_prefix + '_idx_restore_int32'] = rois_idx_restore
    # Sanity check that restore order is correct
    assert (rois_stacked[rois_idx_restore] == rois).all()


# ---------------------------------------------------------------------------- #
# FPN level info for stages 5, 4, 3, 2 for select models (more can be added)
# ---------------------------------------------------------------------------- #

FpnLevelInfo = collections.namedtuple(
    'FpnLevelInfo',
    ['blobs', 'dims', 'spatial_scales']
)


def fpn_level_info_ResNet50_conv5():
    # Blob names/dims/scales listed coarsest (conv5) to finest (conv2)
    return FpnLevelInfo(
        blobs=('res5_2_sum', 'res4_5_sum', 'res3_3_sum', 'res2_2_sum'),
        dims=(2048, 1024, 512, 256),
        spatial_scales=(1. / 32., 1. / 16., 1. / 8., 1. / 4.)
    )


def fpn_level_info_ResNet101_conv5():
    # Blob names/dims/scales listed coarsest (conv5) to finest (conv2)
    return FpnLevelInfo(
        blobs=('res5_2_sum', 'res4_22_sum', 'res3_3_sum', 'res2_2_sum'),
        dims=(2048, 1024, 512, 256),
        spatial_scales=(1. / 32., 1. / 16., 1. / 8., 1. / 4.)
    )


def fpn_level_info_ResNet152_conv5():
    # Blob names/dims/scales listed coarsest (conv5) to finest (conv2)
    return FpnLevelInfo(
        blobs=('res5_2_sum', 'res4_35_sum', 'res3_7_sum', 'res2_2_sum'),
        dims=(2048, 1024, 512, 256),
        spatial_scales=(1. / 32., 1. / 16., 1. / 8., 1. / 4.)
    )
# diff --git a/detectron/modeling/ResNet.py b/detectron/modeling/ResNet.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..ae602d97c92ef9d1d8584b2e9742c585b4502d8c
# --- /dev/null
# +++ b/detectron/modeling/ResNet.py
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Implements ResNet and ResNeXt.

See: https://arxiv.org/abs/1512.03385, https://arxiv.org/abs/1611.05431.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from detectron.core.config import cfg
from detectron.utils.net import get_group_gn


# ---------------------------------------------------------------------------- #
# Bits for specific architectures (ResNet50, ResNet101, ...)
# ---------------------------------------------------------------------------- #


def add_ResNet50_conv4_body(model):
    # Stages res2-res4 only (e.g., for C4-style Faster R-CNN heads)
    return add_ResNet_convX_body(model, (3, 4, 6))


def add_ResNet50_conv5_body(model):
    return add_ResNet_convX_body(model, (3, 4, 6, 3))


def add_ResNet101_conv4_body(model):
    return add_ResNet_convX_body(model, (3, 4, 23))


def add_ResNet101_conv5_body(model):
    return add_ResNet_convX_body(model, (3, 4, 23, 3))


def add_ResNet152_conv5_body(model):
    return add_ResNet_convX_body(model, (3, 8, 36, 3))


# ---------------------------------------------------------------------------- #
# Generic ResNet components
# ---------------------------------------------------------------------------- #


def add_stage(
    model,
    prefix,
    blob_in,
    n,
    dim_in,
    dim_out,
    dim_inner,
    dilation,
    stride_init=2
):
    """Add a ResNet stage to the model by stacking n residual blocks.

    prefix: stage name prefix used for blob naming (e.g., 'res2')
    n: number of residual blocks to stack
    dim_in/dim_out: input/output channel counts for the stage
    dim_inner: bottleneck channel count
    Returns (final output blob, output dim).
    """
    # e.g., prefix = res2
    for i in range(n):
        blob_in = add_residual_block(
            model,
            '{}_{}'.format(prefix, i),
            blob_in,
            dim_in,
            dim_out,
            dim_inner,
            dilation,
            stride_init,
            # Not using inplace for the last block;
            # it may be fetched externally or used by FPN
            inplace_sum=i < n - 1
        )
        # After the first block the stage operates at dim_out channels
        dim_in = dim_out
    return blob_in, dim_in


def add_ResNet_convX_body(model, block_counts):
    """Add a ResNet body from input data up through the res5 (aka conv5) stage.
    The final res5/conv5 stage may be optionally excluded (hence convX, where
    X = 4 or 5).

    block_counts: per-stage block counts, e.g. (3, 4, 6, 3) for ResNet-50;
        a 3-tuple stops after res4.
    Returns (output blob, output dim, spatial scale of the output).
    """
    freeze_at = cfg.TRAIN.FREEZE_AT
    assert freeze_at in [0, 2, 3, 4, 5]

    # add the stem (by default, conv1 and pool1 with bn; can support gn)
    p, dim_in = globals()[cfg.RESNETS.STEM_FUNC](model, 'data')

    dim_bottleneck = cfg.RESNETS.NUM_GROUPS * cfg.RESNETS.WIDTH_PER_GROUP
    (n1, n2, n3) = block_counts[:3]
    s, dim_in = add_stage(model, 'res2', p, n1, dim_in, 256, dim_bottleneck, 1)
    if freeze_at == 2:
        # StopGradient freezes everything up to and including this stage
        model.StopGradient(s, s)
    s, dim_in = add_stage(
        model, 'res3', s, n2, dim_in, 512, dim_bottleneck * 2, 1
    )
    if freeze_at == 3:
        model.StopGradient(s, s)
    s, dim_in = add_stage(
        model, 'res4', s, n3, dim_in, 1024, dim_bottleneck * 4, 1
    )
    if freeze_at == 4:
        model.StopGradient(s, s)
    if len(block_counts) == 4:
        n4 = block_counts[3]
        s, dim_in = add_stage(
            model, 'res5', s, n4, dim_in, 2048, dim_bottleneck * 8,
            cfg.RESNETS.RES5_DILATION
        )
        if freeze_at == 5:
            model.StopGradient(s, s)
        # Dilation in res5 preserves spatial resolution proportionally
        return s, dim_in, 1. / 32. * cfg.RESNETS.RES5_DILATION
    else:
        return s, dim_in, 1. / 16.
+ + +def add_ResNet_roi_conv5_head(model, blob_in, dim_in, spatial_scale): + """Adds an RoI feature transformation (e.g., RoI pooling) followed by a + res5/conv5 head applied to each RoI.""" + # TODO(rbg): This contains Fast R-CNN specific config options making it non- + # reusable; make this more generic with model-specific wrappers + model.RoIFeatureTransform( + blob_in, + 'pool5', + blob_rois='rois', + method=cfg.FAST_RCNN.ROI_XFORM_METHOD, + resolution=cfg.FAST_RCNN.ROI_XFORM_RESOLUTION, + sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO, + spatial_scale=spatial_scale + ) + dim_bottleneck = cfg.RESNETS.NUM_GROUPS * cfg.RESNETS.WIDTH_PER_GROUP + stride_init = int(cfg.FAST_RCNN.ROI_XFORM_RESOLUTION / 7) + s, dim_in = add_stage( + model, 'res5', 'pool5', 3, dim_in, 2048, dim_bottleneck * 8, 1, + stride_init + ) + s = model.AveragePool(s, 'res5_pool', kernel=7) + return s, 2048 + + +def add_residual_block( + model, + prefix, + blob_in, + dim_in, + dim_out, + dim_inner, + dilation, + stride_init=2, + inplace_sum=False +): + """Add a residual block to the model.""" + # prefix = res_, e.g., res2_3 + + # Max pooling is performed prior to the first stage (which is uniquely + # distinguished by dim_in = 64), thus we keep stride = 1 for the first stage + stride = stride_init if ( + dim_in != dim_out and dim_in != 64 and dilation == 1 + ) else 1 + + # transformation blob + tr = globals()[cfg.RESNETS.TRANS_FUNC]( + model, + blob_in, + dim_in, + dim_out, + stride, + prefix, + dim_inner, + group=cfg.RESNETS.NUM_GROUPS, + dilation=dilation + ) + + # sum -> ReLU + # shortcut function: by default using bn; support gn + add_shortcut = globals()[cfg.RESNETS.SHORTCUT_FUNC] + sc = add_shortcut(model, prefix, blob_in, dim_in, dim_out, stride) + if inplace_sum: + s = model.net.Sum([tr, sc], tr) + else: + s = model.net.Sum([tr, sc], prefix + '_sum') + + return model.Relu(s, s) + + +# ------------------------------------------------------------------------------ +# various 
shortcuts (may expand and may consider a new helper) +# ------------------------------------------------------------------------------ + + +def basic_bn_shortcut(model, prefix, blob_in, dim_in, dim_out, stride): + """ For a pre-trained network that used BN. An AffineChannel op replaces BN + during fine-tuning. + """ + + if dim_in == dim_out: + return blob_in + + c = model.Conv( + blob_in, + prefix + '_branch1', + dim_in, + dim_out, + kernel=1, + stride=stride, + no_bias=1 + ) + return model.AffineChannel(c, prefix + '_branch1_bn', dim=dim_out) + + +def basic_gn_shortcut(model, prefix, blob_in, dim_in, dim_out, stride): + if dim_in == dim_out: + return blob_in + + # output name is prefix + '_branch1_gn' + return model.ConvGN( + blob_in, + prefix + '_branch1', + dim_in, + dim_out, + kernel=1, + group_gn=get_group_gn(dim_out), + stride=stride, + pad=0, + group=1, + ) + + +# ------------------------------------------------------------------------------ +# various stems (may expand and may consider a new helper) +# ------------------------------------------------------------------------------ + + +def basic_bn_stem(model, data, **kwargs): + """Add a basic ResNet stem. For a pre-trained network that used BN. + An AffineChannel op replaces BN during fine-tuning. 
+ """ + + dim = 64 + p = model.Conv(data, 'conv1', 3, dim, 7, pad=3, stride=2, no_bias=1) + p = model.AffineChannel(p, 'res_conv1_bn', dim=dim, inplace=True) + p = model.Relu(p, p) + p = model.MaxPool(p, 'pool1', kernel=3, pad=1, stride=2) + return p, dim + + +def basic_gn_stem(model, data, **kwargs): + """Add a basic ResNet stem (using GN)""" + + dim = 64 + p = model.ConvGN( + data, 'conv1', 3, dim, 7, group_gn=get_group_gn(dim), pad=3, stride=2 + ) + p = model.Relu(p, p) + p = model.MaxPool(p, 'pool1', kernel=3, pad=1, stride=2) + return p, dim + + +# ------------------------------------------------------------------------------ +# various transformations (may expand and may consider a new helper) +# ------------------------------------------------------------------------------ + + +def bottleneck_transformation( + model, + blob_in, + dim_in, + dim_out, + stride, + prefix, + dim_inner, + dilation=1, + group=1 +): + """Add a bottleneck transformation to the model.""" + # In original resnet, stride=2 is on 1x1. + # In fb.torch resnet, stride=2 is on 3x3. 
+ (str1x1, str3x3) = (stride, 1) if cfg.RESNETS.STRIDE_1X1 else (1, stride) + + # conv 1x1 -> BN -> ReLU + cur = model.ConvAffine( + blob_in, + prefix + '_branch2a', + dim_in, + dim_inner, + kernel=1, + stride=str1x1, + pad=0, + inplace=True + ) + cur = model.Relu(cur, cur) + + # conv 3x3 -> BN -> ReLU + cur = model.ConvAffine( + cur, + prefix + '_branch2b', + dim_inner, + dim_inner, + kernel=3, + stride=str3x3, + pad=1 * dilation, + dilation=dilation, + group=group, + inplace=True + ) + cur = model.Relu(cur, cur) + + # conv 1x1 -> BN (no ReLU) + # NB: for now this AffineChannel op cannot be in-place due to a bug in C2 + # gradient computation for graphs like this + cur = model.ConvAffine( + cur, + prefix + '_branch2c', + dim_inner, + dim_out, + kernel=1, + stride=1, + pad=0, + inplace=False + ) + return cur + + +def bottleneck_gn_transformation( + model, + blob_in, + dim_in, + dim_out, + stride, + prefix, + dim_inner, + dilation=1, + group=1 +): + """Add a bottleneck transformation with GroupNorm to the model.""" + # In original resnet, stride=2 is on 1x1. + # In fb.torch resnet, stride=2 is on 3x3. 
+ (str1x1, str3x3) = (stride, 1) if cfg.RESNETS.STRIDE_1X1 else (1, stride) + + # conv 1x1 -> GN -> ReLU + cur = model.ConvGN( + blob_in, + prefix + '_branch2a', + dim_in, + dim_inner, + kernel=1, + group_gn=get_group_gn(dim_inner), + stride=str1x1, + pad=0, + ) + cur = model.Relu(cur, cur) + + # conv 3x3 -> GN -> ReLU + cur = model.ConvGN( + cur, + prefix + '_branch2b', + dim_inner, + dim_inner, + kernel=3, + group_gn=get_group_gn(dim_inner), + stride=str3x3, + pad=1 * dilation, + dilation=dilation, + group=group, + ) + cur = model.Relu(cur, cur) + + # conv 1x1 -> GN (no ReLU) + cur = model.ConvGN( + cur, + prefix + '_branch2c', + dim_inner, + dim_out, + kernel=1, + group_gn=get_group_gn(dim_out), + stride=1, + pad=0, + ) + return cur diff --git a/detectron/modeling/VGG16.py b/detectron/modeling/VGG16.py new file mode 100644 index 0000000000000000000000000000000000000000..36454cd6be43631ad232d2b41bbe434dd70bd99d --- /dev/null +++ b/detectron/modeling/VGG16.py @@ -0,0 +1,75 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +"""VGG16 from https://arxiv.org/abs/1409.1556.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from detectron.core.config import cfg + + +def add_VGG16_conv5_body(model): + model.Conv('data', 'conv1_1', 3, 64, 3, pad=1, stride=1) + model.Relu('conv1_1', 'conv1_1') + model.Conv('conv1_1', 'conv1_2', 64, 64, 3, pad=1, stride=1) + model.Relu('conv1_2', 'conv1_2') + model.MaxPool('conv1_2', 'pool1', kernel=2, pad=0, stride=2) + model.Conv('pool1', 'conv2_1', 64, 128, 3, pad=1, stride=1) + model.Relu('conv2_1', 'conv2_1') + model.Conv('conv2_1', 'conv2_2', 128, 128, 3, pad=1, stride=1) + model.Relu('conv2_2', 'conv2_2') + model.MaxPool('conv2_2', 'pool2', kernel=2, pad=0, stride=2) + model.StopGradient('pool2', 'pool2') + model.Conv('pool2', 'conv3_1', 128, 256, 3, pad=1, stride=1) + model.Relu('conv3_1', 'conv3_1') + model.Conv('conv3_1', 'conv3_2', 256, 256, 3, pad=1, stride=1) + model.Relu('conv3_2', 'conv3_2') + model.Conv('conv3_2', 'conv3_3', 256, 256, 3, pad=1, stride=1) + model.Relu('conv3_3', 'conv3_3') + model.MaxPool('conv3_3', 'pool3', kernel=2, pad=0, stride=2) + model.Conv('pool3', 'conv4_1', 256, 512, 3, pad=1, stride=1) + model.Relu('conv4_1', 'conv4_1') + model.Conv('conv4_1', 'conv4_2', 512, 512, 3, pad=1, stride=1) + model.Relu('conv4_2', 'conv4_2') + model.Conv('conv4_2', 'conv4_3', 512, 512, 3, pad=1, stride=1) + model.Relu('conv4_3', 'conv4_3') + model.MaxPool('conv4_3', 'pool4', kernel=2, pad=0, stride=2) + model.Conv('pool4', 'conv5_1', 512, 512, 3, pad=1, stride=1) + model.Relu('conv5_1', 'conv5_1') + model.Conv('conv5_1', 'conv5_2', 512, 512, 3, pad=1, stride=1) + model.Relu('conv5_2', 'conv5_2') + model.Conv('conv5_2', 'conv5_3', 512, 512, 3, pad=1, stride=1) + blob_out = model.Relu('conv5_3', 'conv5_3') + return blob_out, 512, 1. / 16. 
+ + +def add_VGG16_roi_fc_head(model, blob_in, dim_in, spatial_scale): + model.RoIFeatureTransform( + blob_in, + 'pool5', + blob_rois='rois', + method=cfg.FAST_RCNN.ROI_XFORM_METHOD, + resolution=7, + sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO, + spatial_scale=spatial_scale + ) + model.FC('pool5', 'fc6', dim_in * 7 * 7, 4096) + model.Relu('fc6', 'fc6') + model.FC('fc6', 'fc7', 4096, 4096) + blob_out = model.Relu('fc7', 'fc7') + return blob_out, 4096 diff --git a/detectron/modeling/VGG_CNN_M_1024.py b/detectron/modeling/VGG_CNN_M_1024.py new file mode 100644 index 0000000000000000000000000000000000000000..0dc9a9eed01e1c185b618468cf0c1b00ba019c8a --- /dev/null +++ b/detectron/modeling/VGG_CNN_M_1024.py @@ -0,0 +1,61 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""VGG_CNN_M_1024 from https://arxiv.org/abs/1405.3531.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from detectron.core.config import cfg + + +def add_VGG_CNN_M_1024_conv5_body(model): + model.Conv('data', 'conv1', 3, 96, 7, pad=0, stride=2) + model.Relu('conv1', 'conv1') + model.LRN('conv1', 'norm1', size=5, alpha=0.0005, beta=0.75, bias=2.) 
+ model.MaxPool('norm1', 'pool1', kernel=3, pad=0, stride=2) + model.StopGradient('pool1', 'pool1') + # No updates at conv1 and below (norm1 and pool1 have no params, + # so we can stop gradients before them, too) + model.Conv('pool1', 'conv2', 96, 256, 5, pad=0, stride=2) + model.Relu('conv2', 'conv2') + model.LRN('conv2', 'norm2', size=5, alpha=0.0005, beta=0.75, bias=2.) + model.MaxPool('norm2', 'pool2', kernel=3, pad=0, stride=2) + model.Conv('pool2', 'conv3', 256, 512, 3, pad=1, stride=1) + model.Relu('conv3', 'conv3') + model.Conv('conv3', 'conv4', 512, 512, 3, pad=1, stride=1) + model.Relu('conv4', 'conv4') + model.Conv('conv4', 'conv5', 512, 512, 3, pad=1, stride=1) + blob_out = model.Relu('conv5', 'conv5') + return blob_out, 512, 1. / 16. + + +def add_VGG_CNN_M_1024_roi_fc_head(model, blob_in, dim_in, spatial_scale): + model.RoIFeatureTransform( + blob_in, + 'pool5', + blob_rois='rois', + method=cfg.FAST_RCNN.ROI_XFORM_METHOD, + resolution=6, + sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO, + spatial_scale=spatial_scale + ) + model.FC('pool5', 'fc6', dim_in * 6 * 6, 4096) + model.Relu('fc6', 'fc6') + model.FC('fc6', 'fc7', 4096, 1024) + blob_out = model.Relu('fc7', 'fc7') + return blob_out, 1024 diff --git a/detectron/modeling/__init__.py b/detectron/modeling/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..01e173336f119a232400cecd0de43cee5656f8ea --- /dev/null +++ b/detectron/modeling/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## diff --git a/detectron/modeling/detector.py b/detectron/modeling/detector.py new file mode 100644 index 0000000000000000000000000000000000000000..ce4e058f3b49f56c2a356d31f750510ccb39d8f6 --- /dev/null +++ b/detectron/modeling/detector.py @@ -0,0 +1,572 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +"""Defines DetectionModelHelper, the class that represents a Detectron model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np +import logging + +from caffe2.python import cnn +from caffe2.python import core +from caffe2.python import workspace +from caffe2.python.modeling import initializers +from caffe2.python.modeling.parameter_info import ParameterTags + +from detectron.core.config import cfg +from detectron.ops.collect_and_distribute_fpn_rpn_proposals \ + import CollectAndDistributeFpnRpnProposalsOp +from detectron.ops.generate_proposal_labels import GenerateProposalLabelsOp +from detectron.ops.generate_proposals import GenerateProposalsOp +import detectron.roi_data.fast_rcnn as fast_rcnn_roi_data +import detectron.utils.c2 as c2_utils + +logger = logging.getLogger(__name__) + + +class DetectionModelHelper(cnn.CNNModelHelper): + def __init__(self, **kwargs): + # Handle args specific to the DetectionModelHelper, others pass through + # to CNNModelHelper + self.train = kwargs.get('train', False) + self.num_classes = kwargs.get('num_classes', -1) + assert self.num_classes > 0, 'num_classes must be > 0' + for k in ('train', 'num_classes'): + if k in kwargs: + del kwargs[k] + kwargs['order'] = 'NCHW' + # Defensively set cudnn_exhaustive_search to False in case the default + # changes in CNNModelHelper. The detection code uses variable size + # inputs that might not play nicely with cudnn_exhaustive_search. 
+ kwargs['cudnn_exhaustive_search'] = False + super(DetectionModelHelper, self).__init__(**kwargs) + self.roi_data_loader = None + self.losses = [] + self.metrics = [] + self.do_not_update_params = [] # Param on this list are not updated + self.net.Proto().type = cfg.MODEL.EXECUTION_TYPE + self.net.Proto().num_workers = cfg.NUM_GPUS * 4 + self.prev_use_cudnn = self.use_cudnn + self.gn_params = [] # Param on this list are GroupNorm parameters + + def TrainableParams(self, gpu_id=-1): + """Get the blob names for all trainable parameters, possibly filtered by + GPU id. + """ + return [ + p for p in self.params + if ( + p in self.param_to_grad and # p has a gradient + p not in self.do_not_update_params and # not on the blacklist + (gpu_id == -1 or # filter for gpu assignment, if gpu_id set + str(p).find('gpu_{}'.format(gpu_id)) == 0) + )] + + def AffineChannel(self, blob_in, blob_out, dim, inplace=False): + """Affine transformation to replace BN in networks where BN cannot be + used (e.g., because the minibatch size is too small). + + The operations can be done in place to save memory. + """ + blob_out = blob_out or self.net.NextName() + param_prefix = blob_out + + scale = self.create_param( + param_name=param_prefix + '_s', + initializer=initializers.Initializer("ConstantFill", value=1.), + tags=ParameterTags.WEIGHT, + shape=[dim, ], + ) + bias = self.create_param( + param_name=param_prefix + '_b', + initializer=initializers.Initializer("ConstantFill", value=0.), + tags=ParameterTags.BIAS, + shape=[dim, ], + ) + if inplace: + return self.net.AffineChannel([blob_in, scale, bias], blob_in) + else: + return self.net.AffineChannel([blob_in, scale, bias], blob_out) + + def GenerateProposals(self, blobs_in, blobs_out, anchors, spatial_scale): + """Op for generating RPN porposals. 
+ + blobs_in: + - 'rpn_cls_probs': 4D tensor of shape (N, A, H, W), where N is the + number of minibatch images, A is the number of anchors per + locations, and (H, W) is the spatial size of the prediction grid. + Each value represents a "probability of object" rating in [0, 1]. + - 'rpn_bbox_pred': 4D tensor of shape (N, 4 * A, H, W) of predicted + deltas for transformation anchor boxes into RPN proposals. + - 'im_info': 2D tensor of shape (N, 3) where the three columns encode + the input image's [height, width, scale]. Height and width are + for the input to the network, not the original image; scale is the + scale factor used to scale the original image to the network input + size. + + blobs_out: + - 'rpn_rois': 2D tensor of shape (R, 5), for R RPN proposals where the + five columns encode [batch ind, x1, y1, x2, y2]. The boxes are + w.r.t. the network input, which is a *scaled* version of the + original image; these proposals must be scaled by 1 / scale (where + scale comes from im_info; see above) to transform it back to the + original input image coordinate system. + - 'rpn_roi_probs': 1D tensor of objectness probability scores + (extracted from rpn_cls_probs; see above). 
+ """ + cfg_key = 'TRAIN' if self.train else 'TEST' + + if cfg[cfg_key].GENERATE_PROPOSALS_ON_GPU: + rpn_pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N + rpn_post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N + rpn_nms_thresh = cfg[cfg_key].RPN_NMS_THRESH + rpn_min_size = float(cfg[cfg_key].RPN_MIN_SIZE) + + input_name = str(blobs_in[0]) + lvl = int(input_name[-1]) if input_name[-1].isdigit() else None + anchors_name = 'anchors{}'.format(lvl) if lvl else 'anchors' + + for i in range(cfg.NUM_GPUS): + with c2_utils.CudaScope(i): + workspace.FeedBlob( + 'gpu_{}/{}'.format(i, anchors_name), + anchors.astype(np.float32)) + + self.net.GenerateProposals( + blobs_in + [anchors_name], + blobs_out, + spatial_scale=spatial_scale, + pre_nms_topN=rpn_pre_nms_topN, + post_nms_topN=rpn_post_nms_topN, + nms_thresh=rpn_nms_thresh, + min_size=rpn_min_size, + ) + else: + name = 'GenerateProposalsOp:' + ','.join([str(b) for b in blobs_in]) + # spatial_scale passed to the Python op is only used in + # convert_pkl_to_pb + self.net.Python( + GenerateProposalsOp(anchors, spatial_scale, self.train).forward + )(blobs_in, blobs_out, name=name, spatial_scale=spatial_scale) + + return blobs_out + + def GenerateProposalLabels(self, blobs_in): + """Op for generating training labels for RPN proposals. This is used + when training RPN jointly with Fast/Mask R-CNN (as in end-to-end + Faster R-CNN training). + + blobs_in: + - 'rpn_rois': 2D tensor of RPN proposals output by GenerateProposals + - 'roidb': roidb entries that will be labeled + - 'im_info': See GenerateProposals doc. + + blobs_out: + - (variable set of blobs): returns whatever blobs are required for + training the model. It does this by querying the data loader for + the list of blobs that are needed. + """ + name = 'GenerateProposalLabelsOp:' + ','.join( + [str(b) for b in blobs_in] + ) + + # The list of blobs is not known before run-time because it depends on + # the specific model being trained. 
Query the data loader to get the + # list of output blob names. + blobs_out = fast_rcnn_roi_data.get_fast_rcnn_blob_names( + is_training=self.train + ) + blobs_out = [core.ScopedBlobReference(b) for b in blobs_out] + + self.net.Python(GenerateProposalLabelsOp().forward)( + blobs_in, blobs_out, name=name + ) + return blobs_out + + def CollectAndDistributeFpnRpnProposals(self): + """Merge RPN proposals generated at multiple FPN levels and then + distribute those proposals to their appropriate FPN levels. An anchor + at one FPN level may predict an RoI that will map to another level, + hence the need to redistribute the proposals. + + This function assumes standard blob names for input and output blobs. + + Input blobs: [rpn_rois_fpn, ..., rpn_rois_fpn, + rpn_roi_probs_fpn, ..., rpn_roi_probs_fpn] + - rpn_rois_fpn are the RPN proposals for FPN level i; see rpn_rois + documentation from GenerateProposals. + - rpn_roi_probs_fpn are the RPN objectness probabilities for FPN + level i; see rpn_roi_probs documentation from GenerateProposals. + + If used during training, then the input blobs will also include: + [roidb, im_info] (see GenerateProposalLabels). + + Output blobs: [rois_fpn, ..., rois_rpn, rois, + rois_idx_restore] + - rois_fpn are the RPN proposals for FPN level i + - rois_idx_restore is a permutation on the concatenation of all + rois_fpn, i=min...max, such that when applied the RPN RoIs are + restored to their original order in the input blobs. + + If used during training, then the output blobs will also include: + [labels, bbox_targets, bbox_inside_weights, bbox_outside_weights]. 
+ """ + k_max = cfg.FPN.RPN_MAX_LEVEL + k_min = cfg.FPN.RPN_MIN_LEVEL + + # Prepare input blobs + rois_names = ['rpn_rois_fpn' + str(l) for l in range(k_min, k_max + 1)] + score_names = [ + 'rpn_roi_probs_fpn' + str(l) for l in range(k_min, k_max + 1) + ] + blobs_in = rois_names + score_names + if self.train: + blobs_in += ['roidb', 'im_info'] + blobs_in = [core.ScopedBlobReference(b) for b in blobs_in] + name = 'CollectAndDistributeFpnRpnProposalsOp:' + ','.join( + [str(b) for b in blobs_in] + ) + + # Prepare output blobs + blobs_out = fast_rcnn_roi_data.get_fast_rcnn_blob_names( + is_training=self.train + ) + blobs_out = [core.ScopedBlobReference(b) for b in blobs_out] + + outputs = self.net.Python( + CollectAndDistributeFpnRpnProposalsOp(self.train).forward + )(blobs_in, blobs_out, name=name) + + return outputs + + def DropoutIfTraining(self, blob_in, dropout_rate): + """Add dropout to blob_in if the model is in training mode and + dropout_rate is > 0.""" + blob_out = blob_in + if self.train and dropout_rate > 0: + blob_out = self.Dropout( + blob_in, blob_in, ratio=dropout_rate, is_test=False + ) + return blob_out + + def RoIFeatureTransform( + self, + blobs_in, + blob_out, + blob_rois='rois', + method='RoIPoolF', + resolution=7, + spatial_scale=1. / 16., + sampling_ratio=0 + ): + """Add the specified RoI pooling method. The sampling_ratio argument + is supported for some, but not all, RoI transform methods. 
+ + RoIFeatureTransform abstracts away: + - Use of FPN or not + - Specifics of the transform method + """ + assert method in {'RoIPoolF', 'RoIAlign'}, \ + 'Unknown pooling method: {}'.format(method) + has_argmax = (method == 'RoIPoolF') + if isinstance(blobs_in, list): + # FPN case: add RoIFeatureTransform to each FPN level + k_max = cfg.FPN.ROI_MAX_LEVEL # coarsest level of pyramid + k_min = cfg.FPN.ROI_MIN_LEVEL # finest level of pyramid + assert len(blobs_in) == k_max - k_min + 1 + bl_out_list = [] + for lvl in range(k_min, k_max + 1): + bl_in = blobs_in[k_max - lvl] # blobs_in is in reversed order + sc = spatial_scale[k_max - lvl] # in reversed order + bl_rois = blob_rois + '_fpn' + str(lvl) + bl_out = blob_out + '_fpn' + str(lvl) + bl_out_list.append(bl_out) + bl_argmax = ['_argmax_' + bl_out] if has_argmax else [] + self.net.__getattr__(method)( + [bl_in, bl_rois], [bl_out] + bl_argmax, + pooled_w=resolution, + pooled_h=resolution, + spatial_scale=sc, + sampling_ratio=sampling_ratio + ) + # The pooled features from all levels are concatenated along the + # batch dimension into a single 4D tensor. 
+ xform_shuffled, _ = self.net.Concat( + bl_out_list, [blob_out + '_shuffled', '_concat_' + blob_out], + axis=0 + ) + # Unshuffle to match rois from dataloader + restore_bl = blob_rois + '_idx_restore_int32' + xform_out = self.net.BatchPermutation( + [xform_shuffled, restore_bl], blob_out + ) + else: + # Single feature level + bl_argmax = ['_argmax_' + blob_out] if has_argmax else [] + # sampling_ratio is ignored for RoIPoolF + xform_out = self.net.__getattr__(method)( + [blobs_in, blob_rois], [blob_out] + bl_argmax, + pooled_w=resolution, + pooled_h=resolution, + spatial_scale=spatial_scale, + sampling_ratio=sampling_ratio + ) + # Only return the first blob (the transformed features) + return xform_out[0] if isinstance(xform_out, tuple) else xform_out + + def ConvShared( + self, + blob_in, + blob_out, + dim_in, + dim_out, + kernel, + weight=None, + bias=None, + **kwargs + ): + """Add conv op that shares weights and/or biases with another conv op. + """ + use_bias = ( + False if ('no_bias' in kwargs and kwargs['no_bias']) else True + ) + + if self.use_cudnn: + kwargs['engine'] = 'CUDNN' + kwargs['exhaustive_search'] = self.cudnn_exhaustive_search + if self.ws_nbytes_limit: + kwargs['ws_nbytes_limit'] = self.ws_nbytes_limit + + if use_bias: + blobs_in = [blob_in, weight, bias] + else: + blobs_in = [blob_in, weight] + + if 'no_bias' in kwargs: + del kwargs['no_bias'] + + return self.net.Conv( + blobs_in, blob_out, kernel=kernel, order=self.order, **kwargs + ) + + def BilinearInterpolation( + self, blob_in, blob_out, dim_in, dim_out, up_scale + ): + """Bilinear interpolation in space of scale. + + Takes input of NxKxHxW and outputs NxKx(sH)x(sW), where s:= up_scale + + Adapted from the CVPR'15 FCN code. 
+ See: https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/surgery.py + """ + assert dim_in == dim_out + assert up_scale % 2 == 0, 'Scale should be even' + + def upsample_filt(size): + factor = (size + 1) // 2 + if size % 2 == 1: + center = factor - 1 + else: + center = factor - 0.5 + og = np.ogrid[:size, :size] + return ((1 - abs(og[0] - center) / factor) * + (1 - abs(og[1] - center) / factor)) + + kernel_size = up_scale * 2 + bil_filt = upsample_filt(kernel_size) + + kernel = np.zeros( + (dim_in, dim_out, kernel_size, kernel_size), dtype=np.float32 + ) + kernel[range(dim_out), range(dim_in), :, :] = bil_filt + + blob = self.ConvTranspose( + blob_in, + blob_out, + dim_in, + dim_out, + kernel_size, + stride=int(up_scale), + pad=int(up_scale / 2), + weight_init=('GivenTensorFill', {'values': kernel}), + bias_init=('ConstantFill', {'value': 0.}) + ) + self.do_not_update_params.append(self.weights[-1]) + self.do_not_update_params.append(self.biases[-1]) + return blob + + def ConvAffine( # args in the same order of Conv() + self, blob_in, prefix, dim_in, dim_out, kernel, stride, pad, + group=1, dilation=1, + weight_init=None, + bias_init=None, + suffix='_bn', + inplace=False + ): + """ConvAffine adds a Conv op followed by a AffineChannel op (which + replaces BN during fine tuning). 
+ """ + conv_blob = self.Conv( + blob_in, + prefix, + dim_in, + dim_out, + kernel, + stride=stride, + pad=pad, + group=group, + dilation=dilation, + weight_init=weight_init, + bias_init=bias_init, + no_bias=1 + ) + blob_out = self.AffineChannel( + conv_blob, prefix + suffix, dim=dim_out, inplace=inplace + ) + return blob_out + + def ConvGN( # args in the same order of Conv() + self, blob_in, prefix, dim_in, dim_out, kernel, stride, pad, + group_gn, # num of groups in gn + group=1, dilation=1, + weight_init=None, + bias_init=None, + suffix='_gn', + no_conv_bias=1, + ): + """ConvGN adds a Conv op followed by a GroupNorm op, + including learnable scale/bias (gamma/beta) + """ + conv_blob = self.Conv( + blob_in, + prefix, + dim_in, + dim_out, + kernel, + stride=stride, + pad=pad, + group=group, + dilation=dilation, + weight_init=weight_init, + bias_init=bias_init, + no_bias=no_conv_bias) + + if group_gn < 1: + logger.warning( + 'Layer: {} (dim {}): ' + 'group_gn < 1; reset to 1.'.format(prefix, dim_in) + ) + group_gn = 1 + + blob_out = self.SpatialGN( + conv_blob, prefix + suffix, + dim_out, group=group_gn, # op's arg name is "group" + epsilon=cfg.GROUP_NORM.EPSILON,) + + self.gn_params.append(self.params[-1]) # add gn's bias to list + self.gn_params.append(self.params[-2]) # add gn's scale to list + return blob_out + + def DisableCudnn(self): + self.prev_use_cudnn = self.use_cudnn + self.use_cudnn = False + + def RestorePreviousUseCudnn(self): + prev_use_cudnn = self.use_cudnn + self.use_cudnn = self.prev_use_cudnn + self.prev_use_cudnn = prev_use_cudnn + + def UpdateWorkspaceLr(self, cur_iter, new_lr): + """Updates the model's current learning rate and the workspace (learning + rate and update history/momentum blobs). 
+ """ + # The workspace is the one source of truth for the lr + # The lr is always the same on all GPUs + cur_lr = workspace.FetchBlob('gpu_0/lr')[0] + # There are no type conversions between the lr in Python and the lr in + # the GPU (both are float32), so exact comparision is ok + if cur_lr != new_lr: + ratio = _get_lr_change_ratio(cur_lr, new_lr) + if ratio > cfg.SOLVER.LOG_LR_CHANGE_THRESHOLD: + logger.info( + 'Changing learning rate {:.6f} -> {:.6f} at iter {:d}'. + format(cur_lr, new_lr, cur_iter)) + self._SetNewLr(cur_lr, new_lr) + return new_lr + + def _SetNewLr(self, cur_lr, new_lr): + """Do the actual work of updating the model and workspace blobs. + """ + for i in range(cfg.NUM_GPUS): + with c2_utils.CudaScope(i): + workspace.FeedBlob( + 'gpu_{}/lr'.format(i), np.array([new_lr], dtype=np.float32)) + ratio = _get_lr_change_ratio(cur_lr, new_lr) + if cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \ + ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD: + self._CorrectMomentum(new_lr / cur_lr) + + def _CorrectMomentum(self, correction): + """The MomentumSGDUpdate op implements the update V as + + V := mu * V + lr * grad, + + where mu is the momentum factor, lr is the learning rate, and grad is + the stochastic gradient. Since V is not defined independently of the + learning rate (as it should ideally be), when the learning rate is + changed we should scale the update history V in order to make it + compatible in scale with lr * grad. + """ + logger.info( + 'Scaling update history by {:.6f} (new lr / old lr)'. + format(correction)) + for i in range(cfg.NUM_GPUS): + with c2_utils.CudaScope(i): + for param in self.TrainableParams(gpu_id=i): + op = core.CreateOperator( + 'Scale', [param + '_momentum'], [param + '_momentum'], + scale=correction) + workspace.RunOperatorOnce(op) + + def GetLossScale(self): + """Allow a way to configure the loss scale dynamically. + + This may be used in a distributed data parallel setting. 
+ """ + return 1.0 / cfg.NUM_GPUS + + def AddLosses(self, losses): + if not isinstance(losses, list): + losses = [losses] + # Conversion to str allows losses to include BlobReferences + losses = [c2_utils.UnscopeName(str(l)) for l in losses] + self.losses = list(set(self.losses + losses)) + + def AddMetrics(self, metrics): + if not isinstance(metrics, list): + metrics = [metrics] + self.metrics = list(set(self.metrics + metrics)) + + +def _get_lr_change_ratio(cur_lr, new_lr): + eps = 1e-10 + ratio = np.max( + (new_lr / np.max((cur_lr, eps)), cur_lr / np.max((new_lr, eps))) + ) + return ratio diff --git a/detectron/modeling/fast_rcnn_heads.py b/detectron/modeling/fast_rcnn_heads.py new file mode 100644 index 0000000000000000000000000000000000000000..eb3a534e8c4922570c5f314999f6d120ecd9213a --- /dev/null +++ b/detectron/modeling/fast_rcnn_heads.py @@ -0,0 +1,178 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Various network "heads" for classification and bounding box prediction. + +The design is as follows: + +... -> RoI ----\ /-> box cls output -> cls loss + -> RoIFeatureXform -> box head +... -> Feature / \-> box reg output -> reg loss + Map + +The Fast R-CNN head produces a feature representation of the RoI for the purpose +of bounding box classification and regression. 
The box output module converts +the feature representation into classification and regression predictions. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from detectron.core.config import cfg +from detectron.utils.c2 import const_fill +from detectron.utils.c2 import gauss_fill +from detectron.utils.net import get_group_gn +import detectron.utils.blob as blob_utils + + +# ---------------------------------------------------------------------------- # +# Fast R-CNN outputs and losses +# ---------------------------------------------------------------------------- # + +def add_fast_rcnn_outputs(model, blob_in, dim): + """Add RoI classification and bounding box regression output ops.""" + # Box classification layer + model.FC( + blob_in, + 'cls_score', + dim, + model.num_classes, + weight_init=gauss_fill(0.01), + bias_init=const_fill(0.0) + ) + if not model.train: # == if test + # Only add softmax when testing; during training the softmax is combined + # with the label cross entropy loss for numerical stability + model.Softmax('cls_score', 'cls_prob', engine='CUDNN') + # Box regression layer + num_bbox_reg_classes = ( + 2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else model.num_classes + ) + model.FC( + blob_in, + 'bbox_pred', + dim, + num_bbox_reg_classes * 4, + weight_init=gauss_fill(0.001), + bias_init=const_fill(0.0) + ) + + +def add_fast_rcnn_losses(model): + """Add losses for RoI classification and bounding box regression.""" + cls_prob, loss_cls = model.net.SoftmaxWithLoss( + ['cls_score', 'labels_int32'], ['cls_prob', 'loss_cls'], + scale=model.GetLossScale() + ) + loss_bbox = model.net.SmoothL1Loss( + [ + 'bbox_pred', 'bbox_targets', 'bbox_inside_weights', + 'bbox_outside_weights' + ], + 'loss_bbox', + scale=model.GetLossScale() + ) + loss_gradients = blob_utils.get_loss_gradients(model, [loss_cls, loss_bbox]) + model.Accuracy(['cls_prob', 
'labels_int32'], 'accuracy_cls') + model.AddLosses(['loss_cls', 'loss_bbox']) + model.AddMetrics('accuracy_cls') + return loss_gradients + + +# ---------------------------------------------------------------------------- # +# Box heads +# ---------------------------------------------------------------------------- # + +def add_roi_2mlp_head(model, blob_in, dim_in, spatial_scale): + """Add a ReLU MLP with two hidden layers.""" + hidden_dim = cfg.FAST_RCNN.MLP_HEAD_DIM + roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION + roi_feat = model.RoIFeatureTransform( + blob_in, + 'roi_feat', + blob_rois='rois', + method=cfg.FAST_RCNN.ROI_XFORM_METHOD, + resolution=roi_size, + sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO, + spatial_scale=spatial_scale + ) + model.FC(roi_feat, 'fc6', dim_in * roi_size * roi_size, hidden_dim) + model.Relu('fc6', 'fc6') + model.FC('fc6', 'fc7', hidden_dim, hidden_dim) + model.Relu('fc7', 'fc7') + return 'fc7', hidden_dim + + +def add_roi_Xconv1fc_head(model, blob_in, dim_in, spatial_scale): + """Add a X conv + 1fc head, as a reference if not using GroupNorm""" + hidden_dim = cfg.FAST_RCNN.CONV_HEAD_DIM + roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION + roi_feat = model.RoIFeatureTransform( + blob_in, + 'roi_feat', + blob_rois='rois', + method=cfg.FAST_RCNN.ROI_XFORM_METHOD, + resolution=roi_size, + sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO, + spatial_scale=spatial_scale + ) + + current = roi_feat + for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS): + current = model.Conv( + current, 'head_conv' + str(i + 1), dim_in, hidden_dim, 3, + stride=1, pad=1, + weight_init=('MSRAFill', {}), + bias_init=('ConstantFill', {'value': 0.}), + no_bias=0) + current = model.Relu(current, current) + dim_in = hidden_dim + + fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM + model.FC(current, 'fc6', dim_in * roi_size * roi_size, fc_dim) + model.Relu('fc6', 'fc6') + return 'fc6', fc_dim + + +def add_roi_Xconv1fc_gn_head(model, blob_in, dim_in, spatial_scale): + 
"""Add a X conv + 1fc head, with GroupNorm""" + hidden_dim = cfg.FAST_RCNN.CONV_HEAD_DIM + roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION + roi_feat = model.RoIFeatureTransform( + blob_in, 'roi_feat', + blob_rois='rois', + method=cfg.FAST_RCNN.ROI_XFORM_METHOD, + resolution=roi_size, + sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO, + spatial_scale=spatial_scale + ) + + current = roi_feat + for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS): + current = model.ConvGN( + current, 'head_conv' + str(i + 1), dim_in, hidden_dim, 3, + group_gn=get_group_gn(hidden_dim), + stride=1, pad=1, + weight_init=('MSRAFill', {}), + bias_init=('ConstantFill', {'value': 0.})) + current = model.Relu(current, current) + dim_in = hidden_dim + + fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM + model.FC(current, 'fc6', dim_in * roi_size * roi_size, fc_dim) + model.Relu('fc6', 'fc6') + return 'fc6', fc_dim diff --git a/detectron/modeling/generate_anchors.py b/detectron/modeling/generate_anchors.py new file mode 100644 index 0000000000000000000000000000000000000000..f6b7a1ab63a8cd2da89d59f2a769d2b6900f08aa --- /dev/null +++ b/detectron/modeling/generate_anchors.py @@ -0,0 +1,123 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## +# +# Based on: +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick and Sean Bell +# -------------------------------------------------------- + +import numpy as np + +# Verify that we compute the same anchors as Shaoqing's matlab implementation: +# +# >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat +# >> anchors +# +# anchors = +# +# -83 -39 100 56 +# -175 -87 192 104 +# -359 -183 376 200 +# -55 -55 72 72 +# -119 -119 136 136 +# -247 -247 264 264 +# -35 -79 52 96 +# -79 -167 96 184 +# -167 -343 184 360 + +# array([[ -83., -39., 100., 56.], +# [-175., -87., 192., 104.], +# [-359., -183., 376., 200.], +# [ -55., -55., 72., 72.], +# [-119., -119., 136., 136.], +# [-247., -247., 264., 264.], +# [ -35., -79., 52., 96.], +# [ -79., -167., 96., 184.], +# [-167., -343., 184., 360.]]) + + +def generate_anchors( + stride=16, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2) +): + """Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors + are centered on stride / 2, have (approximate) sqrt areas of the specified + sizes, and aspect ratios as given. + """ + return _generate_anchors( + stride, + np.array(sizes, dtype=np.float) / stride, + np.array(aspect_ratios, dtype=np.float) + ) + + +def _generate_anchors(base_size, scales, aspect_ratios): + """Generate anchor (reference) windows by enumerating aspect ratios X + scales wrt a reference (0, 0, base_size - 1, base_size - 1) window. 
+ """ + anchor = np.array([1, 1, base_size, base_size], dtype=np.float) - 1 + anchors = _ratio_enum(anchor, aspect_ratios) + anchors = np.vstack( + [_scale_enum(anchors[i, :], scales) for i in range(anchors.shape[0])] + ) + return anchors + + +def _whctrs(anchor): + """Return width, height, x center, and y center for an anchor (window).""" + w = anchor[2] - anchor[0] + 1 + h = anchor[3] - anchor[1] + 1 + x_ctr = anchor[0] + 0.5 * (w - 1) + y_ctr = anchor[1] + 0.5 * (h - 1) + return w, h, x_ctr, y_ctr + + +def _mkanchors(ws, hs, x_ctr, y_ctr): + """Given a vector of widths (ws) and heights (hs) around a center + (x_ctr, y_ctr), output a set of anchors (windows). + """ + ws = ws[:, np.newaxis] + hs = hs[:, np.newaxis] + anchors = np.hstack( + ( + x_ctr - 0.5 * (ws - 1), + y_ctr - 0.5 * (hs - 1), + x_ctr + 0.5 * (ws - 1), + y_ctr + 0.5 * (hs - 1) + ) + ) + return anchors + + +def _ratio_enum(anchor, ratios): + """Enumerate a set of anchors for each aspect ratio wrt an anchor.""" + w, h, x_ctr, y_ctr = _whctrs(anchor) + size = w * h + size_ratios = size / ratios + ws = np.round(np.sqrt(size_ratios)) + hs = np.round(ws * ratios) + anchors = _mkanchors(ws, hs, x_ctr, y_ctr) + return anchors + + +def _scale_enum(anchor, scales): + """Enumerate a set of anchors for each scale wrt an anchor.""" + w, h, x_ctr, y_ctr = _whctrs(anchor) + ws = w * scales + hs = h * scales + anchors = _mkanchors(ws, hs, x_ctr, y_ctr) + return anchors diff --git a/detectron/modeling/keypoint_rcnn_heads.py b/detectron/modeling/keypoint_rcnn_heads.py new file mode 100644 index 0000000000000000000000000000000000000000..edc095d0b00d5ea0d6aad27dc901072cf52a4e42 --- /dev/null +++ b/detectron/modeling/keypoint_rcnn_heads.py @@ -0,0 +1,217 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Various network "heads" for predicting keypoints in Mask R-CNN. + +The design is as follows: + +... -> RoI ----\ + -> RoIFeatureXform -> keypoint head -> keypoint output -> loss +... -> Feature / + Map + +The keypoint head produces a feature representation of the RoI for the purpose +of keypoint prediction. The keypoint output module converts the feature +representation into keypoint heatmaps. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from detectron.core.config import cfg +from detectron.utils.c2 import const_fill +from detectron.utils.c2 import gauss_fill +import detectron.modeling.ResNet as ResNet +import detectron.utils.blob as blob_utils + + +# ---------------------------------------------------------------------------- # +# Keypoint R-CNN outputs and losses +# ---------------------------------------------------------------------------- # + +def add_keypoint_outputs(model, blob_in, dim): + """Add Mask R-CNN keypoint specific outputs: keypoint heatmaps.""" + # NxKxHxW + upsample_heatmap = (cfg.KRCNN.UP_SCALE > 1) + + if cfg.KRCNN.USE_DECONV: + # Apply ConvTranspose to the feature representation; results in 2x + # upsampling + blob_in = model.ConvTranspose( + blob_in, + 'kps_deconv', + dim, + cfg.KRCNN.DECONV_DIM, + kernel=cfg.KRCNN.DECONV_KERNEL, + pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1), + stride=2, + weight_init=gauss_fill(0.01), + 
bias_init=const_fill(0.0) + ) + model.Relu('kps_deconv', 'kps_deconv') + dim = cfg.KRCNN.DECONV_DIM + + if upsample_heatmap: + blob_name = 'kps_score_lowres' + else: + blob_name = 'kps_score' + + if cfg.KRCNN.USE_DECONV_OUTPUT: + # Use ConvTranspose to predict heatmaps; results in 2x upsampling + blob_out = model.ConvTranspose( + blob_in, + blob_name, + dim, + cfg.KRCNN.NUM_KEYPOINTS, + kernel=cfg.KRCNN.DECONV_KERNEL, + pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1), + stride=2, + weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}), + bias_init=const_fill(0.0) + ) + else: + # Use Conv to predict heatmaps; does no upsampling + blob_out = model.Conv( + blob_in, + blob_name, + dim, + cfg.KRCNN.NUM_KEYPOINTS, + kernel=1, + pad=0, + stride=1, + weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}), + bias_init=const_fill(0.0) + ) + + if upsample_heatmap: + # Increase heatmap output size via bilinear upsampling + blob_out = model.BilinearInterpolation( + blob_out, 'kps_score', cfg.KRCNN.NUM_KEYPOINTS, + cfg.KRCNN.NUM_KEYPOINTS, cfg.KRCNN.UP_SCALE + ) + + return blob_out + + +def add_keypoint_losses(model): + """Add Mask R-CNN keypoint specific losses.""" + # Reshape input from (N, K, H, W) to (NK, HW) + model.net.Reshape( + ['kps_score'], ['kps_score_reshaped', '_kps_score_old_shape'], + shape=(-1, cfg.KRCNN.HEATMAP_SIZE * cfg.KRCNN.HEATMAP_SIZE) + ) + # Softmax across **space** (woahh....space!) + # Note: this is not what is commonly called "spatial softmax" + # (i.e., softmax applied along the channel dimension at each spatial + # location); This is softmax applied over a set of spatial locations (i.e., + # each spatial location is a "class"). 
+ kps_prob, loss_kps = model.net.SoftmaxWithLoss( + ['kps_score_reshaped', 'keypoint_locations_int32', 'keypoint_weights'], + ['kps_prob', 'loss_kps'], + scale=cfg.KRCNN.LOSS_WEIGHT / cfg.NUM_GPUS, + spatial=0 + ) + if not cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS: + # Discussion: the softmax loss above will average the loss by the sum of + # keypoint_weights, i.e. the total number of visible keypoints. Since + # the number of visible keypoints can vary significantly between + # minibatches, this has the effect of up-weighting the importance of + # minibatches with few visible keypoints. (Imagine the extreme case of + # only one visible keypoint versus N: in the case of N, each one + # contributes 1/N to the gradient compared to the single keypoint + # determining the gradient direction). Instead, we can normalize the + # loss by the total number of keypoints, if it were the case that all + # keypoints were visible in a full minibatch. (Returning to the example, + # this means that the one visible keypoint contributes as much as each + # of the N keypoints.) + model.StopGradient( + 'keypoint_loss_normalizer', 'keypoint_loss_normalizer' + ) + loss_kps = model.net.Mul( + ['loss_kps', 'keypoint_loss_normalizer'], 'loss_kps_normalized' + ) + loss_gradients = blob_utils.get_loss_gradients(model, [loss_kps]) + model.AddLosses(loss_kps) + return loss_gradients + + +# ---------------------------------------------------------------------------- # +# Keypoint heads +# ---------------------------------------------------------------------------- # + +def add_ResNet_roi_conv5_head_for_keypoints( + model, blob_in, dim_in, spatial_scale +): + """Add a ResNet "conv5" / "stage5" head for Mask R-CNN keypoint prediction. 
+ """ + model.RoIFeatureTransform( + blob_in, + '_[pose]_pool5', + blob_rois='keypoint_rois', + method=cfg.KRCNN.ROI_XFORM_METHOD, + resolution=cfg.KRCNN.ROI_XFORM_RESOLUTION, + sampling_ratio=cfg.KRCNN.ROI_XFORM_SAMPLING_RATIO, + spatial_scale=spatial_scale + ) + # Using the prefix '_[pose]_' to 'res5' enables initializing the head's + # parameters using pretrained 'res5' parameters if given (see + # utils.net.initialize_from_weights_file) + s, dim_in = ResNet.add_stage( + model, + '_[pose]_res5', + '_[pose]_pool5', + 3, + dim_in, + 2048, + 512, + cfg.KRCNN.DILATION, + stride_init=int(cfg.KRCNN.ROI_XFORM_RESOLUTION / 7) + ) + return s, 2048 + + +def add_roi_pose_head_v1convX(model, blob_in, dim_in, spatial_scale): + """Add a Mask R-CNN keypoint head. v1convX design: X * (conv).""" + hidden_dim = cfg.KRCNN.CONV_HEAD_DIM + kernel_size = cfg.KRCNN.CONV_HEAD_KERNEL + pad_size = kernel_size // 2 + current = model.RoIFeatureTransform( + blob_in, + '_[pose]_roi_feat', + blob_rois='keypoint_rois', + method=cfg.KRCNN.ROI_XFORM_METHOD, + resolution=cfg.KRCNN.ROI_XFORM_RESOLUTION, + sampling_ratio=cfg.KRCNN.ROI_XFORM_SAMPLING_RATIO, + spatial_scale=spatial_scale + ) + + for i in range(cfg.KRCNN.NUM_STACKED_CONVS): + current = model.Conv( + current, + 'conv_fcn' + str(i + 1), + dim_in, + hidden_dim, + kernel_size, + stride=1, + pad=pad_size, + weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.01}), + bias_init=('ConstantFill', {'value': 0.}) + ) + current = model.Relu(current, current) + dim_in = hidden_dim + + return current, hidden_dim diff --git a/detectron/modeling/mask_rcnn_heads.py b/detectron/modeling/mask_rcnn_heads.py new file mode 100644 index 0000000000000000000000000000000000000000..bf76e83fa293845648344e0a3e7bd5c7abe0e7d8 --- /dev/null +++ b/detectron/modeling/mask_rcnn_heads.py @@ -0,0 +1,329 @@ +# Copyright (c) 2017-present, Facebook, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Various network "heads" for predicting masks in Mask R-CNN. + +The design is as follows: + +... -> RoI ----\ + -> RoIFeatureXform -> mask head -> mask output -> loss +... -> Feature / + Map + +The mask head produces a feature representation of the RoI for the purpose +of mask prediction. The mask output module converts the feature representation +into real-valued (soft) masks. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from detectron.core.config import cfg +from detectron.utils.c2 import const_fill +from detectron.utils.c2 import gauss_fill +from detectron.utils.net import get_group_gn +import detectron.modeling.ResNet as ResNet +import detectron.utils.blob as blob_utils + + +# ---------------------------------------------------------------------------- # +# Mask R-CNN outputs and losses +# ---------------------------------------------------------------------------- # + +def add_mask_rcnn_outputs(model, blob_in, dim): + """Add Mask R-CNN specific outputs: either mask logits or probs.""" + num_cls = cfg.MODEL.NUM_CLASSES if cfg.MRCNN.CLS_SPECIFIC_MASK else 1 + + if cfg.MRCNN.USE_FC_OUTPUT: + # Predict masks with a fully connected layer (ignore 'fcn' in the blob + # name) + dim_fc = int(dim * (cfg.MRCNN.RESOLUTION / cfg.MRCNN.UPSAMPLE_RATIO)**2) + blob_out = model.FC( + blob_in, + 'mask_fcn_logits', + dim_fc, + num_cls * cfg.MRCNN.RESOLUTION**2, + weight_init=gauss_fill(0.001), + bias_init=const_fill(0.0) + ) + else: + # Predict mask using Conv + + # Use GaussianFill for class-agnostic mask prediction; fills based on + # fan-in can be too large in this case and cause divergence + fill = ( + cfg.MRCNN.CONV_INIT + if cfg.MRCNN.CLS_SPECIFIC_MASK else 'GaussianFill' + ) + blob_out = model.Conv( + blob_in, + 'mask_fcn_logits', + dim, + num_cls, + kernel=1, + pad=0, + stride=1, + weight_init=(fill, {'std': 0.001}), + bias_init=const_fill(0.0) + ) + + if cfg.MRCNN.UPSAMPLE_RATIO > 1: + blob_out = model.BilinearInterpolation( + 'mask_fcn_logits', 'mask_fcn_logits_up', num_cls, num_cls, + cfg.MRCNN.UPSAMPLE_RATIO + ) + + if not model.train: # == if test + blob_out = model.net.Sigmoid(blob_out, 'mask_fcn_probs') + + return blob_out + + +def add_mask_rcnn_losses(model, blob_mask): + """Add Mask R-CNN specific losses.""" + loss_mask 
= model.net.SigmoidCrossEntropyLoss( + [blob_mask, 'masks_int32'], + 'loss_mask', + scale=model.GetLossScale() * cfg.MRCNN.WEIGHT_LOSS_MASK + ) + loss_gradients = blob_utils.get_loss_gradients(model, [loss_mask]) + model.AddLosses('loss_mask') + return loss_gradients + + +# ---------------------------------------------------------------------------- # +# Mask heads +# ---------------------------------------------------------------------------- # + +def mask_rcnn_fcn_head_v1up4convs(model, blob_in, dim_in, spatial_scale): + """v1up design: 4 * (conv 3x3), convT 2x2.""" + return mask_rcnn_fcn_head_v1upXconvs( + model, blob_in, dim_in, spatial_scale, 4 + ) + + +def mask_rcnn_fcn_head_v1up4convs_gn(model, blob_in, dim_in, spatial_scale): + """v1up design: 4 * (conv 3x3), convT 2x2, with GroupNorm""" + return mask_rcnn_fcn_head_v1upXconvs_gn( + model, blob_in, dim_in, spatial_scale, 4 + ) + + +def mask_rcnn_fcn_head_v1up(model, blob_in, dim_in, spatial_scale): + """v1up design: 2 * (conv 3x3), convT 2x2.""" + return mask_rcnn_fcn_head_v1upXconvs( + model, blob_in, dim_in, spatial_scale, 2 + ) + + +def mask_rcnn_fcn_head_v1upXconvs( + model, blob_in, dim_in, spatial_scale, num_convs +): + """v1upXconvs design: X * (conv 3x3), convT 2x2.""" + current = model.RoIFeatureTransform( + blob_in, + blob_out='_[mask]_roi_feat', + blob_rois='mask_rois', + method=cfg.MRCNN.ROI_XFORM_METHOD, + resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION, + sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO, + spatial_scale=spatial_scale + ) + + dilation = cfg.MRCNN.DILATION + dim_inner = cfg.MRCNN.DIM_REDUCED + + for i in range(num_convs): + current = model.Conv( + current, + '_[mask]_fcn' + str(i + 1), + dim_in, + dim_inner, + kernel=3, + dilation=dilation, + pad=1 * dilation, + stride=1, + weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}), + bias_init=('ConstantFill', {'value': 0.}) + ) + current = model.Relu(current, current) + dim_in = dim_inner + + # upsample layer + model.ConvTranspose( + 
current, + 'conv5_mask', + dim_inner, + dim_inner, + kernel=2, + pad=0, + stride=2, + weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}), + bias_init=const_fill(0.0) + ) + blob_mask = model.Relu('conv5_mask', 'conv5_mask') + + return blob_mask, dim_inner + + +def mask_rcnn_fcn_head_v1upXconvs_gn( + model, blob_in, dim_in, spatial_scale, num_convs +): + """v1upXconvs design: X * (conv 3x3), convT 2x2, with GroupNorm""" + current = model.RoIFeatureTransform( + blob_in, + blob_out='_mask_roi_feat', + blob_rois='mask_rois', + method=cfg.MRCNN.ROI_XFORM_METHOD, + resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION, + sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO, + spatial_scale=spatial_scale + ) + + dilation = cfg.MRCNN.DILATION + dim_inner = cfg.MRCNN.DIM_REDUCED + + for i in range(num_convs): + current = model.ConvGN( + current, + '_mask_fcn' + str(i + 1), + dim_in, + dim_inner, + group_gn=get_group_gn(dim_inner), + kernel=3, + pad=1 * dilation, + stride=1, + weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}), + bias_init=('ConstantFill', {'value': 0.}) + ) + current = model.Relu(current, current) + dim_in = dim_inner + + # upsample layer + model.ConvTranspose( + current, + 'conv5_mask', + dim_inner, + dim_inner, + kernel=2, + pad=0, + stride=2, + weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}), + bias_init=const_fill(0.0) + ) + blob_mask = model.Relu('conv5_mask', 'conv5_mask') + + return blob_mask, dim_inner + + +def mask_rcnn_fcn_head_v0upshare(model, blob_in, dim_in, spatial_scale): + """Use a ResNet "conv5" / "stage5" head for mask prediction. Weights and + computation are shared with the conv5 box head. Computation can only be + shared during training, since inference is cascaded. + + v0upshare design: conv5, convT 2x2. 
+ """ + # Since box and mask head are shared, these must match + assert cfg.MRCNN.ROI_XFORM_RESOLUTION == cfg.FAST_RCNN.ROI_XFORM_RESOLUTION + + if model.train: # share computation with bbox head at training time + dim_conv5 = 2048 + blob_conv5 = model.net.SampleAs( + ['res5_2_sum', 'roi_has_mask_int32'], + ['_[mask]_res5_2_sum_sliced'] + ) + else: # re-compute at test time + blob_conv5, dim_conv5 = add_ResNet_roi_conv5_head_for_masks( + model, + blob_in, + dim_in, + spatial_scale + ) + + dim_reduced = cfg.MRCNN.DIM_REDUCED + + blob_mask = model.ConvTranspose( + blob_conv5, + 'conv5_mask', + dim_conv5, + dim_reduced, + kernel=2, + pad=0, + stride=2, + weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}), # std only for gauss + bias_init=const_fill(0.0) + ) + model.Relu('conv5_mask', 'conv5_mask') + + return blob_mask, dim_reduced + + +def mask_rcnn_fcn_head_v0up(model, blob_in, dim_in, spatial_scale): + """v0up design: conv5, deconv 2x2 (no weight sharing with the box head).""" + blob_conv5, dim_conv5 = add_ResNet_roi_conv5_head_for_masks( + model, + blob_in, + dim_in, + spatial_scale + ) + + dim_reduced = cfg.MRCNN.DIM_REDUCED + + model.ConvTranspose( + blob_conv5, + 'conv5_mask', + dim_conv5, + dim_reduced, + kernel=2, + pad=0, + stride=2, + weight_init=('GaussianFill', {'std': 0.001}), + bias_init=const_fill(0.0) + ) + blob_mask = model.Relu('conv5_mask', 'conv5_mask') + + return blob_mask, dim_reduced + + +def add_ResNet_roi_conv5_head_for_masks(model, blob_in, dim_in, spatial_scale): + """Add a ResNet "conv5" / "stage5" head for predicting masks.""" + model.RoIFeatureTransform( + blob_in, + blob_out='_[mask]_pool5', + blob_rois='mask_rois', + method=cfg.MRCNN.ROI_XFORM_METHOD, + resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION, + sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO, + spatial_scale=spatial_scale + ) + + dilation = cfg.MRCNN.DILATION + stride_init = int(cfg.MRCNN.ROI_XFORM_RESOLUTION / 7) # by default: 2 + + s, dim_in = ResNet.add_stage( + model, + 
'_[mask]_res5', + '_[mask]_pool5', + 3, + dim_in, + 2048, + 512, + dilation, + stride_init=stride_init + ) + + return s, 2048 diff --git a/detectron/modeling/model_builder.py b/detectron/modeling/model_builder.py new file mode 100644 index 0000000000000000000000000000000000000000..25ab21770c4bc68108b50f9e69b6d75f2e0a4c23 --- /dev/null +++ b/detectron/modeling/model_builder.py @@ -0,0 +1,675 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Detectron model construction functions. + +Detectron supports a large number of model types. The configuration space is +large. To get a sense, a given model is in element in the cartesian product of: + + - backbone (e.g., VGG16, ResNet, ResNeXt) + - FPN (on or off) + - RPN only (just proposals) + - Fixed proposals for Fast R-CNN, RFCN, Mask R-CNN (with or without keypoints) + - End-to-end model with RPN + Fast R-CNN (i.e., Faster R-CNN), Mask R-CNN, ... + - Different "head" choices for the model + - ... many configuration options ... + +A given model is made by combining many basic components. The result is flexible +though somewhat complex to understand at first. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import copy +import importlib +import logging + +from caffe2.python import core +from caffe2.python import workspace + +from detectron.core.config import cfg +from detectron.modeling.detector import DetectionModelHelper +from detectron.roi_data.loader import RoIDataLoader +import detectron.modeling.fast_rcnn_heads as fast_rcnn_heads +import detectron.modeling.keypoint_rcnn_heads as keypoint_rcnn_heads +import detectron.modeling.mask_rcnn_heads as mask_rcnn_heads +import detectron.modeling.name_compat as name_compat +import detectron.modeling.optimizer as optim +import detectron.modeling.retinanet_heads as retinanet_heads +import detectron.modeling.rfcn_heads as rfcn_heads +import detectron.modeling.rpn_heads as rpn_heads +import detectron.roi_data.minibatch as roi_data_minibatch +import detectron.utils.c2 as c2_utils + +logger = logging.getLogger(__name__) + + +# ---------------------------------------------------------------------------- # +# Generic recomposable model builders +# +# For example, you can create a Fast R-CNN model with the ResNet-50-C4 backbone +# with the configuration: +# +# MODEL: +# TYPE: generalized_rcnn +# CONV_BODY: ResNet.add_ResNet50_conv4_body +# ROI_HEAD: ResNet.add_ResNet_roi_conv5_head +# ---------------------------------------------------------------------------- # + +def generalized_rcnn(model): + """This model type handles: + - Fast R-CNN + - RPN only (not integrated with Fast R-CNN) + - Faster R-CNN (stagewise training from NIPS paper) + - Faster R-CNN (end-to-end joint training) + - Mask R-CNN (stagewise training from NIPS paper) + - Mask R-CNN (end-to-end joint training) + """ + return build_generic_detection_model( + model, + get_func(cfg.MODEL.CONV_BODY), + add_roi_box_head_func=get_func(cfg.FAST_RCNN.ROI_BOX_HEAD), + 
add_roi_mask_head_func=get_func(cfg.MRCNN.ROI_MASK_HEAD), + add_roi_keypoint_head_func=get_func(cfg.KRCNN.ROI_KEYPOINTS_HEAD), + freeze_conv_body=cfg.TRAIN.FREEZE_CONV_BODY + ) + + +def rfcn(model): + # TODO(rbg): fold into build_generic_detection_model + return build_generic_rfcn_model(model, get_func(cfg.MODEL.CONV_BODY)) + + +def retinanet(model): + # TODO(rbg): fold into build_generic_detection_model + return build_generic_retinanet_model(model, get_func(cfg.MODEL.CONV_BODY)) + + +# ---------------------------------------------------------------------------- # +# Helper functions for building various re-usable network bits +# ---------------------------------------------------------------------------- # + +def create(model_type_func, train=False, gpu_id=0): + """Generic model creation function that dispatches to specific model + building functions. + + By default, this function will generate a data parallel model configured to + run on cfg.NUM_GPUS devices. However, you can restrict it to build a model + targeted to a specific GPU by specifying gpu_id. This is used by + optimizer.build_data_parallel_model() during test time. + """ + model = DetectionModelHelper( + name=model_type_func, + train=train, + num_classes=cfg.MODEL.NUM_CLASSES, + init_params=train + ) + model.only_build_forward_pass = False + model.target_gpu_id = gpu_id + return get_func(model_type_func)(model) + + +def get_func(func_name): + """Helper to return a function object by name. func_name must identify a + function in this module or the path to a function relative to the base + 'modeling' module. + """ + if func_name == '': + return None + new_func_name = name_compat.get_new_name(func_name) + if new_func_name != func_name: + logger.warn( + 'Remapping old function name: {} -> {}'. 
+ format(func_name, new_func_name) + ) + func_name = new_func_name + try: + parts = func_name.split('.') + # Refers to a function in this module + if len(parts) == 1: + return globals()[parts[0]] + # Otherwise, assume we're referencing a module under modeling + module_name = 'detectron.modeling.' + '.'.join(parts[:-1]) + module = importlib.import_module(module_name) + return getattr(module, parts[-1]) + except Exception: + logger.error('Failed to find function: {}'.format(func_name)) + raise + + +def build_generic_detection_model( + model, + add_conv_body_func, + add_roi_box_head_func=None, + add_roi_mask_head_func=None, + add_roi_keypoint_head_func=None, + freeze_conv_body=False +): + def _single_gpu_build_func(model): + """Build the model on a single GPU. Can be called in a loop over GPUs + with name and device scoping to create a data parallel model. + """ + # Add the conv body (called "backbone architecture" in papers) + # E.g., ResNet-50, ResNet-50-FPN, ResNeXt-101-FPN, etc. + blob_conv, dim_conv, spatial_scale_conv = add_conv_body_func(model) + if freeze_conv_body: + for b in c2_utils.BlobReferenceList(blob_conv): + model.StopGradient(b, b) + + if not model.train: # == inference + # Create a net that can be used to execute the conv body on an image + # (without also executing RPN or any other network heads) + model.conv_body_net = model.net.Clone('conv_body_net') + + head_loss_gradients = { + 'rpn': None, + 'box': None, + 'mask': None, + 'keypoints': None, + } + + if cfg.RPN.RPN_ON: + # Add the RPN head + head_loss_gradients['rpn'] = rpn_heads.add_generic_rpn_outputs( + model, blob_conv, dim_conv, spatial_scale_conv + ) + + if cfg.FPN.FPN_ON: + # After adding the RPN head, restrict FPN blobs and scales to + # those used in the RoI heads + blob_conv, spatial_scale_conv = _narrow_to_fpn_roi_levels( + blob_conv, spatial_scale_conv + ) + + if not cfg.MODEL.RPN_ONLY: + # Add the Fast R-CNN head + head_loss_gradients['box'] = _add_fast_rcnn_head( + model, 
add_roi_box_head_func, blob_conv, dim_conv, + spatial_scale_conv + ) + + if cfg.MODEL.MASK_ON: + # Add the mask head + head_loss_gradients['mask'] = _add_roi_mask_head( + model, add_roi_mask_head_func, blob_conv, dim_conv, + spatial_scale_conv + ) + + if cfg.MODEL.KEYPOINTS_ON: + # Add the keypoint head + head_loss_gradients['keypoint'] = _add_roi_keypoint_head( + model, add_roi_keypoint_head_func, blob_conv, dim_conv, + spatial_scale_conv + ) + + if model.train: + loss_gradients = {} + for lg in head_loss_gradients.values(): + if lg is not None: + loss_gradients.update(lg) + return loss_gradients + else: + return None + + optim.build_data_parallel_model(model, _single_gpu_build_func) + return model + + +def _narrow_to_fpn_roi_levels(blobs, spatial_scales): + """Return only the blobs and spatial scales that will be used for RoI heads. + Inputs `blobs` and `spatial_scales` may include extra blobs and scales that + are used for RPN proposals, but not for RoI heads. + """ + # Code only supports case when RPN and ROI min levels are the same + assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL + # RPN max level can be >= to ROI max level + assert cfg.FPN.RPN_MAX_LEVEL >= cfg.FPN.ROI_MAX_LEVEL + # FPN RPN max level might be > FPN ROI max level in which case we + # need to discard some leading conv blobs (blobs are ordered from + # max/coarsest level to min/finest level) + num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1 + return blobs[-num_roi_levels:], spatial_scales[-num_roi_levels:] + + +def _add_fast_rcnn_head( + model, add_roi_box_head_func, blob_in, dim_in, spatial_scale_in +): + """Add a Fast R-CNN head to the model.""" + blob_frcn, dim_frcn = add_roi_box_head_func( + model, blob_in, dim_in, spatial_scale_in + ) + fast_rcnn_heads.add_fast_rcnn_outputs(model, blob_frcn, dim_frcn) + if model.train: + loss_gradients = fast_rcnn_heads.add_fast_rcnn_losses(model) + else: + loss_gradients = None + return loss_gradients + + +def _add_roi_mask_head( + 
model, add_roi_mask_head_func, blob_in, dim_in, spatial_scale_in +): + """Add a mask prediction head to the model.""" + # Capture model graph before adding the mask head + bbox_net = copy.deepcopy(model.net.Proto()) + # Add the mask head + blob_mask_head, dim_mask_head = add_roi_mask_head_func( + model, blob_in, dim_in, spatial_scale_in + ) + # Add the mask output + blob_mask = mask_rcnn_heads.add_mask_rcnn_outputs( + model, blob_mask_head, dim_mask_head + ) + + if not model.train: # == inference + # Inference uses a cascade of box predictions, then mask predictions. + # This requires separate nets for box and mask prediction. + # So we extract the mask prediction net, store it as its own network, + # then restore model.net to be the bbox-only network + model.mask_net, blob_mask = c2_utils.SuffixNet( + 'mask_net', model.net, len(bbox_net.op), blob_mask + ) + model.net._net = bbox_net + loss_gradients = None + else: + loss_gradients = mask_rcnn_heads.add_mask_rcnn_losses(model, blob_mask) + return loss_gradients + + +def _add_roi_keypoint_head( + model, add_roi_keypoint_head_func, blob_in, dim_in, spatial_scale_in +): + """Add a keypoint prediction head to the model.""" + # Capture model graph before adding the mask head + bbox_net = copy.deepcopy(model.net.Proto()) + # Add the keypoint head + blob_keypoint_head, dim_keypoint_head = add_roi_keypoint_head_func( + model, blob_in, dim_in, spatial_scale_in + ) + # Add the keypoint output + blob_keypoint = keypoint_rcnn_heads.add_keypoint_outputs( + model, blob_keypoint_head, dim_keypoint_head + ) + + if not model.train: # == inference + # Inference uses a cascade of box predictions, then keypoint predictions + # This requires separate nets for box and keypoint prediction. 
+ # So we extract the keypoint prediction net, store it as its own + # network, then restore model.net to be the bbox-only network + model.keypoint_net, keypoint_blob_out = c2_utils.SuffixNet( + 'keypoint_net', model.net, len(bbox_net.op), blob_keypoint + ) + model.net._net = bbox_net + loss_gradients = None + else: + loss_gradients = keypoint_rcnn_heads.add_keypoint_losses(model) + return loss_gradients + + +def build_generic_rfcn_model(model, add_conv_body_func, dim_reduce=None): + # TODO(rbg): fold this function into build_generic_detection_model + def _single_gpu_build_func(model): + """Builds the model on a single GPU. Can be called in a loop over GPUs + with name and device scoping to create a data parallel model.""" + blob, dim, spatial_scale = add_conv_body_func(model) + if not model.train: + model.conv_body_net = model.net.Clone('conv_body_net') + rfcn_heads.add_rfcn_outputs(model, blob, dim, dim_reduce, spatial_scale) + if model.train: + loss_gradients = fast_rcnn_heads.add_fast_rcnn_losses(model) + return loss_gradients if model.train else None + + optim.build_data_parallel_model(model, _single_gpu_build_func) + return model + + +def build_generic_retinanet_model( + model, add_conv_body_func, freeze_conv_body=False +): + # TODO(rbg): fold this function into build_generic_detection_model + def _single_gpu_build_func(model): + """Builds the model on a single GPU. 
Can be called in a loop over GPUs + with name and device scoping to create a data parallel model.""" + blobs, dim, spatial_scales = add_conv_body_func(model) + if not model.train: + model.conv_body_net = model.net.Clone('conv_body_net') + retinanet_heads.add_fpn_retinanet_outputs( + model, blobs, dim, spatial_scales + ) + if model.train: + loss_gradients = retinanet_heads.add_fpn_retinanet_losses( + model + ) + return loss_gradients if model.train else None + + optim.build_data_parallel_model(model, _single_gpu_build_func) + return model + + +# ---------------------------------------------------------------------------- # +# Network inputs +# ---------------------------------------------------------------------------- # + +def add_training_inputs(model, roidb=None): + """Create network input ops and blobs used for training. To be called + *after* model_builder.create(). + """ + # Implementation notes: + # Typically, one would create the input ops and then the rest of the net. + # However, creating the input ops depends on loading the dataset, which + # can take a few minutes for COCO. + # We prefer to avoid waiting so debugging can fail fast. + # Thus, we create the net *without input ops* prior to loading the + # dataset, and then add the input ops after loading the dataset. + # Since we defer input op creation, we need to do a little bit of surgery + # to place the input ops at the start of the network op list. 
+ assert model.train, 'Training inputs can only be added to a trainable model' + if roidb is not None: + # To make debugging easier you can set cfg.DATA_LOADER.NUM_THREADS = 1 + model.roi_data_loader = RoIDataLoader( + roidb, + num_loaders=cfg.DATA_LOADER.NUM_THREADS, + minibatch_queue_size=cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE, + blobs_queue_capacity=cfg.DATA_LOADER.BLOBS_QUEUE_CAPACITY + ) + orig_num_op = len(model.net._net.op) + blob_names = roi_data_minibatch.get_minibatch_blob_names(is_training=True) + for gpu_id in range(cfg.NUM_GPUS): + with c2_utils.NamedCudaScope(gpu_id): + for blob_name in blob_names: + workspace.CreateBlob(core.ScopedName(blob_name)) + model.net.DequeueBlobs( + model.roi_data_loader._blobs_queue_name, blob_names + ) + # A little op surgery to move input ops to the start of the net + diff = len(model.net._net.op) - orig_num_op + new_op = model.net._net.op[-diff:] + model.net._net.op[:-diff] + del model.net._net.op[:] + model.net._net.op.extend(new_op) + + +def add_inference_inputs(model): + """Create network input blobs used for inference.""" + + def create_input_blobs_for_net(net_def): + for op in net_def.op: + for blob_in in op.input: + if not workspace.HasBlob(blob_in): + workspace.CreateBlob(blob_in) + + create_input_blobs_for_net(model.net.Proto()) + if cfg.MODEL.MASK_ON: + create_input_blobs_for_net(model.mask_net.Proto()) + if cfg.MODEL.KEYPOINTS_ON: + create_input_blobs_for_net(model.keypoint_net.Proto()) + + +# ---------------------------------------------------------------------------- # +# ********************** DEPRECATED FUNCTIONALITY BELOW ********************** # +# ---------------------------------------------------------------------------- # + +# ---------------------------------------------------------------------------- # +# Hardcoded functions to create various types of common models +# +# *** This type of model definition is deprecated *** +# *** Use the generic composable versions instead *** +# +# 
---------------------------------------------------------------------------- # + +import detectron.modeling.ResNet as ResNet +import detectron.modeling.VGG16 as VGG16 +import detectron.modeling.VGG_CNN_M_1024 as VGG_CNN_M_1024 + + +def fast_rcnn(model): + logger.warn('Deprecated: use `MODEL.TYPE: generalized_rcnn`.') + return generalized_rcnn(model) + + +def mask_rcnn(model): + logger.warn( + 'Deprecated: use `MODEL.TYPE: generalized_rcnn` with ' + '`MODEL.MASK_ON: True`' + ) + return generalized_rcnn(model) + + +def keypoint_rcnn(model): + logger.warn( + 'Deprecated: use `MODEL.TYPE: generalized_rcnn` with ' + '`MODEL.KEYPOINTS_ON: True`' + ) + return generalized_rcnn(model) + + +def mask_and_keypoint_rcnn(model): + logger.warn( + 'Deprecated: use `MODEL.TYPE: generalized_rcnn` with ' + '`MODEL.MASK_ON: True and ``MODEL.KEYPOINTS_ON: True`' + ) + return generalized_rcnn(model) + + +def rpn(model): + logger.warn( + 'Deprecated: use `MODEL.TYPE: generalized_rcnn` with ' + '`MODEL.RPN_ONLY: True`' + ) + return generalized_rcnn(model) + + +def fpn_rpn(model): + logger.warn( + 'Deprecated: use `MODEL.TYPE: generalized_rcnn` with ' + '`MODEL.RPN_ONLY: True` and FPN enabled via configs' + ) + return generalized_rcnn(model) + + +def faster_rcnn(model): + logger.warn( + 'Deprecated: use `MODEL.TYPE: generalized_rcnn` with ' + '`MODEL.FASTER_RCNN: True`' + ) + return generalized_rcnn(model) + + +def fast_rcnn_frozen_features(model): + logger.warn('Deprecated: use `TRAIN.FREEZE_CONV_BODY: True` instead') + return build_generic_detection_model( + model, + get_func(cfg.MODEL.CONV_BODY), + add_roi_box_head_func=get_func(cfg.FAST_RCNN.ROI_BOX_HEAD), + freeze_conv_body=True + ) + + +def rpn_frozen_features(model): + logger.warn('Deprecated: use `TRAIN.FREEZE_CONV_BODY: True` instead') + return build_generic_detection_model( + model, get_func(cfg.MODEL.CONV_BODY), freeze_conv_body=True + ) + + +def fpn_rpn_frozen_features(model): + logger.warn('Deprecated: use 
`TRAIN.FREEZE_CONV_BODY: True` instead') + return build_generic_detection_model( + model, get_func(cfg.MODEL.CONV_BODY), freeze_conv_body=True + ) + + +def mask_rcnn_frozen_features(model): + logger.warn('Deprecated: use `TRAIN.FREEZE_CONV_BODY: True` instead') + return build_generic_detection_model( + model, + get_func(cfg.MODEL.CONV_BODY), + add_roi_box_head_func=get_func(cfg.FAST_RCNN.ROI_BOX_HEAD), + add_roi_mask_head_func=get_func(cfg.MRCNN.ROI_MASK_HEAD), + freeze_conv_body=True + ) + + +def keypoint_rcnn_frozen_features(model): + logger.warn('Deprecated: use `TRAIN.FREEZE_CONV_BODY: True` instead') + return build_generic_detection_model( + model, + get_func(cfg.MODEL.CONV_BODY), + add_roi_box_head_func=get_func(cfg.FAST_RCNN.ROI_BOX_HEAD), + add_roi_keypoint_head_func=get_func(cfg.KRCNN.ROI_KEYPOINTS_HEAD), + freeze_conv_body=True + ) + + +# ---------------------------------------------------------------------------- # +# Fast R-CNN models +# ---------------------------------------------------------------------------- # + + +def VGG_CNN_M_1024_fast_rcnn(model): + return build_generic_detection_model( + model, VGG_CNN_M_1024.add_VGG_CNN_M_1024_conv5_body, + VGG_CNN_M_1024.add_VGG_CNN_M_1024_roi_fc_head + ) + + +def VGG16_fast_rcnn(model): + return build_generic_detection_model( + model, VGG16.add_VGG16_conv5_body, VGG16.add_VGG16_roi_fc_head + ) + + +def ResNet50_fast_rcnn(model): + return build_generic_detection_model( + model, ResNet.add_ResNet50_conv4_body, ResNet.add_ResNet_roi_conv5_head + ) + + +def ResNet101_fast_rcnn(model): + return build_generic_detection_model( + model, ResNet.add_ResNet101_conv4_body, ResNet.add_ResNet_roi_conv5_head + ) + + +def ResNet50_fast_rcnn_frozen_features(model): + return build_generic_detection_model( + model, + ResNet.add_ResNet50_conv4_body, + ResNet.add_ResNet_roi_conv5_head, + freeze_conv_body=True + ) + + +def ResNet101_fast_rcnn_frozen_features(model): + return build_generic_detection_model( + model, + 
ResNet.add_ResNet101_conv4_body, + ResNet.add_ResNet_roi_conv5_head, + freeze_conv_body=True + ) + + +# ---------------------------------------------------------------------------- # +# RPN-only models +# ---------------------------------------------------------------------------- # + + +def VGG_CNN_M_1024_rpn(model): + return build_generic_detection_model( + model, VGG_CNN_M_1024.add_VGG_CNN_M_1024_conv5_body + ) + + +def VGG16_rpn(model): + return build_generic_detection_model(model, VGG16.add_VGG16_conv5_body) + + +def ResNet50_rpn_conv4(model): + return build_generic_detection_model(model, ResNet.add_ResNet50_conv4_body) + + +def ResNet101_rpn_conv4(model): + return build_generic_detection_model(model, ResNet.add_ResNet101_conv4_body) + + +def VGG_CNN_M_1024_rpn_frozen_features(model): + return build_generic_detection_model( + model, + VGG_CNN_M_1024.add_VGG_CNN_M_1024_conv5_body, + freeze_conv_body=True + ) + + +def VGG16_rpn_frozen_features(model): + return build_generic_detection_model( + model, VGG16.add_VGG16_conv5_body, freeze_conv_body=True + ) + + +def ResNet50_rpn_conv4_frozen_features(model): + return build_generic_detection_model( + model, ResNet.add_ResNet50_conv4_body, freeze_conv_body=True + ) + + +def ResNet101_rpn_conv4_frozen_features(model): + return build_generic_detection_model( + model, ResNet.add_ResNet101_conv4_body, freeze_conv_body=True + ) + + +# ---------------------------------------------------------------------------- # +# Faster R-CNN models +# ---------------------------------------------------------------------------- # + + +def VGG16_faster_rcnn(model): + assert cfg.MODEL.FASTER_RCNN + return build_generic_detection_model( + model, VGG16.add_VGG16_conv5_body, VGG16.add_VGG16_roi_fc_head + ) + + +def ResNet50_faster_rcnn(model): + assert cfg.MODEL.FASTER_RCNN + return build_generic_detection_model( + model, ResNet.add_ResNet50_conv4_body, ResNet.add_ResNet_roi_conv5_head + ) + + +def ResNet101_faster_rcnn(model): + assert 
cfg.MODEL.FASTER_RCNN + return build_generic_detection_model( + model, ResNet.add_ResNet101_conv4_body, ResNet.add_ResNet_roi_conv5_head + ) + + +# ---------------------------------------------------------------------------- # +# R-FCN models +# ---------------------------------------------------------------------------- # + + +def ResNet50_rfcn(model): + return build_generic_rfcn_model( + model, ResNet.add_ResNet50_conv5_body, dim_reduce=1024 + ) + + +def ResNet101_rfcn(model): + return build_generic_rfcn_model( + model, ResNet.add_ResNet101_conv5_body, dim_reduce=1024 + ) diff --git a/detectron/modeling/name_compat.py b/detectron/modeling/name_compat.py new file mode 100644 index 0000000000000000000000000000000000000000..7899ea6e04474d87d8adef8a2e5baf3c40dda543 --- /dev/null +++ b/detectron/modeling/name_compat.py @@ -0,0 +1,62 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Handle mapping from old network building function names to new names. + +Flexible network configuration is achieved by specifying the function name that +builds a network module (e.g., the name of the conv backbone or the mask roi +head). However we may wish to change names over time without breaking previous +config files. This module provides backwards naming compatibility by providing +a mapping from the old name to the new name. 
+ +When renaming functions, it's generally a good idea to codemod existing yaml +config files. An easy way to batch edit, by example, is a shell command like + +$ find . -name "*.yaml" -exec sed -i -e \ + 's/head_builder\.add_roi_2mlp_head/fast_rcnn_heads.add_roi_2mlp_head/g' {} \; + +to perform the renaming: + head_builder.add_roi_2mlp_head => fast_rcnn_heads.add_roi_2mlp_head +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + + +_RENAME = { + # Removed "ResNet_" from the name because it wasn't relevent + 'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v1up4convs': + 'mask_rcnn_heads.mask_rcnn_fcn_head_v1up4convs', + # Removed "ResNet_" from the name because it wasn't relevent + 'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v1up': + 'mask_rcnn_heads.mask_rcnn_fcn_head_v1up', + # Removed "ResNet_" from the name because it wasn't relevent + 'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v0upshare': + 'mask_rcnn_heads.mask_rcnn_fcn_head_v0upshare', + # Removed "ResNet_" from the name because it wasn't relevent + 'mask_rcnn_heads.ResNet_mask_rcnn_fcn_head_v0up': + 'mask_rcnn_heads.mask_rcnn_fcn_head_v0up', + # Removed head_builder module in favor of the more specific fast_rcnn name + 'head_builder.add_roi_2mlp_head': + 'fast_rcnn_heads.add_roi_2mlp_head', +} + + +def get_new_name(func_name): + if func_name in _RENAME: + func_name = _RENAME[func_name] + return func_name diff --git a/detectron/modeling/optimizer.py b/detectron/modeling/optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..4702428910561440c2a238da2945c5c835ca3841 --- /dev/null +++ b/detectron/modeling/optimizer.py @@ -0,0 +1,130 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Optimization operator graph construction.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import logging + +from caffe2.python import muji + +from detectron.core.config import cfg +import detectron.utils.c2 as c2_utils + +logger = logging.getLogger(__name__) + + +def build_data_parallel_model(model, single_gpu_build_func): + """Build a data parallel model given a function that builds the model on a + single GPU. 
+ """ + if model.only_build_forward_pass: + single_gpu_build_func(model) + elif model.train: + all_loss_gradients = _build_forward_graph(model, single_gpu_build_func) + # Add backward pass on all GPUs + model.AddGradientOperators(all_loss_gradients) + if cfg.NUM_GPUS > 1: + _add_allreduce_graph(model) + for gpu_id in range(cfg.NUM_GPUS): + # After allreduce, all GPUs perform SGD updates on their identical + # params and gradients in parallel + with c2_utils.NamedCudaScope(gpu_id): + add_single_gpu_param_update_ops(model, gpu_id) + else: + # Test-time network operates on single GPU + # Test-time parallelism is implemented through multiprocessing + with c2_utils.NamedCudaScope(model.target_gpu_id): + single_gpu_build_func(model) + + +def _build_forward_graph(model, single_gpu_build_func): + """Construct the forward graph on each GPU.""" + all_loss_gradients = {} # Will include loss gradients from all GPUs + # Build the model on each GPU with correct name and device scoping + for gpu_id in range(cfg.NUM_GPUS): + with c2_utils.NamedCudaScope(gpu_id): + all_loss_gradients.update(single_gpu_build_func(model)) + return all_loss_gradients + + +def _add_allreduce_graph(model): + """Construct the graph that performs Allreduce on the gradients.""" + # Need to all-reduce the per-GPU gradients if training with more than 1 GPU + all_params = model.TrainableParams() + assert len(all_params) % cfg.NUM_GPUS == 0 + # The model parameters are replicated on each GPU, get the number + # distinct parameter blobs (i.e., the number of parameter blobs on + # each GPU) + params_per_gpu = int(len(all_params) / cfg.NUM_GPUS) + with c2_utils.CudaScope(0): + # Iterate over distinct parameter blobs + for i in range(params_per_gpu): + # Gradients from all GPUs for this parameter blob + gradients = [ + model.param_to_grad[p] for p in all_params[i::params_per_gpu] + ] + if len(gradients) > 0: + if cfg.USE_NCCL: + model.net.NCCLAllreduce(gradients, gradients) + else: + muji.Allreduce(model.net, 
gradients, reduced_affix='') + + +def add_single_gpu_param_update_ops(model, gpu_id): + # Learning rate of 0 is a dummy value to be set properly at the + # start of training + lr = model.param_init_net.ConstantFill( + [], 'lr', shape=[1], value=0.0 + ) + one = model.param_init_net.ConstantFill( + [], 'one', shape=[1], value=1.0 + ) + wd = model.param_init_net.ConstantFill( + [], 'wd', shape=[1], value=cfg.SOLVER.WEIGHT_DECAY + ) + # weight decay of GroupNorm's parameters + wd_gn = model.param_init_net.ConstantFill( + [], 'wd_gn', shape=[1], value=cfg.SOLVER.WEIGHT_DECAY_GN + ) + for param in model.TrainableParams(gpu_id=gpu_id): + logger.debug('param ' + str(param) + ' will be updated') + param_grad = model.param_to_grad[param] + # Initialize momentum vector + param_momentum = model.param_init_net.ConstantFill( + [param], param + '_momentum', value=0.0 + ) + if param in model.biases: + # Special treatment for biases (mainly to match historical impl. + # details): + # (1) Do not apply weight decay + # (2) Use a 2x higher learning rate + model.Scale(param_grad, param_grad, scale=2.0) + elif param in model.gn_params: + # Special treatment for GroupNorm's parameters + model.WeightedSum([param_grad, one, param, wd_gn], param_grad) + elif cfg.SOLVER.WEIGHT_DECAY > 0: + # Apply weight decay to non-bias weights + model.WeightedSum([param_grad, one, param, wd], param_grad) + # Update param_grad and param_momentum in place + model.net.MomentumSGDUpdate( + [param_grad, param_momentum, lr, param], + [param_grad, param_momentum, param], + momentum=cfg.SOLVER.MOMENTUM + ) diff --git a/detectron/modeling/retinanet_heads.py b/detectron/modeling/retinanet_heads.py new file mode 100644 index 0000000000000000000000000000000000000000..56f09f1c625019f7382489919aae2e8de14a8beb --- /dev/null +++ b/detectron/modeling/retinanet_heads.py @@ -0,0 +1,311 @@ +# Copyright (c) 2017-present, Facebook, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""RetinaNet model heads and losses. See: https://arxiv.org/abs/1708.02002.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np + +from detectron.core.config import cfg +import detectron.utils.blob as blob_utils + + +def get_retinanet_bias_init(model): + """Initialize the biases for the conv ops that predict class probabilities. + Initialization is performed such that at the start of training, all + locations are predicted to be background with high probability + (e.g., ~0.99 = 1 - cfg.RETINANET.PRIOR_PROB). See the Focal Loss paper for + details. 
+ """ + prior_prob = cfg.RETINANET.PRIOR_PROB + scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE + aspect_ratios = len(cfg.RETINANET.ASPECT_RATIOS) + if cfg.RETINANET.SOFTMAX: + # Multiclass softmax case + bias = np.zeros((model.num_classes, 1), dtype=np.float32) + bias[0] = np.log( + (model.num_classes - 1) * (1 - prior_prob) / (prior_prob) + ) + bias = np.vstack( + [bias for _ in range(scales_per_octave * aspect_ratios)] + ) + bias_init = ( + 'GivenTensorFill', { + 'values': bias.astype(dtype=np.float32) + } + ) + else: + # Per-class sigmoid (binary classification) case + bias_init = ( + 'ConstantFill', { + 'value': -np.log((1 - prior_prob) / prior_prob) + } + ) + return bias_init + + +def add_fpn_retinanet_outputs(model, blobs_in, dim_in, spatial_scales): + """RetinaNet head. For classification and box regression, we can chose to + have the same conv tower or a separate tower. "bl_feat_list" stores the list + of feature blobs for bbox prediction. These blobs can be shared cls feature + blobs if we share the tower or else are independent blobs. 
+ """ + dim_out = dim_in + k_max = cfg.FPN.RPN_MAX_LEVEL # coarsest level of pyramid + k_min = cfg.FPN.RPN_MIN_LEVEL # finest level of pyramid + A = len(cfg.RETINANET.ASPECT_RATIOS) * cfg.RETINANET.SCALES_PER_OCTAVE + + # compute init for bias + bias_init = get_retinanet_bias_init(model) + + assert len(blobs_in) == k_max - k_min + 1 + bbox_feat_list = [] + cls_pred_dim = ( + model.num_classes if cfg.RETINANET.SOFTMAX else (model.num_classes - 1) + ) + # unpacked bbox feature and add prediction layers + bbox_regr_dim = ( + 4 * (model.num_classes - 1) if cfg.RETINANET.CLASS_SPECIFIC_BBOX else 4 + ) + + # ========================================================================== + # classification tower with logits and prob prediction + # ========================================================================== + for lvl in range(k_min, k_max + 1): + bl_in = blobs_in[k_max - lvl] # blobs_in is in reversed order + # classification tower stack convolution starts + for nconv in range(cfg.RETINANET.NUM_CONVS): + suffix = 'n{}_fpn{}'.format(nconv, lvl) + dim_in, dim_out = dim_in, dim_in + if lvl == k_min: + bl_out = model.Conv( + bl_in, + 'retnet_cls_conv_' + suffix, + dim_in, + dim_out, + 3, + stride=1, + pad=1, + weight_init=('GaussianFill', { + 'std': 0.01 + }), + bias_init=('ConstantFill', { + 'value': 0. + }) + ) + else: + bl_out = model.ConvShared( + bl_in, + 'retnet_cls_conv_' + suffix, + dim_in, + dim_out, + 3, + stride=1, + pad=1, + weight='retnet_cls_conv_n{}_fpn{}_w'.format(nconv, k_min), + bias='retnet_cls_conv_n{}_fpn{}_b'.format(nconv, k_min) + ) + bl_in = model.Relu(bl_out, bl_out) + bl_feat = bl_in + # cls tower stack convolution ends. 
Add the logits layer now + if lvl == k_min: + retnet_cls_pred = model.Conv( + bl_feat, + 'retnet_cls_pred_fpn{}'.format(lvl), + dim_in, + cls_pred_dim * A, + 3, + pad=1, + stride=1, + weight_init=('GaussianFill', { + 'std': 0.01 + }), + bias_init=bias_init + ) + else: + retnet_cls_pred = model.ConvShared( + bl_feat, + 'retnet_cls_pred_fpn{}'.format(lvl), + dim_in, + cls_pred_dim * A, + 3, + pad=1, + stride=1, + weight='retnet_cls_pred_fpn{}_w'.format(k_min), + bias='retnet_cls_pred_fpn{}_b'.format(k_min) + ) + if not model.train: + if cfg.RETINANET.SOFTMAX: + model.net.GroupSpatialSoftmax( + retnet_cls_pred, + 'retnet_cls_prob_fpn{}'.format(lvl), + num_classes=cls_pred_dim + ) + else: + model.net.Sigmoid( + retnet_cls_pred, 'retnet_cls_prob_fpn{}'.format(lvl) + ) + if cfg.RETINANET.SHARE_CLS_BBOX_TOWER: + bbox_feat_list.append(bl_feat) + + # ========================================================================== + # bbox tower if not sharing features with the classification tower with + # logits and prob prediction + # ========================================================================== + if not cfg.RETINANET.SHARE_CLS_BBOX_TOWER: + for lvl in range(k_min, k_max + 1): + bl_in = blobs_in[k_max - lvl] # blobs_in is in reversed order + for nconv in range(cfg.RETINANET.NUM_CONVS): + suffix = 'n{}_fpn{}'.format(nconv, lvl) + dim_in, dim_out = dim_in, dim_in + if lvl == k_min: + bl_out = model.Conv( + bl_in, + 'retnet_bbox_conv_' + suffix, + dim_in, + dim_out, + 3, + stride=1, + pad=1, + weight_init=('GaussianFill', { + 'std': 0.01 + }), + bias_init=('ConstantFill', { + 'value': 0. 
+ }) + ) + else: + bl_out = model.ConvShared( + bl_in, + 'retnet_bbox_conv_' + suffix, + dim_in, + dim_out, + 3, + stride=1, + pad=1, + weight='retnet_bbox_conv_n{}_fpn{}_w'.format( + nconv, k_min + ), + bias='retnet_bbox_conv_n{}_fpn{}_b'.format( + nconv, k_min + ) + ) + bl_in = model.Relu(bl_out, bl_out) + # Add octave scales and aspect ratio + # At least 1 convolution for dealing different aspect ratios + bl_feat = bl_in + bbox_feat_list.append(bl_feat) + # Depending on the features [shared/separate] for bbox, add prediction layer + for i, lvl in enumerate(range(k_min, k_max + 1)): + bbox_pred = 'retnet_bbox_pred_fpn{}'.format(lvl) + bl_feat = bbox_feat_list[i] + if lvl == k_min: + model.Conv( + bl_feat, + bbox_pred, + dim_in, + bbox_regr_dim * A, + 3, + pad=1, + stride=1, + weight_init=('GaussianFill', { + 'std': 0.01 + }), + bias_init=('ConstantFill', { + 'value': 0. + }) + ) + else: + model.ConvShared( + bl_feat, + bbox_pred, + dim_in, + bbox_regr_dim * A, + 3, + pad=1, + stride=1, + weight='retnet_bbox_pred_fpn{}_w'.format(k_min), + bias='retnet_bbox_pred_fpn{}_b'.format(k_min) + ) + + +def add_fpn_retinanet_losses(model): + loss_gradients = {} + gradients, losses = [], [] + + k_max = cfg.FPN.RPN_MAX_LEVEL # coarsest level of pyramid + k_min = cfg.FPN.RPN_MIN_LEVEL # finest level of pyramid + + model.AddMetrics(['retnet_fg_num', 'retnet_bg_num']) + # ========================================================================== + # bbox regression loss - SelectSmoothL1Loss for multiple anchors at a location + # ========================================================================== + for lvl in range(k_min, k_max + 1): + suffix = 'fpn{}'.format(lvl) + bbox_loss = model.net.SelectSmoothL1Loss( + [ + 'retnet_bbox_pred_' + suffix, + 'retnet_roi_bbox_targets_' + suffix, + 'retnet_roi_fg_bbox_locs_' + suffix, 'retnet_fg_num' + ], + 'retnet_loss_bbox_' + suffix, + beta=cfg.RETINANET.BBOX_REG_BETA, + scale=model.GetLossScale() * cfg.RETINANET.BBOX_REG_WEIGHT + ) + 
gradients.append(bbox_loss) + losses.append('retnet_loss_bbox_' + suffix) + + # ========================================================================== + # cls loss - depends on softmax/sigmoid outputs + # ========================================================================== + for lvl in range(k_min, k_max + 1): + suffix = 'fpn{}'.format(lvl) + cls_lvl_logits = 'retnet_cls_pred_' + suffix + if not cfg.RETINANET.SOFTMAX: + cls_focal_loss = model.net.SigmoidFocalLoss( + [ + cls_lvl_logits, 'retnet_cls_labels_' + suffix, + 'retnet_fg_num' + ], + ['fl_{}'.format(suffix)], + gamma=cfg.RETINANET.LOSS_GAMMA, + alpha=cfg.RETINANET.LOSS_ALPHA, + scale=model.GetLossScale(), + num_classes=model.num_classes - 1 + ) + gradients.append(cls_focal_loss) + losses.append('fl_{}'.format(suffix)) + else: + cls_focal_loss, gated_prob = model.net.SoftmaxFocalLoss( + [ + cls_lvl_logits, 'retnet_cls_labels_' + suffix, + 'retnet_fg_num' + ], + ['fl_{}'.format(suffix), 'retnet_prob_{}'.format(suffix)], + gamma=cfg.RETINANET.LOSS_GAMMA, + alpha=cfg.RETINANET.LOSS_ALPHA, + scale=model.GetLossScale(), + num_classes=model.num_classes + ) + gradients.append(cls_focal_loss) + losses.append('fl_{}'.format(suffix)) + + loss_gradients.update(blob_utils.get_loss_gradients(model, gradients)) + model.AddLosses(losses) + return loss_gradients diff --git a/detectron/modeling/rfcn_heads.py b/detectron/modeling/rfcn_heads.py new file mode 100644 index 0000000000000000000000000000000000000000..5b54addfcba549cae3b0e744e81237753753d7cb --- /dev/null +++ b/detectron/modeling/rfcn_heads.py @@ -0,0 +1,99 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from detectron.core.config import cfg +from detectron.utils.c2 import const_fill +from detectron.utils.c2 import gauss_fill + + +# ---------------------------------------------------------------------------- # +# R-FCN outputs and losses +# ---------------------------------------------------------------------------- # + +def add_rfcn_outputs(model, blob_in, dim_in, dim_reduce, spatial_scale): + if dim_reduce is not None: + # Optional dim reduction + blob_in = model.Conv( + blob_in, + 'conv_dim_reduce', + dim_in, + dim_reduce, + kernel=1, + pad=0, + stride=1, + weight_init=gauss_fill(0.01), + bias_init=const_fill(0.0) + ) + blob_in = model.Relu(blob_in, blob_in) + dim_in = dim_reduce + # Classification conv + model.Conv( + blob_in, + 'conv_cls', + dim_in, + model.num_classes * cfg.RFCN.PS_GRID_SIZE**2, + kernel=1, + pad=0, + stride=1, + weight_init=gauss_fill(0.01), + bias_init=const_fill(0.0) + ) + # Bounding-box regression conv + num_bbox_reg_classes = ( + 2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else model.num_classes + ) + model.Conv( + blob_in, + 'conv_bbox_pred', + dim_in, + 4 * num_bbox_reg_classes * cfg.RFCN.PS_GRID_SIZE**2, + kernel=1, + pad=0, + stride=1, + weight_init=gauss_fill(0.01), + bias_init=const_fill(0.0) + ) + # Classification PS RoI pooling + model.net.PSRoIPool( + ['conv_cls', 'rois'], ['psroipooled_cls', 
'_mapping_channel_cls'], + group_size=cfg.RFCN.PS_GRID_SIZE, + output_dim=model.num_classes, + spatial_scale=spatial_scale + ) + model.AveragePool( + 'psroipooled_cls', 'cls_score_4d', kernel=cfg.RFCN.PS_GRID_SIZE + ) + model.net.Reshape( + 'cls_score_4d', ['cls_score', '_cls_scores_shape'], + shape=(-1, cfg.MODEL.NUM_CLASSES) + ) + if not model.train: + model.Softmax('cls_score', 'cls_prob', engine='CUDNN') + # Bbox regression PS RoI pooling + model.net.PSRoIPool( + ['conv_bbox_pred', 'rois'], + ['psroipooled_bbox', '_mapping_channel_bbox'], + group_size=cfg.RFCN.PS_GRID_SIZE, + output_dim=4 * num_bbox_reg_classes, + spatial_scale=spatial_scale + ) + model.AveragePool( + 'psroipooled_bbox', 'bbox_pred', kernel=cfg.RFCN.PS_GRID_SIZE + ) diff --git a/detectron/modeling/rpn_heads.py b/detectron/modeling/rpn_heads.py new file mode 100644 index 0000000000000000000000000000000000000000..1f0a9b5ae7e3af5026e18c11b03a5c4c0e941f2c --- /dev/null +++ b/detectron/modeling/rpn_heads.py @@ -0,0 +1,154 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from detectron.core.config import cfg +from detectron.modeling.generate_anchors import generate_anchors +from detectron.utils.c2 import const_fill +from detectron.utils.c2 import gauss_fill +import detectron.modeling.FPN as FPN +import detectron.utils.blob as blob_utils + + +# ---------------------------------------------------------------------------- # +# RPN and Faster R-CNN outputs and losses +# ---------------------------------------------------------------------------- # + +def add_generic_rpn_outputs(model, blob_in, dim_in, spatial_scale_in): + """Add RPN outputs (objectness classification and bounding box regression) + to an RPN model. Abstracts away the use of FPN. + """ + loss_gradients = None + if cfg.FPN.FPN_ON: + # Delegate to the FPN module + FPN.add_fpn_rpn_outputs(model, blob_in, dim_in, spatial_scale_in) + if cfg.MODEL.FASTER_RCNN: + # CollectAndDistributeFpnRpnProposals also labels proposals when in + # training mode + model.CollectAndDistributeFpnRpnProposals() + if model.train: + loss_gradients = FPN.add_fpn_rpn_losses(model) + else: + # Not using FPN, add RPN to a single scale + add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale_in) + if model.train: + loss_gradients = add_single_scale_rpn_losses(model) + return loss_gradients + + +def add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale): + """Add RPN outputs to a single scale model (i.e., no FPN).""" + anchors = generate_anchors( + stride=1. 
/ spatial_scale, + sizes=cfg.RPN.SIZES, + aspect_ratios=cfg.RPN.ASPECT_RATIOS + ) + num_anchors = anchors.shape[0] + dim_out = dim_in + # RPN hidden representation + model.Conv( + blob_in, + 'conv_rpn', + dim_in, + dim_out, + kernel=3, + pad=1, + stride=1, + weight_init=gauss_fill(0.01), + bias_init=const_fill(0.0) + ) + model.Relu('conv_rpn', 'conv_rpn') + # Proposal classification scores + model.Conv( + 'conv_rpn', + 'rpn_cls_logits', + dim_in, + num_anchors, + kernel=1, + pad=0, + stride=1, + weight_init=gauss_fill(0.01), + bias_init=const_fill(0.0) + ) + # Proposal bbox regression deltas + model.Conv( + 'conv_rpn', + 'rpn_bbox_pred', + dim_in, + 4 * num_anchors, + kernel=1, + pad=0, + stride=1, + weight_init=gauss_fill(0.01), + bias_init=const_fill(0.0) + ) + + if not model.train or cfg.MODEL.FASTER_RCNN: + # Proposals are needed during: + # 1) inference (== not model.train) for RPN only and Faster R-CNN + # OR + # 2) training for Faster R-CNN + # Otherwise (== training for RPN only), proposals are not needed + model.net.Sigmoid('rpn_cls_logits', 'rpn_cls_probs') + model.GenerateProposals( + ['rpn_cls_probs', 'rpn_bbox_pred', 'im_info'], + ['rpn_rois', 'rpn_roi_probs'], + anchors=anchors, + spatial_scale=spatial_scale + ) + + if cfg.MODEL.FASTER_RCNN: + if model.train: + # Add op that generates training labels for in-network RPN proposals + model.GenerateProposalLabels(['rpn_rois', 'roidb', 'im_info']) + else: + # Alias rois to rpn_rois for inference + model.net.Alias('rpn_rois', 'rois') + + +def add_single_scale_rpn_losses(model): + """Add losses for a single scale RPN model (i.e., no FPN).""" + # Spatially narrow the full-sized RPN label arrays to match the feature map + # shape + model.net.SpatialNarrowAs( + ['rpn_labels_int32_wide', 'rpn_cls_logits'], 'rpn_labels_int32' + ) + for key in ('targets', 'inside_weights', 'outside_weights'): + model.net.SpatialNarrowAs( + ['rpn_bbox_' + key + '_wide', 'rpn_bbox_pred'], 'rpn_bbox_' + key + ) + loss_rpn_cls = 
model.net.SigmoidCrossEntropyLoss( + ['rpn_cls_logits', 'rpn_labels_int32'], + 'loss_rpn_cls', + scale=model.GetLossScale() + ) + loss_rpn_bbox = model.net.SmoothL1Loss( + [ + 'rpn_bbox_pred', 'rpn_bbox_targets', 'rpn_bbox_inside_weights', + 'rpn_bbox_outside_weights' + ], + 'loss_rpn_bbox', + beta=1. / 9., + scale=model.GetLossScale() + ) + loss_gradients = blob_utils.get_loss_gradients( + model, [loss_rpn_cls, loss_rpn_bbox] + ) + model.AddLosses(['loss_rpn_cls', 'loss_rpn_bbox']) + return loss_gradients diff --git a/detectron/ops/__init__.py b/detectron/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/detectron/ops/collect_and_distribute_fpn_rpn_proposals.py b/detectron/ops/collect_and_distribute_fpn_rpn_proposals.py new file mode 100644 index 0000000000000000000000000000000000000000..76c5ed3bdceef3ad7ec7386121544bf719c48869 --- /dev/null +++ b/detectron/ops/collect_and_distribute_fpn_rpn_proposals.py @@ -0,0 +1,113 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np + +from detectron.core.config import cfg +from detectron.datasets import json_dataset +from detectron.datasets import roidb as roidb_utils +import detectron.modeling.FPN as fpn +import detectron.roi_data.fast_rcnn as fast_rcnn_roi_data +import detectron.utils.blob as blob_utils + + +class CollectAndDistributeFpnRpnProposalsOp(object): + def __init__(self, train): + self._train = train + + def forward(self, inputs, outputs): + """See modeling.detector.CollectAndDistributeFpnRpnProposals for + inputs/outputs documentation. + """ + # inputs is + # [rpn_rois_fpn2, ..., rpn_rois_fpn6, + # rpn_roi_probs_fpn2, ..., rpn_roi_probs_fpn6] + # If training with Faster R-CNN, then inputs will additionally include + # + [roidb, im_info] + rois = collect(inputs, self._train) + if self._train: + # During training we reuse the data loader code. We populate roidb + # entries on the fly using the rois generated by RPN. + # im_info: [[im_height, im_width, im_scale], ...] + im_info = inputs[-1].data + im_scales = im_info[:, 2] + roidb = blob_utils.deserialize(inputs[-2].data) + # For historical consistency with the original Faster R-CNN + # implementation we are *not* filtering crowd proposals. + # This choice should be investigated in the future (it likely does + # not matter). 
+ json_dataset.add_proposals(roidb, rois, im_scales, crowd_thresh=0) + roidb_utils.add_bbox_regression_targets(roidb) + # Compute training labels for the RPN proposals; also handles + # distributing the proposals over FPN levels + output_blob_names = fast_rcnn_roi_data.get_fast_rcnn_blob_names() + blobs = {k: [] for k in output_blob_names} + fast_rcnn_roi_data.add_fast_rcnn_blobs(blobs, im_scales, roidb) + for i, k in enumerate(output_blob_names): + blob_utils.py_op_copy_blob(blobs[k], outputs[i]) + else: + # For inference we have a special code path that avoids some data + # loader overhead + distribute(rois, None, outputs, self._train) + + +def collect(inputs, is_training): + cfg_key = 'TRAIN' if is_training else 'TEST' + post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N + k_max = cfg.FPN.RPN_MAX_LEVEL + k_min = cfg.FPN.RPN_MIN_LEVEL + num_lvls = k_max - k_min + 1 + roi_inputs = inputs[:num_lvls] + score_inputs = inputs[num_lvls:] + if is_training: + score_inputs = score_inputs[:-2] + + # rois are in [[batch_idx, x0, y0, x1, y2], ...] format + # Combine predictions across all levels and retain the top scoring + rois = np.concatenate([blob.data for blob in roi_inputs]) + scores = np.concatenate([blob.data for blob in score_inputs]).squeeze() + inds = np.argsort(-scores)[:post_nms_topN] + rois = rois[inds, :] + return rois + + +def distribute(rois, label_blobs, outputs, train): + """To understand the output blob order see return value of + detectron.roi_data.fast_rcnn.get_fast_rcnn_blob_names(is_training=False) + """ + lvl_min = cfg.FPN.ROI_MIN_LEVEL + lvl_max = cfg.FPN.ROI_MAX_LEVEL + lvls = fpn.map_rois_to_fpn_levels(rois[:, 1:5], lvl_min, lvl_max) + + outputs[0].reshape(rois.shape) + outputs[0].data[...] = rois + + # Create new roi blobs for each FPN level + # (See: modeling.FPN.add_multilevel_roi_blobs which is similar but annoying + # to generalize to support this particular case.) 
+ rois_idx_order = np.empty((0, )) + for output_idx, lvl in enumerate(range(lvl_min, lvl_max + 1)): + idx_lvl = np.where(lvls == lvl)[0] + blob_roi_level = rois[idx_lvl, :] + outputs[output_idx + 1].reshape(blob_roi_level.shape) + outputs[output_idx + 1].data[...] = blob_roi_level + rois_idx_order = np.concatenate((rois_idx_order, idx_lvl)) + rois_idx_restore = np.argsort(rois_idx_order) + blob_utils.py_op_copy_blob(rois_idx_restore.astype(np.int32), outputs[-1]) diff --git a/detectron/ops/generate_proposal_labels.py b/detectron/ops/generate_proposal_labels.py new file mode 100644 index 0000000000000000000000000000000000000000..c1231c2f79b257ebc541a981836bd042fd4520d5 --- /dev/null +++ b/detectron/ops/generate_proposal_labels.py @@ -0,0 +1,54 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import logging + +from detectron.datasets import json_dataset +from detectron.datasets import roidb as roidb_utils +from detectron.utils import blob as blob_utils +import detectron.roi_data.fast_rcnn as fast_rcnn_roi_data + +logger = logging.getLogger(__name__) + + +class GenerateProposalLabelsOp(object): + + def forward(self, inputs, outputs): + """See modeling.detector.GenerateProposalLabels for inputs/outputs + documentation. + """ + # During training we reuse the data loader code. We populate roidb + # entries on the fly using the rois generated by RPN. + # im_info: [[im_height, im_width, im_scale], ...] + rois = inputs[0].data + roidb = blob_utils.deserialize(inputs[1].data) + im_info = inputs[2].data + im_scales = im_info[:, 2] + output_blob_names = fast_rcnn_roi_data.get_fast_rcnn_blob_names() + # For historical consistency with the original Faster R-CNN + # implementation we are *not* filtering crowd proposals. + # This choice should be investigated in the future (it likely does + # not matter). + json_dataset.add_proposals(roidb, rois, im_scales, crowd_thresh=0) + roidb_utils.add_bbox_regression_targets(roidb) + blobs = {k: [] for k in output_blob_names} + fast_rcnn_roi_data.add_fast_rcnn_blobs(blobs, im_scales, roidb) + for i, k in enumerate(output_blob_names): + blob_utils.py_op_copy_blob(blobs[k], outputs[i]) diff --git a/detectron/ops/generate_proposals.py b/detectron/ops/generate_proposals.py new file mode 100644 index 0000000000000000000000000000000000000000..7238c07a202d1ffd6237b4c831cc6bc38d24ac45 --- /dev/null +++ b/detectron/ops/generate_proposals.py @@ -0,0 +1,197 @@ +# Copyright (c) 2017-present, Facebook, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## +# +# Based on: +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick and Sean Bell +# -------------------------------------------------------- + +import numpy as np + +from detectron.core.config import cfg +import detectron.utils.boxes as box_utils + + +class GenerateProposalsOp(object): + """Output object detection proposals by applying estimated bounding-box + transformations to a set of regular boxes (called "anchors"). + + See comment in utils/boxes:bbox_transform_inv for details abouts the + optional `reg_weights` parameter. + """ + + def __init__(self, anchors, spatial_scale, train, reg_weights=(1.0, 1.0, 1.0, 1.0)): + self._anchors = anchors + self._num_anchors = self._anchors.shape[0] + self._feat_stride = 1. / spatial_scale + self._train = train + self._reg_weights = reg_weights + + def forward(self, inputs, outputs): + """See modeling.detector.GenerateProposals for inputs/outputs + documentation. + """ + # 1. for each location i in a (H, W) grid: + # generate A anchor boxes centered on cell i + # apply predicted bbox deltas to each of the A anchors at cell i + # 2. clip predicted boxes to image + # 3. remove predicted boxes with either height or width < threshold + # 4. 
sort all (proposal, score) pairs by score from highest to lowest + # 5. take the top pre_nms_topN proposals before NMS + # 6. apply NMS with a loose threshold (0.7) to the remaining proposals + # 7. take after_nms_topN proposals after NMS + # 8. return the top proposals + + # predicted probability of fg object for each RPN anchor + scores = inputs[0].data + # predicted achors transformations + bbox_deltas = inputs[1].data + # input image (height, width, scale), in which scale is the scale factor + # applied to the original dataset image to get the network input image + im_info = inputs[2].data + # 1. Generate proposals from bbox deltas and shifted anchors + height, width = scores.shape[-2:] + # Enumerate all shifted positions on the (H, W) grid + shift_x = np.arange(0, width) * self._feat_stride + shift_y = np.arange(0, height) * self._feat_stride + shift_x, shift_y = np.meshgrid(shift_x, shift_y, copy=False) + # Convert to (K, 4), K=H*W, where the columns are (dx, dy, dx, dy) + # shift pointing to each grid location + shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), + shift_x.ravel(), shift_y.ravel())).transpose() + + # Broacast anchors over shifts to enumerate all anchors at all positions + # in the (H, W) grid: + # - add A anchors of shape (1, A, 4) to + # - K shifts of shape (K, 1, 4) to get + # - all shifted anchors of shape (K, A, 4) + # - reshape to (K*A, 4) shifted anchors + num_images = inputs[0].shape[0] + A = self._num_anchors + K = shifts.shape[0] + all_anchors = self._anchors[np.newaxis, :, :] + shifts[:, np.newaxis, :] + all_anchors = all_anchors.reshape((K * A, 4)) + + rois = np.empty((0, 5), dtype=np.float32) + roi_probs = np.empty((0, 1), dtype=np.float32) + for im_i in range(num_images): + im_i_boxes, im_i_probs = self.proposals_for_one_image( + im_info[im_i, :], all_anchors, bbox_deltas[im_i, :, :, :], + scores[im_i, :, :, :] + ) + batch_inds = im_i * np.ones( + (im_i_boxes.shape[0], 1), dtype=np.float32 + ) + im_i_rois = 
np.hstack((batch_inds, im_i_boxes)) + rois = np.append(rois, im_i_rois, axis=0) + roi_probs = np.append(roi_probs, im_i_probs, axis=0) + + outputs[0].reshape(rois.shape) + outputs[0].data[...] = rois + if len(outputs) > 1: + outputs[1].reshape(roi_probs.shape) + outputs[1].data[...] = roi_probs + + def proposals_for_one_image( + self, im_info, all_anchors, bbox_deltas, scores + ): + # Get mode-dependent configuration + cfg_key = 'TRAIN' if self._train else 'TEST' + pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N + post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N + nms_thresh = cfg[cfg_key].RPN_NMS_THRESH + min_size = cfg[cfg_key].RPN_MIN_SIZE + # Transpose and reshape predicted bbox transformations to get them + # into the same order as the anchors: + # - bbox deltas will be (4 * A, H, W) format from conv output + # - transpose to (H, W, 4 * A) + # - reshape to (H * W * A, 4) where rows are ordered by (H, W, A) + # in slowest to fastest order to match the enumerated anchors + bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((-1, 4)) + + # Same story for the scores: + # - scores are (A, H, W) format from conv output + # - transpose to (H, W, A) + # - reshape to (H * W * A, 1) where rows are ordered by (H, W, A) + # to match the order of anchors and bbox_deltas + scores = scores.transpose((1, 2, 0)).reshape((-1, 1)) + + # 4. sort all (proposal, score) pairs by score from highest to lowest + # 5. take top pre_nms_topN (e.g. 
6000) + if pre_nms_topN <= 0 or pre_nms_topN >= len(scores): + order = np.argsort(-scores.squeeze()) + else: + # Avoid sorting possibly large arrays; First partition to get top K + # unsorted and then sort just those (~20x faster for 200k scores) + inds = np.argpartition( + -scores.squeeze(), pre_nms_topN + )[:pre_nms_topN] + order = np.argsort(-scores[inds].squeeze()) + order = inds[order] + bbox_deltas = bbox_deltas[order, :] + all_anchors = all_anchors[order, :] + scores = scores[order] + + # Transform anchors into proposals via bbox transformations + proposals = box_utils.bbox_transform(all_anchors, bbox_deltas, self._reg_weights) + + # 2. clip proposals to image (may result in proposals with zero area + # that will be removed in the next step) + proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2]) + + # 3. remove predicted boxes with either height or width < min_size + keep = _filter_boxes(proposals, min_size, im_info) + proposals = proposals[keep, :] + scores = scores[keep] + + # 6. apply loose nms (e.g. threshold = 0.7) + # 7. take after_nms_topN (e.g. 300) + # 8. return the top proposals (-> RoIs top) + if nms_thresh > 0: + keep = box_utils.nms(np.hstack((proposals, scores)), nms_thresh) + if post_nms_topN > 0: + keep = keep[:post_nms_topN] + proposals = proposals[keep, :] + scores = scores[keep] + return proposals, scores + + +def _filter_boxes(boxes, min_size, im_info): + """Only keep boxes with both sides >= min_size and center within the image. + """ + # Compute the width and height of the proposal boxes as measured in the original + # image coordinate system (this is required to avoid "Negative Areas Found" + # assertions in other parts of the code that measure). 
+    im_scale = im_info[2]
+    ws_orig_scale = (boxes[:, 2] - boxes[:, 0]) / im_scale + 1
+    hs_orig_scale = (boxes[:, 3] - boxes[:, 1]) / im_scale + 1
+    # To avoid numerical issues we require the min_size to be at least 1 pixel in the
+    # original image
+    min_size = np.maximum(min_size, 1)
+    # Proposal center is computed relative to the scaled input image
+    ws = boxes[:, 2] - boxes[:, 0] + 1
+    hs = boxes[:, 3] - boxes[:, 1] + 1
+    x_ctr = boxes[:, 0] + ws / 2.
+    y_ctr = boxes[:, 1] + hs / 2.
+    keep = np.where(
+        (ws_orig_scale >= min_size)
+        & (hs_orig_scale >= min_size)
+        & (x_ctr < im_info[1])
+        & (y_ctr < im_info[0])
+    )[0]
+    return keep
diff --git a/detectron/ops/zero_even_op.cc b/detectron/ops/zero_even_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0b77fb24d436f24b68016465cdbf44b9627944b4
--- /dev/null
+++ b/detectron/ops/zero_even_op.cc
@@ -0,0 +1,54 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "zero_even_op.h"
+
+namespace caffe2 {
+
+// NOTE(review): the explicit specialization arguments <float, CPUContext>
+// were stripped from this patch (angle brackets lost, likely by an HTML/tag
+// sanitizer); restored here so the operator compiles.
+template <>
+bool ZeroEvenOp<float, CPUContext>::RunOnDevice() {
+  // Retrieve the input tensor.
+  const auto& X = Input(0);
+  CAFFE_ENFORCE(X.dim() == 1);
+
+  // Initialize the output tensor to a copy of the input tensor.
+  auto* Y = Output(0);
+  Y->CopyFrom(X);
+
+  // Set output elements at even indices to zero.
+  // mutable_data requires its element-type template argument (restored).
+  auto* Y_data = Y->mutable_data<float>();
+  for (auto i = 0; i < Y->numel(); i += 2) {
+    Y_data[i] = 0.0f;
+  }
+
+  return true;
+}
+
+REGISTER_CPU_OPERATOR(ZeroEven, ZeroEvenOp<float, CPUContext>);
+
+OPERATOR_SCHEMA(ZeroEven)
+    .NumInputs(1)
+    .NumOutputs(1)
+    .Input(
+        0,
+        "X",
+        "1D input tensor")
+    .Output(
+        0,
+        "Y",
+        "1D output tensor");
+
+} // namespace caffe2
diff --git a/detectron/ops/zero_even_op.cu b/detectron/ops/zero_even_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..a606727d9a5a5c063116834c071553c4501c0f80
--- /dev/null
+++ b/detectron/ops/zero_even_op.cu
@@ -0,0 +1,64 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "caffe2/core/context_gpu.h"
+
+#include "zero_even_op.h"
+
+namespace caffe2 {
+
+namespace {
+
+// Restored the stripped `template <typename T>` header for this kernel.
+template <typename T>
+__global__ void SetEvenIndsToVal(size_t num_even_inds, T val, T* data) {
+  CUDA_1D_KERNEL_LOOP(i, num_even_inds) {
+    data[i << 1] = val;
+  }
+}
+
+} // namespace
+
+// Restored the stripped <float, CUDAContext> specialization arguments.
+template <>
+bool ZeroEvenOp<float, CUDAContext>::RunOnDevice() {
+  // Retrieve the input tensor.
+  const auto& X = Input(0);
+  CAFFE_ENFORCE(X.ndim() == 1);
+
+  // Initialize the output tensor to a copy of the input tensor.
+  auto* Y = Output(0);
+  Y->CopyFrom(X);
+
+  // Set output elements at even indices to zero.
+  auto output_size = Y->size();
+
+  if (output_size > 0) {
+    size_t num_even_inds = output_size / 2 + output_size % 2;
+    // NOTE(review): the kernel launch lost its <float> template argument and
+    // the entire <<<grid, block, shmem, stream>>> execution configuration in
+    // this patch (angle brackets stripped); restored below.
+    SetEvenIndsToVal<float>
+        <<<CAFFE_GET_BLOCKS(num_even_inds),
+           CAFFE_CUDA_NUM_THREADS,
+           0,
+           context_.cuda_stream()>>>(
+            num_even_inds,
+            0.0f,
+            Y->mutable_data<float>());
+  }
+
+  return true;
+}
+
+REGISTER_CUDA_OPERATOR(ZeroEven, ZeroEvenOp<float, CUDAContext>);
+
+} // namespace caffe2
diff --git a/detectron/ops/zero_even_op.h b/detectron/ops/zero_even_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..6aa3da8bc709d9f4c6870e8edfa0844510a1d10d
--- /dev/null
+++ b/detectron/ops/zero_even_op.h
@@ -0,0 +1,46 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ZERO_EVEN_OP_H_
+#define ZERO_EVEN_OP_H_
+
+#include "caffe2/core/context.h"
+#include "caffe2/core/operator.h"
+
+namespace caffe2 {
+
+/**
+ * ZeroEven operator. Zeros elements at even indices of an 1D array.
+ * Elements at odd indices are preserved.
+ *
+ * This toy operator is an example of a custom operator and may be a useful
+ * reference for adding new custom operators to the Detectron codebase.
+ */
+template <typename T, class Context>
+class ZeroEvenOp final : public Operator<Context> {
+ public:
+  // Introduce Operator helper members.
+  USE_OPERATOR_CONTEXT_FUNCTIONS;
+
+  ZeroEvenOp(const OperatorDef& operator_def, Workspace* ws)
+      : Operator<Context>(operator_def, ws) {}
+
+  bool RunOnDevice() override;
+};
+
+} // namespace caffe2
+
+#endif // ZERO_EVEN_OP_H_
diff --git a/detectron/roi_data/__init__.py b/detectron/roi_data/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/detectron/roi_data/data_utils.py b/detectron/roi_data/data_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..54de8a35228534784ec52d58912e5d0ea42f0941
--- /dev/null
+++ b/detectron/roi_data/data_utils.py
@@ -0,0 +1,125 @@
+# Copyright (c) 2017-present, Facebook, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##############################################################################
+
+"""Common utility functions for RPN and RetinaNet minibatch blobs preparation.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from collections import namedtuple
+import logging
+import numpy as np
+import threading
+
+from detectron.core.config import cfg
+from detectron.modeling.generate_anchors import generate_anchors
+import detectron.utils.boxes as box_utils
+
+logger = logging.getLogger(__name__)
+
+
+# octave and aspect fields are only used on RetinaNet.
Octave corresponds to the +# scale of the anchor and aspect denotes which aspect ratio is used in the range +# of aspect ratios +FieldOfAnchors = namedtuple( + 'FieldOfAnchors', [ + 'field_of_anchors', 'num_cell_anchors', 'stride', 'field_size', + 'octave', 'aspect' + ] +) + +# Cache for memoizing _get_field_of_anchors +_threadlocal_foa = threading.local() + + +def get_field_of_anchors( + stride, anchor_sizes, anchor_aspect_ratios, octave=None, aspect=None +): + global _threadlocal_foa + if not hasattr(_threadlocal_foa, 'cache'): + _threadlocal_foa.cache = {} + + cache_key = str(stride) + str(anchor_sizes) + str(anchor_aspect_ratios) + if cache_key in _threadlocal_foa.cache: + return _threadlocal_foa.cache[cache_key] + + # Anchors at a single feature cell + cell_anchors = generate_anchors( + stride=stride, sizes=anchor_sizes, aspect_ratios=anchor_aspect_ratios + ) + num_cell_anchors = cell_anchors.shape[0] + + # Generate canonical proposals from shifted anchors + # Enumerate all shifted positions on the (H, W) grid + fpn_max_size = cfg.FPN.COARSEST_STRIDE * np.ceil( + cfg.TRAIN.MAX_SIZE / float(cfg.FPN.COARSEST_STRIDE) + ) + field_size = int(np.ceil(fpn_max_size / float(stride))) + shifts = np.arange(0, field_size) * stride + shift_x, shift_y = np.meshgrid(shifts, shifts) + shift_x = shift_x.ravel() + shift_y = shift_y.ravel() + shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose() + + # Broacast anchors over shifts to enumerate all anchors at all positions + # in the (H, W) grid: + # - add A cell anchors of shape (1, A, 4) to + # - K shifts of shape (K, 1, 4) to get + # - all shifted anchors of shape (K, A, 4) + # - reshape to (K*A, 4) shifted anchors + A = num_cell_anchors + K = shifts.shape[0] + field_of_anchors = ( + cell_anchors.reshape((1, A, 4)) + + shifts.reshape((1, K, 4)).transpose((1, 0, 2)) + ) + field_of_anchors = field_of_anchors.reshape((K * A, 4)) + foa = FieldOfAnchors( + field_of_anchors=field_of_anchors.astype(np.float32), + 
num_cell_anchors=num_cell_anchors, + stride=stride, + field_size=field_size, + octave=octave, + aspect=aspect + ) + _threadlocal_foa.cache[cache_key] = foa + return foa + + +def unmap(data, count, inds, fill=0): + """Unmap a subset of item (data) back to the original set of items (of + size count)""" + if count == len(inds): + return data + + if len(data.shape) == 1: + ret = np.empty((count, ), dtype=data.dtype) + ret.fill(fill) + ret[inds] = data + else: + ret = np.empty((count, ) + data.shape[1:], dtype=data.dtype) + ret.fill(fill) + ret[inds, :] = data + return ret + + +def compute_targets(ex_rois, gt_rois, weights=(1.0, 1.0, 1.0, 1.0)): + """Compute bounding-box regression targets for an image.""" + return box_utils.bbox_transform_inv(ex_rois, gt_rois, weights).astype( + np.float32, copy=False + ) diff --git a/detectron/roi_data/fast_rcnn.py b/detectron/roi_data/fast_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..56e96987b7ea2fa4f4ba11573d24ccb687707ae5 --- /dev/null +++ b/detectron/roi_data/fast_rcnn.py @@ -0,0 +1,265 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Construct minibatches for Fast R-CNN training. Handles the minibatch blobs +that are specific to Fast R-CNN. Other blobs that are generic to RPN, etc. +are handled by their respecitive roi_data modules. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import logging +import numpy as np +import numpy.random as npr + +from detectron.core.config import cfg +import detectron.modeling.FPN as fpn +import detectron.roi_data.keypoint_rcnn as keypoint_rcnn_roi_data +import detectron.roi_data.mask_rcnn as mask_rcnn_roi_data +import detectron.utils.blob as blob_utils +import detectron.utils.boxes as box_utils + +logger = logging.getLogger(__name__) + + +def get_fast_rcnn_blob_names(is_training=True): + """Fast R-CNN blob names.""" + # rois blob: holds R regions of interest, each is a 5-tuple + # (batch_idx, x1, y1, x2, y2) specifying an image batch index and a + # rectangle (x1, y1, x2, y2) + blob_names = ['rois'] + if is_training: + # labels_int32 blob: R categorical labels in [0, ..., K] for K + # foreground classes plus background + blob_names += ['labels_int32'] + if is_training: + # bbox_targets blob: R bounding-box regression targets with 4 + # targets per class + blob_names += ['bbox_targets'] + # bbox_inside_weights blob: At most 4 targets per roi are active + # this binary vector sepcifies the subset of active targets + blob_names += ['bbox_inside_weights'] + blob_names += ['bbox_outside_weights'] + if is_training and cfg.MODEL.MASK_ON: + # 'mask_rois': RoIs sampled for training the mask prediction branch. + # Shape is (#masks, 5) in format (batch_idx, x1, y1, x2, y2). + blob_names += ['mask_rois'] + # 'roi_has_mask': binary labels for the RoIs specified in 'rois' + # indicating if each RoI has a mask or not. Note that in some cases + # a *bg* RoI will have an all -1 (ignore) mask associated with it in + # the case that no fg RoIs can be sampled. Shape is (batchsize). + blob_names += ['roi_has_mask_int32'] + # 'masks_int32' holds binary masks for the RoIs specified in + # 'mask_rois'. Shape is (#fg, M * M) where M is the ground truth + # mask size. 
+ blob_names += ['masks_int32'] + if is_training and cfg.MODEL.KEYPOINTS_ON: + # 'keypoint_rois': RoIs sampled for training the keypoint prediction + # branch. Shape is (#instances, 5) in format (batch_idx, x1, y1, x2, + # y2). + blob_names += ['keypoint_rois'] + # 'keypoint_locations_int32': index of keypoint in + # KRCNN.HEATMAP_SIZE**2 sized array. Shape is (#instances). Used in + # SoftmaxWithLoss. + blob_names += ['keypoint_locations_int32'] + # 'keypoint_weights': weight assigned to each target in + # 'keypoint_locations_int32'. Shape is (#instances). Used in + # SoftmaxWithLoss. + blob_names += ['keypoint_weights'] + # 'keypoint_loss_normalizer': optional normalization factor to use if + # cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is False. + blob_names += ['keypoint_loss_normalizer'] + if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS: + # Support for FPN multi-level rois without bbox reg isn't + # implemented (... and may never be implemented) + k_max = cfg.FPN.ROI_MAX_LEVEL + k_min = cfg.FPN.ROI_MIN_LEVEL + # Same format as rois blob, but one per FPN level + for lvl in range(k_min, k_max + 1): + blob_names += ['rois_fpn' + str(lvl)] + blob_names += ['rois_idx_restore_int32'] + if is_training: + if cfg.MODEL.MASK_ON: + for lvl in range(k_min, k_max + 1): + blob_names += ['mask_rois_fpn' + str(lvl)] + blob_names += ['mask_rois_idx_restore_int32'] + if cfg.MODEL.KEYPOINTS_ON: + for lvl in range(k_min, k_max + 1): + blob_names += ['keypoint_rois_fpn' + str(lvl)] + blob_names += ['keypoint_rois_idx_restore_int32'] + return blob_names + + +def add_fast_rcnn_blobs(blobs, im_scales, roidb): + """Add blobs needed for training Fast R-CNN style models.""" + # Sample training RoIs from each image and append them to the blob lists + for im_i, entry in enumerate(roidb): + frcn_blobs = _sample_rois(entry, im_scales[im_i], im_i) + for k, v in frcn_blobs.items(): + blobs[k].append(v) + # Concat the training blob lists into tensors + for k, v in blobs.items(): + if 
isinstance(v, list) and len(v) > 0: + blobs[k] = np.concatenate(v) + # Add FPN multilevel training RoIs, if configured + if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS: + _add_multilevel_rois(blobs) + + # Perform any final work and validity checks after the collating blobs for + # all minibatch images + valid = True + if cfg.MODEL.KEYPOINTS_ON: + valid = keypoint_rcnn_roi_data.finalize_keypoint_minibatch(blobs, valid) + + return valid + + +def _sample_rois(roidb, im_scale, batch_idx): + """Generate a random sample of RoIs comprising foreground and background + examples. + """ + rois_per_image = int(cfg.TRAIN.BATCH_SIZE_PER_IM) + fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)) + max_overlaps = roidb['max_overlaps'] + + # Select foreground RoIs as those with >= FG_THRESH overlap + fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] + # Guard against the case when an image has fewer than fg_rois_per_image + # foreground RoIs + fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size) + # Sample foreground regions without replacement + if fg_inds.size > 0: + fg_inds = npr.choice( + fg_inds, size=fg_rois_per_this_image, replace=False + ) + + # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) + bg_inds = np.where( + (max_overlaps < cfg.TRAIN.BG_THRESH_HI) & + (max_overlaps >= cfg.TRAIN.BG_THRESH_LO) + )[0] + # Compute number of background RoIs to take from this image (guarding + # against there being fewer than desired) + bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image + bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_inds.size) + # Sample foreground regions without replacement + if bg_inds.size > 0: + bg_inds = npr.choice( + bg_inds, size=bg_rois_per_this_image, replace=False + ) + + # The indices that we're selecting (both fg and bg) + keep_inds = np.append(fg_inds, bg_inds) + # Label is the class each RoI has max overlap with + sampled_labels = roidb['max_classes'][keep_inds] 
+ sampled_labels[fg_rois_per_this_image:] = 0 # Label bg RoIs with class 0 + sampled_boxes = roidb['boxes'][keep_inds] + + bbox_targets, bbox_inside_weights = _expand_bbox_targets( + roidb['bbox_targets'][keep_inds, :] + ) + bbox_outside_weights = np.array( + bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype + ) + + # Scale rois and format as (batch_idx, x1, y1, x2, y2) + sampled_rois = sampled_boxes * im_scale + repeated_batch_idx = batch_idx * blob_utils.ones((sampled_rois.shape[0], 1)) + sampled_rois = np.hstack((repeated_batch_idx, sampled_rois)) + + # Base Fast R-CNN blobs + blob_dict = dict( + labels_int32=sampled_labels.astype(np.int32, copy=False), + rois=sampled_rois, + bbox_targets=bbox_targets, + bbox_inside_weights=bbox_inside_weights, + bbox_outside_weights=bbox_outside_weights + ) + + # Optionally add Mask R-CNN blobs + if cfg.MODEL.MASK_ON: + mask_rcnn_roi_data.add_mask_rcnn_blobs( + blob_dict, sampled_boxes, roidb, im_scale, batch_idx + ) + + # Optionally add Keypoint R-CNN blobs + if cfg.MODEL.KEYPOINTS_ON: + keypoint_rcnn_roi_data.add_keypoint_rcnn_blobs( + blob_dict, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx + ) + + return blob_dict + + +def _expand_bbox_targets(bbox_target_data): + """Bounding-box regression targets are stored in a compact form in the + roidb. + + This function expands those targets into the 4-of-4*K representation used + by the network (i.e. only one class has non-zero targets). The loss weights + are similarly expanded. 
+ + Returns: + bbox_target_data (ndarray): N x 4K blob of regression targets + bbox_inside_weights (ndarray): N x 4K blob of loss weights + """ + num_bbox_reg_classes = cfg.MODEL.NUM_CLASSES + if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG: + num_bbox_reg_classes = 2 # bg and fg + + clss = bbox_target_data[:, 0] + bbox_targets = blob_utils.zeros((clss.size, 4 * num_bbox_reg_classes)) + bbox_inside_weights = blob_utils.zeros(bbox_targets.shape) + inds = np.where(clss > 0)[0] + for ind in inds: + cls = int(clss[ind]) + start = 4 * cls + end = start + 4 + bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] + bbox_inside_weights[ind, start:end] = (1.0, 1.0, 1.0, 1.0) + return bbox_targets, bbox_inside_weights + + +def _add_multilevel_rois(blobs): + """By default training RoIs are added for a single feature map level only. + When using FPN, the RoIs must be distributed over different FPN levels + according the level assignment heuristic (see: modeling.FPN. + map_rois_to_fpn_levels). + """ + lvl_min = cfg.FPN.ROI_MIN_LEVEL + lvl_max = cfg.FPN.ROI_MAX_LEVEL + + def _distribute_rois_over_fpn_levels(rois_blob_name): + """Distribute rois over the different FPN levels.""" + # Get target level for each roi + # Recall blob rois are in (batch_idx, x1, y1, x2, y2) format, hence take + # the box coordinates from columns 1:5 + target_lvls = fpn.map_rois_to_fpn_levels( + blobs[rois_blob_name][:, 1:5], lvl_min, lvl_max + ) + # Add per FPN level roi blobs named like: _fpn + fpn.add_multilevel_roi_blobs( + blobs, rois_blob_name, blobs[rois_blob_name], target_lvls, lvl_min, + lvl_max + ) + + _distribute_rois_over_fpn_levels('rois') + if cfg.MODEL.MASK_ON: + _distribute_rois_over_fpn_levels('mask_rois') + if cfg.MODEL.KEYPOINTS_ON: + _distribute_rois_over_fpn_levels('keypoint_rois') diff --git a/detectron/roi_data/keypoint_rcnn.py b/detectron/roi_data/keypoint_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..b1aa3dd54f057f97fb255380fbd518bfbc113f20 --- /dev/null +++ 
b/detectron/roi_data/keypoint_rcnn.py @@ -0,0 +1,129 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Construct minibatches for Mask R-CNN training when keypoints are enabled. +Handles the minibatch blobs that are specific to training Mask R-CNN for +keypoint detection. Other blobs that are generic to RPN or Fast/er R-CNN are +handled by their respecitive roi_data modules. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import logging +import numpy as np + +from detectron.core.config import cfg +import detectron.utils.blob as blob_utils +import detectron.utils.keypoints as keypoint_utils + +logger = logging.getLogger(__name__) + + +def add_keypoint_rcnn_blobs( + blobs, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx +): + """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary.""" + # Note: gt_inds must match how they're computed in + # datasets.json_dataset._merge_proposal_boxes_into_roidb + gt_inds = np.where(roidb['gt_classes'] > 0)[0] + max_overlaps = roidb['max_overlaps'] + gt_keypoints = roidb['gt_keypoints'] + + ind_kp = gt_inds[roidb['box_to_gt_ind_map']] + within_box = _within_box(gt_keypoints[ind_kp, :, :], roidb['boxes']) + vis_kp = gt_keypoints[ind_kp, 2, :] > 0 + is_visible = np.sum(np.logical_and(vis_kp, within_box), axis=1) > 0 + kp_fg_inds = np.where( + np.logical_and(max_overlaps >= cfg.TRAIN.FG_THRESH, is_visible) + )[0] + + kp_fg_rois_per_this_image = np.minimum(fg_rois_per_image, kp_fg_inds.size) + if kp_fg_inds.size > kp_fg_rois_per_this_image: + kp_fg_inds = np.random.choice( + kp_fg_inds, size=kp_fg_rois_per_this_image, replace=False + ) + + sampled_fg_rois = roidb['boxes'][kp_fg_inds] + box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds] + + num_keypoints = gt_keypoints.shape[2] + sampled_keypoints = -np.ones( + (len(sampled_fg_rois), gt_keypoints.shape[1], num_keypoints), + dtype=gt_keypoints.dtype + ) + for ii in range(len(sampled_fg_rois)): + ind = box_to_gt_ind_map[ii] + if ind >= 0: + sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :] + assert np.sum(sampled_keypoints[ii, 2, :]) > 0 + + heats, weights = keypoint_utils.keypoints_to_heatmap_labels( + sampled_keypoints, sampled_fg_rois + ) + + shape = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS, 
1) + heats = heats.reshape(shape) + weights = weights.reshape(shape) + + sampled_fg_rois *= im_scale + repeated_batch_idx = batch_idx * blob_utils.ones( + (sampled_fg_rois.shape[0], 1) + ) + sampled_fg_rois = np.hstack((repeated_batch_idx, sampled_fg_rois)) + + blobs['keypoint_rois'] = sampled_fg_rois + blobs['keypoint_locations_int32'] = heats.astype(np.int32, copy=False) + blobs['keypoint_weights'] = weights + + +def finalize_keypoint_minibatch(blobs, valid): + """Finalize the minibatch after blobs for all minibatch images have been + collated. + """ + min_count = cfg.KRCNN.MIN_KEYPOINT_COUNT_FOR_VALID_MINIBATCH + num_visible_keypoints = np.sum(blobs['keypoint_weights']) + valid = ( + valid and len(blobs['keypoint_weights']) > 0 and + num_visible_keypoints > min_count + ) + # Normalizer to use if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS is False. + # See modeling.model_builder.add_keypoint_losses + norm = num_visible_keypoints / ( + cfg.TRAIN.IMS_PER_BATCH * cfg.TRAIN.BATCH_SIZE_PER_IM * + cfg.TRAIN.FG_FRACTION * cfg.KRCNN.NUM_KEYPOINTS + ) + blobs['keypoint_loss_normalizer'] = np.array(norm, dtype=np.float32) + return valid + + +def _within_box(points, boxes): + """Validate which keypoints are contained inside a given box. + + points: Nx2xK + boxes: Nx4 + output: NxK + """ + x_within = np.logical_and( + points[:, 0, :] >= np.expand_dims(boxes[:, 0], axis=1), + points[:, 0, :] <= np.expand_dims(boxes[:, 2], axis=1) + ) + y_within = np.logical_and( + points[:, 1, :] >= np.expand_dims(boxes[:, 1], axis=1), + points[:, 1, :] <= np.expand_dims(boxes[:, 3], axis=1) + ) + return np.logical_and(x_within, y_within) diff --git a/detectron/roi_data/loader.py b/detectron/roi_data/loader.py new file mode 100644 index 0000000000000000000000000000000000000000..71cb18f5e9f1f2179f16f5f04f358dfc929ebc8e --- /dev/null +++ b/detectron/roi_data/loader.py @@ -0,0 +1,295 @@ +# Copyright (c) 2017-present, Facebook, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Detectron data loader. The design is generic and abstracted away from any +details of the minibatch. A minibatch is a dictionary of blob name keys and +their associated numpy (float32 or int32) ndarray values. + +Outline of the data loader design: + +loader thread\ +loader thread \ / GPU 1 enqueue thread -> feed -> EnqueueOp +... -> minibatch queue -> ... +loader thread / \ GPU N enqueue thread -> feed -> EnqueueOp +loader thread/ + +<---------------------------- CPU -----------------------------|---- GPU ----> + +A pool of loader threads construct minibatches that are put onto the shared +minibatch queue. Each GPU has an enqueue thread that pulls a minibatch off the +minibatch queue, feeds the minibatch blobs into the workspace, and then runs +an EnqueueBlobsOp to place the minibatch blobs into the GPU's blobs queue. +During each fprop the first thing the network does is run a DequeueBlobsOp +in order to populate the workspace with the blobs from a queued minibatch. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from collections import deque +from collections import OrderedDict +import logging +import numpy as np +import signal +import threading +import time +import uuid +from six.moves import queue as Queue + +from caffe2.python import core, workspace + +from detectron.core.config import cfg +from detectron.roi_data.minibatch import get_minibatch +from detectron.roi_data.minibatch import get_minibatch_blob_names +from detectron.utils.coordinator import coordinated_get +from detectron.utils.coordinator import coordinated_put +from detectron.utils.coordinator import Coordinator +import detectron.utils.c2 as c2_utils + +logger = logging.getLogger(__name__) + + +class RoIDataLoader(object): + def __init__( + self, + roidb, + num_loaders=4, + minibatch_queue_size=64, + blobs_queue_capacity=8 + ): + self._roidb = roidb + self._lock = threading.Lock() + self._perm = deque(range(len(self._roidb))) + self._cur = 0 # _perm cursor + # The minibatch queue holds prepared training data in host (CPU) memory + # When training with N > 1 GPUs, each element in the minibatch queue + # is actually a partial minibatch which contributes 1 / N of the + # examples to the overall minibatch + self._minibatch_queue = Queue.Queue(maxsize=minibatch_queue_size) + self._blobs_queue_capacity = blobs_queue_capacity + # Random queue name in case one instantiates multple RoIDataLoaders + self._loader_id = uuid.uuid4() + self._blobs_queue_name = 'roi_blobs_queue_{}'.format(self._loader_id) + # Loader threads construct (partial) minibatches and put them on the + # minibatch queue + self._num_loaders = num_loaders + self._num_gpus = cfg.NUM_GPUS + self.coordinator = Coordinator() + + self._output_names = get_minibatch_blob_names() + self._shuffle_roidb_inds() + self.create_threads() + + def minibatch_loader_thread(self): + """Load mini-batches and 
put them onto the mini-batch queue.""" + with self.coordinator.stop_on_exception(): + while not self.coordinator.should_stop(): + blobs = self.get_next_minibatch() + # Blobs must be queued in the order specified by + # self.get_output_names + ordered_blobs = OrderedDict() + for key in self.get_output_names(): + assert blobs[key].dtype in (np.int32, np.float32), \ + 'Blob {} of dtype {} must have dtype of ' \ + 'np.int32 or np.float32'.format(key, blobs[key].dtype) + ordered_blobs[key] = blobs[key] + coordinated_put( + self.coordinator, self._minibatch_queue, ordered_blobs + ) + logger.info('Stopping mini-batch loading thread') + + def enqueue_blobs_thread(self, gpu_id, blob_names): + """Transfer mini-batches from a mini-batch queue to a BlobsQueue.""" + with self.coordinator.stop_on_exception(): + while not self.coordinator.should_stop(): + if self._minibatch_queue.qsize == 0: + logger.warning('Mini-batch queue is empty') + blobs = coordinated_get(self.coordinator, self._minibatch_queue) + self.enqueue_blobs(gpu_id, blob_names, blobs.values()) + logger.debug( + 'batch queue size {}'.format(self._minibatch_queue.qsize()) + ) + logger.info('Stopping enqueue thread') + + def get_next_minibatch(self): + """Return the blobs to be used for the next minibatch. Thread safe.""" + valid = False + while not valid: + db_inds = self._get_next_minibatch_inds() + minibatch_db = [self._roidb[i] for i in db_inds] + blobs, valid = get_minibatch(minibatch_db) + return blobs + + def _shuffle_roidb_inds(self): + """Randomly permute the training roidb. 
Not thread safe.""" + if cfg.TRAIN.ASPECT_GROUPING: + widths = np.array([r['width'] for r in self._roidb]) + heights = np.array([r['height'] for r in self._roidb]) + horz = (widths >= heights) + vert = np.logical_not(horz) + horz_inds = np.where(horz)[0] + vert_inds = np.where(vert)[0] + + horz_inds = np.random.permutation(horz_inds) + vert_inds = np.random.permutation(vert_inds) + mb = cfg.TRAIN.IMS_PER_BATCH + horz_inds = horz_inds[:(len(horz_inds) // mb) * mb] + vert_inds = vert_inds[:(len(vert_inds) // mb) * mb] + inds = np.hstack((horz_inds, vert_inds)) + + inds = np.reshape(inds, (-1, mb)) + row_perm = np.random.permutation(np.arange(inds.shape[0])) + inds = np.reshape(inds[row_perm, :], (-1, )) + self._perm = inds + else: + self._perm = np.random.permutation(np.arange(len(self._roidb))) + self._perm = deque(self._perm) + self._cur = 0 + + def _get_next_minibatch_inds(self): + """Return the roidb indices for the next minibatch. Thread safe.""" + with self._lock: + # We use a deque and always take the *first* IMS_PER_BATCH items + # followed by *rotating* the deque so that we see fresh items + # each time. If the length of _perm is not divisible by + # IMS_PER_BATCH, then we end up wrapping around the permutation. 
+ db_inds = [self._perm[i] for i in range(cfg.TRAIN.IMS_PER_BATCH)] + self._perm.rotate(-cfg.TRAIN.IMS_PER_BATCH) + self._cur += cfg.TRAIN.IMS_PER_BATCH + if self._cur >= len(self._perm): + self._shuffle_roidb_inds() + return db_inds + + def get_output_names(self): + return self._output_names + + def enqueue_blobs(self, gpu_id, blob_names, blobs): + """Put a mini-batch on a BlobsQueue.""" + assert len(blob_names) == len(blobs) + t = time.time() + dev = c2_utils.CudaDevice(gpu_id) + queue_name = 'gpu_{}/{}'.format(gpu_id, self._blobs_queue_name) + blob_names = ['gpu_{}/{}'.format(gpu_id, b) for b in blob_names] + for (blob_name, blob) in zip(blob_names, blobs): + workspace.FeedBlob(blob_name, blob, device_option=dev) + logger.debug( + 'enqueue_blobs {}: workspace.FeedBlob: {}'. + format(gpu_id, time.time() - t) + ) + t = time.time() + op = core.CreateOperator( + 'SafeEnqueueBlobs', [queue_name] + blob_names, + blob_names + [queue_name + '_enqueue_status'], + device_option=dev + ) + workspace.RunOperatorOnce(op) + logger.debug( + 'enqueue_blobs {}: workspace.RunOperatorOnce: {}'. 
+ format(gpu_id, time.time() - t) + ) + + def create_threads(self): + # Create mini-batch loader threads, each of which builds mini-batches + # and places them into a queue in CPU memory + self._workers = [ + threading.Thread(target=self.minibatch_loader_thread) + for _ in range(self._num_loaders) + ] + + # Create one BlobsQueue per GPU + # (enqueue_blob_names are unscoped) + enqueue_blob_names = self.create_blobs_queues() + + # Create one enqueuer thread per GPU + self._enqueuers = [ + threading.Thread( + target=self.enqueue_blobs_thread, + args=(gpu_id, enqueue_blob_names) + ) for gpu_id in range(self._num_gpus) + ] + + def start(self, prefill=False): + for w in self._workers + self._enqueuers: + w.setDaemon(True) + w.start() + if prefill: + logger.info('Pre-filling mini-batch queue...') + while not self._minibatch_queue.full(): + logger.info( + ' [{:d}/{:d}]'.format( + self._minibatch_queue.qsize(), + self._minibatch_queue.maxsize + ) + ) + time.sleep(0.1) + # Detect failure and shutdown + if self.coordinator.should_stop(): + self.shutdown() + break + + def has_stopped(self): + return self.coordinator.should_stop() + + def shutdown(self): + self.coordinator.request_stop() + self.coordinator.wait_for_stop() + self.close_blobs_queues() + for w in self._workers + self._enqueuers: + w.join() + + def create_blobs_queues(self): + """Create one BlobsQueue for each GPU to hold mini-batches.""" + for gpu_id in range(self._num_gpus): + with c2_utils.GpuNameScope(gpu_id): + workspace.RunOperatorOnce( + core.CreateOperator( + 'CreateBlobsQueue', [], [self._blobs_queue_name], + num_blobs=len(self.get_output_names()), + capacity=self._blobs_queue_capacity + ) + ) + return self.create_enqueue_blobs() + + def close_blobs_queues(self): + """Close a BlobsQueue.""" + for gpu_id in range(self._num_gpus): + with core.NameScope('gpu_{}'.format(gpu_id)): + workspace.RunOperatorOnce( + core.CreateOperator( + 'CloseBlobsQueue', [self._blobs_queue_name], [] + ) + ) + + def 
create_enqueue_blobs(self): + blob_names = self.get_output_names() + enqueue_blob_names = [ + '{}_enqueue_{}'.format(b, self._loader_id) for b in blob_names + ] + for gpu_id in range(self._num_gpus): + with c2_utils.NamedCudaScope(gpu_id): + for blob in enqueue_blob_names: + workspace.CreateBlob(core.ScopedName(blob)) + return enqueue_blob_names + + def register_sigint_handler(self): + def signal_handler(signal, frame): + logger.info( + 'SIGINT: Shutting down RoIDataLoader threads and exiting...' + ) + self.shutdown() + + signal.signal(signal.SIGINT, signal_handler) diff --git a/detectron/roi_data/mask_rcnn.py b/detectron/roi_data/mask_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..8be01bb39e5149624302b40735c80d3c3e039561 --- /dev/null +++ b/detectron/roi_data/mask_rcnn.py @@ -0,0 +1,126 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Construct minibatches for Mask R-CNN training. Handles the minibatch blobs +that are specific to Mask R-CNN. Other blobs that are generic to RPN or +Fast/er R-CNN are handled by their respecitive roi_data modules. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import logging +import numpy as np + +from detectron.core.config import cfg +import detectron.utils.blob as blob_utils +import detectron.utils.boxes as box_utils +import detectron.utils.segms as segm_utils + +logger = logging.getLogger(__name__) + + +def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx): + """Add Mask R-CNN specific blobs to the input blob dictionary.""" + # Prepare the mask targets by associating one gt mask to each training roi + # that has a fg (non-bg) class label. + M = cfg.MRCNN.RESOLUTION + polys_gt_inds = np.where( + (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0) + )[0] + polys_gt = [roidb['segms'][i] for i in polys_gt_inds] + boxes_from_polys = segm_utils.polys_to_boxes(polys_gt) + fg_inds = np.where(blobs['labels_int32'] > 0)[0] + roi_has_mask = blobs['labels_int32'].copy() + roi_has_mask[roi_has_mask > 0] = 1 + + if fg_inds.shape[0] > 0: + # Class labels for the foreground rois + mask_class_labels = blobs['labels_int32'][fg_inds] + masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True) + + # Find overlap between all foreground rois and the bounding boxes + # enclosing each segmentation + rois_fg = sampled_boxes[fg_inds] + overlaps_bbfg_bbpolys = box_utils.bbox_overlaps( + rois_fg.astype(np.float32, copy=False), + boxes_from_polys.astype(np.float32, copy=False) + ) + # Map from each fg rois to the index of the mask with highest overlap + # (measured by bbox overlap) + fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1) + + # add fg targets + for i in range(rois_fg.shape[0]): + fg_polys_ind = fg_polys_inds[i] + poly_gt = polys_gt[fg_polys_ind] + roi_fg = rois_fg[i] + # Rasterize the portion of the polygon mask within the given fg roi + # to an M x M binary image + mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M) + mask = 
np.array(mask > 0, dtype=np.int32) # Ensure it's binary + masks[i, :] = np.reshape(mask, M**2) + else: # If there are no fg masks (it does happen) + # The network cannot handle empty blobs, so we must provide a mask + # We simply take the first bg roi, given it an all -1's mask (ignore + # label), and label it with class zero (bg). + bg_inds = np.where(blobs['labels_int32'] == 0)[0] + # rois_fg is actually one background roi, but that's ok because ... + rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)) + # We give it an -1's blob (ignore label) + masks = -blob_utils.ones((1, M**2), int32=True) + # We label it with class = 0 (background) + mask_class_labels = blob_utils.zeros((1, )) + # Mark that the first roi has a mask + roi_has_mask[0] = 1 + + if cfg.MRCNN.CLS_SPECIFIC_MASK: + masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels) + + # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2) + rois_fg *= im_scale + repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1)) + rois_fg = np.hstack((repeated_batch_idx, rois_fg)) + + # Update blobs dict with Mask R-CNN blobs + blobs['mask_rois'] = rois_fg + blobs['roi_has_mask_int32'] = roi_has_mask + blobs['masks_int32'] = masks + + +def _expand_to_class_specific_mask_targets(masks, mask_class_labels): + """Expand masks from shape (#masks, M ** 2) to (#masks, #classes * M ** 2) + to encode class specific mask targets. 
+ """ + assert masks.shape[0] == mask_class_labels.shape[0] + M = cfg.MRCNN.RESOLUTION + + # Target values of -1 are "don't care" / ignore labels + mask_targets = -blob_utils.ones( + (masks.shape[0], cfg.MODEL.NUM_CLASSES * M**2), int32=True + ) + + for i in range(masks.shape[0]): + cls = int(mask_class_labels[i]) + start = M**2 * cls + end = start + M**2 + # Ignore background instance + # (only happens when there is no fg samples in an image) + if cls > 0: + mask_targets[i, start:end] = masks[i, :] + + return mask_targets diff --git a/detectron/roi_data/minibatch.py b/detectron/roi_data/minibatch.py new file mode 100644 index 0000000000000000000000000000000000000000..a680be92a1e9a9e939004ec25bafa293258a0e14 --- /dev/null +++ b/detectron/roi_data/minibatch.py @@ -0,0 +1,116 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## +# +# Based on: +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Construct minibatches for Detectron networks.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import cv2 +import logging +import numpy as np + +from detectron.core.config import cfg +import detectron.roi_data.fast_rcnn as fast_rcnn_roi_data +import detectron.roi_data.retinanet as retinanet_roi_data +import detectron.roi_data.rpn as rpn_roi_data +import detectron.utils.blob as blob_utils + +logger = logging.getLogger(__name__) + + +def get_minibatch_blob_names(is_training=True): + """Return blob names in the order in which they are read by the data loader. 
+ """ + # data blob: holds a batch of N images, each with 3 channels + blob_names = ['data'] + if cfg.RPN.RPN_ON: + # RPN-only or end-to-end Faster R-CNN + blob_names += rpn_roi_data.get_rpn_blob_names(is_training=is_training) + elif cfg.RETINANET.RETINANET_ON: + blob_names += retinanet_roi_data.get_retinanet_blob_names( + is_training=is_training + ) + else: + # Fast R-CNN like models trained on precomputed proposals + blob_names += fast_rcnn_roi_data.get_fast_rcnn_blob_names( + is_training=is_training + ) + return blob_names + + +def get_minibatch(roidb): + """Given a roidb, construct a minibatch sampled from it.""" + # We collect blobs from each image onto a list and then concat them into a + # single tensor, hence we initialize each blob to an empty list + blobs = {k: [] for k in get_minibatch_blob_names()} + # Get the input image blob, formatted for caffe2 + im_blob, im_scales = _get_image_blob(roidb) + blobs['data'] = im_blob + if cfg.RPN.RPN_ON: + # RPN-only or end-to-end Faster/Mask R-CNN + valid = rpn_roi_data.add_rpn_blobs(blobs, im_scales, roidb) + elif cfg.RETINANET.RETINANET_ON: + im_width, im_height = im_blob.shape[3], im_blob.shape[2] + # im_width, im_height corresponds to the network input: padded image + # (if needed) width and height. We pass it as input and slice the data + # accordingly so that we don't need to use SampleAsOp + valid = retinanet_roi_data.add_retinanet_blobs( + blobs, im_scales, roidb, im_width, im_height + ) + else: + # Fast R-CNN like models trained on precomputed proposals + valid = fast_rcnn_roi_data.add_fast_rcnn_blobs(blobs, im_scales, roidb) + return blobs, valid + + +def _get_image_blob(roidb): + """Builds an input blob from the images in the roidb at the specified + scales. 
+ """ + num_images = len(roidb) + # Sample random scales to use for each image in this batch + scale_inds = np.random.randint( + 0, high=len(cfg.TRAIN.SCALES), size=num_images + ) + processed_ims = [] + im_scales = [] + for i in range(num_images): + im = cv2.imread(roidb[i]['image']) + assert im is not None, \ + 'Failed to read image \'{}\''.format(roidb[i]['image']) + if roidb[i]['flipped']: + im = im[:, ::-1, :] + target_size = cfg.TRAIN.SCALES[scale_inds[i]] + im, im_scale = blob_utils.prep_im_for_blob( + im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE + ) + im_scales.append(im_scale) + processed_ims.append(im) + + # Create a blob to hold the input images + blob = blob_utils.im_list_to_blob(processed_ims) + + return blob, im_scales diff --git a/detectron/roi_data/retinanet.py b/detectron/roi_data/retinanet.py new file mode 100644 index 0000000000000000000000000000000000000000..b737042cd022913233be6f52432ae382353dee4a --- /dev/null +++ b/detectron/roi_data/retinanet.py @@ -0,0 +1,288 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +"""Compute minibatch blobs for training a RetinaNet network.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np +import logging + +import detectron.utils.boxes as box_utils +import detectron.roi_data.data_utils as data_utils +from detectron.core.config import cfg + + +logger = logging.getLogger(__name__) + + +def get_retinanet_blob_names(is_training=True): + """ + Returns blob names in the order in which they are read by the data + loader. + + N = number of images per minibatch + A = number of anchors = num_scales * num_aspect_ratios + (for example 9 used in RetinaNet paper) + H, W = spatial dimensions (different for each FPN level) + M = Out of all the anchors generated, depending on the positive/negative IoU + overlap thresholds, we will have M positive anchors. These are the anchors + that bounding box branch will regress on. 
+ + retnet_cls_labels -> labels for the cls branch for each FPN level + Shape: N x A x H x W + + retnet_roi_bbox_targets -> targets for the bbox regression branch + Shape: M x 4 + + retnet_roi_fg_bbox_locs -> for the bbox regression, since we are only + interested in regressing on fg bboxes which are + M in number and the output prediction of the network + is of shape N x (A * 4) x H x W + (in case of non class-specific bbox), so we + store the locations of positive fg boxes in this + blob retnet_roi_fg_bbox_locs of shape M x 4 where + each row looks like: [img_id, anchor_id, x_loc, y_loc] + """ + # im_info: (height, width, image scale) + blob_names = ['im_info'] + assert cfg.FPN.FPN_ON, "RetinaNet uses FPN for dense detection" + # Same format as RPN blobs, but one per FPN level + if is_training: + blob_names += ['retnet_fg_num', 'retnet_bg_num'] + for lvl in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1): + suffix = 'fpn{}'.format(lvl) + blob_names += [ + 'retnet_cls_labels_' + suffix, + 'retnet_roi_bbox_targets_' + suffix, + 'retnet_roi_fg_bbox_locs_' + suffix, + ] + return blob_names + + +def add_retinanet_blobs(blobs, im_scales, roidb, image_width, image_height): + """Add RetinaNet blobs.""" + # RetinaNet is applied to many feature levels, as in the FPN paper + k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL + scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE + num_aspect_ratios = len(cfg.RETINANET.ASPECT_RATIOS) + aspect_ratios = cfg.RETINANET.ASPECT_RATIOS + anchor_scale = cfg.RETINANET.ANCHOR_SCALE + + # get anchors from all levels for all scales/aspect ratios + foas = [] + for lvl in range(k_min, k_max + 1): + stride = 2. 
** lvl + for octave in range(scales_per_octave): + octave_scale = 2 ** (octave / float(scales_per_octave)) + for idx in range(num_aspect_ratios): + anchor_sizes = (stride * octave_scale * anchor_scale, ) + anchor_aspect_ratios = (aspect_ratios[idx], ) + foa = data_utils.get_field_of_anchors( + stride, anchor_sizes, anchor_aspect_ratios, octave, idx) + foas.append(foa) + all_anchors = np.concatenate([f.field_of_anchors for f in foas]) + + blobs['retnet_fg_num'], blobs['retnet_bg_num'] = 0.0, 0.0 + for im_i, entry in enumerate(roidb): + scale = im_scales[im_i] + im_height = np.round(entry['height'] * scale) + im_width = np.round(entry['width'] * scale) + gt_inds = np.where( + (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0] + assert len(gt_inds) > 0, \ + 'Empty ground truth empty for image is not allowed. Please check.' + + gt_rois = entry['boxes'][gt_inds, :] * scale + gt_classes = entry['gt_classes'][gt_inds] + + im_info = np.array([[im_height, im_width, scale]], dtype=np.float32) + blobs['im_info'].append(im_info) + + retinanet_blobs, fg_num, bg_num = _get_retinanet_blobs( + foas, all_anchors, gt_rois, gt_classes, image_width, image_height) + for i, foa in enumerate(foas): + for k, v in retinanet_blobs[i].items(): + # the way it stacks is: + # [[anchors for image1] + [anchors for images 2]] + level = int(np.log2(foa.stride)) + key = '{}_fpn{}'.format(k, level) + if k == 'retnet_roi_fg_bbox_locs': + v[:, 0] = im_i + # loc_stride: 80 * 4 if cls_specific else 4 + loc_stride = 4 # 4 coordinate corresponding to bbox prediction + if cfg.RETINANET.CLASS_SPECIFIC_BBOX: + loc_stride *= (cfg.MODEL.NUM_CLASSES - 1) + anchor_ind = foa.octave * num_aspect_ratios + foa.aspect + # v[:, 1] is the class label [range 0-80] if we do + # class-specfic bbox otherwise it is 0. 
In case of class + # specific, based on the label, the location of current + # anchor is class_label * 4 and then we take into account + # the anchor_ind if the anchors + v[:, 1] *= 4 + v[:, 1] += loc_stride * anchor_ind + blobs[key].append(v) + blobs['retnet_fg_num'] += fg_num + blobs['retnet_bg_num'] += bg_num + + blobs['retnet_fg_num'] = blobs['retnet_fg_num'].astype(np.float32) + blobs['retnet_bg_num'] = blobs['retnet_bg_num'].astype(np.float32) + + N = len(roidb) + for k, v in blobs.items(): + if isinstance(v, list) and len(v) > 0: + # compute number of anchors + A = int(len(v) / N) + # for the cls branch labels [per fpn level], + # we have blobs['retnet_cls_labels_fpn{}'] as a list until this step + # and length of this list is N x A where + # N = num_images, A = num_anchors for example, N = 2, A = 9 + # Each element of the list has the shape 1 x 1 x H x W where H, W are + # spatial dimension of curret fpn lvl. Let a{i} denote the element + # corresponding to anchor i [9 anchors total] in the list. + # The elements in the list are in order [[a0, ..., a9], [a0, ..., a9]] + # however the network will make predictions like 2 x (9 * 80) x H x W + # so we first concatenate the elements of each image to a numpy array + # and then concatenate the two images to get the 2 x 9 x H x W + + if k.find('retnet_cls_labels') >= 0: + tmp = [] + # concat anchors within an image + for i in range(0, len(v), A): + tmp.append(np.concatenate(v[i: i + A], axis=1)) + # concat images + blobs[k] = np.concatenate(tmp, axis=0) + else: + # for the bbox branch elements [per FPN level], + # we have the targets and the fg boxes locations + # in the shape: M x 4 where M is the number of fg locations in a + # given image at the current FPN level. For the given level, + # the bbox predictions will be. 
The elements in the list are in + # order [[a0, ..., a9], [a0, ..., a9]] + # Concatenate them to form M x 4 + blobs[k] = np.concatenate(v, axis=0) + return True + + +def _get_retinanet_blobs( + foas, all_anchors, gt_boxes, gt_classes, im_width, im_height): + total_anchors = all_anchors.shape[0] + logger.debug('Getting mad blobs: im_height {} im_width: {}'.format( + im_height, im_width)) + + inds_inside = np.arange(all_anchors.shape[0]) + anchors = all_anchors + num_inside = len(inds_inside) + + logger.debug('total_anchors: {}'.format(total_anchors)) + logger.debug('inds_inside: {}'.format(num_inside)) + logger.debug('anchors.shape: {}'.format(anchors.shape)) + + # Compute anchor labels: + # label=1 is positive, 0 is negative, -1 is don't care (ignore) + labels = np.empty((num_inside, ), dtype=np.float32) + labels.fill(-1) + if len(gt_boxes) > 0: + # Compute overlaps between the anchors and the gt boxes overlaps + anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes) + # Map from anchor to gt box that has highest overlap + anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1) + # For each anchor, amount of overlap with most overlapping gt box + anchor_to_gt_max = anchor_by_gt_overlap[ + np.arange(num_inside), anchor_to_gt_argmax] + + # Map from gt box to an anchor that has highest overlap + gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0) + # For each gt box, amount of overlap with most overlapping anchor + gt_to_anchor_max = anchor_by_gt_overlap[ + gt_to_anchor_argmax, np.arange(anchor_by_gt_overlap.shape[1])] + # Find all anchors that share the max overlap amount + # (this includes many ties) + anchors_with_max_overlap = np.where( + anchor_by_gt_overlap == gt_to_anchor_max)[0] + + # Fg label: for each gt use anchors with highest overlap + # (including ties) + gt_inds = anchor_to_gt_argmax[anchors_with_max_overlap] + labels[anchors_with_max_overlap] = gt_classes[gt_inds] + # Fg label: above threshold IOU + inds = anchor_to_gt_max >= 
cfg.RETINANET.POSITIVE_OVERLAP + gt_inds = anchor_to_gt_argmax[inds] + labels[inds] = gt_classes[gt_inds] + + fg_inds = np.where(labels >= 1)[0] + bg_inds = np.where(anchor_to_gt_max < cfg.RETINANET.NEGATIVE_OVERLAP)[0] + labels[bg_inds] = 0 + num_fg, num_bg = len(fg_inds), len(bg_inds) + + bbox_targets = np.zeros((num_inside, 4), dtype=np.float32) + bbox_targets[fg_inds, :] = data_utils.compute_targets( + anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :]) + + # Map up to original set of anchors + labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1) + bbox_targets = data_utils.unmap(bbox_targets, total_anchors, inds_inside, fill=0) + + # Split the generated labels, etc. into labels per each field of anchors + blobs_out = [] + start_idx = 0 + for foa in foas: + H = foa.field_size + W = foa.field_size + end_idx = start_idx + H * W + _labels = labels[start_idx:end_idx] + _bbox_targets = bbox_targets[start_idx:end_idx, :] + start_idx = end_idx + + # labels output with shape (1, height, width) + _labels = _labels.reshape((1, 1, H, W)) + # bbox_targets output with shape (1, 4 * A, height, width) + _bbox_targets = _bbox_targets.reshape((1, H, W, 4)).transpose(0, 3, 1, 2) + stride = foa.stride + w = int(im_width / stride) + h = int(im_height / stride) + + # data for select_smooth_l1 loss + num_classes = cfg.MODEL.NUM_CLASSES - 1 + inds_4d = np.where(_labels > 0) + M = len(inds_4d) + _roi_bbox_targets = np.zeros((0, 4)) + _roi_fg_bbox_locs = np.zeros((0, 4)) + if M > 0: + im_inds, y, x = inds_4d[0], inds_4d[2], inds_4d[3] + _roi_bbox_targets = np.zeros((len(im_inds), 4)) + _roi_fg_bbox_locs = np.zeros((len(im_inds), 4)) + lbls = _labels[im_inds, :, y, x] + for i, lbl in enumerate(lbls): + l = lbl[0] - 1 + if not cfg.RETINANET.CLASS_SPECIFIC_BBOX: + l = 0 + assert l >= 0 and l < num_classes, 'label out of the range' + _roi_bbox_targets[i, :] = _bbox_targets[:, :, y[i], x[i]] + _roi_fg_bbox_locs[i, :] = np.array([[0, l, y[i], x[i]]]) + 
blobs_out.append( + dict( + retnet_cls_labels=_labels[:, :, 0:h, 0:w].astype(np.int32), + retnet_roi_bbox_targets=_roi_bbox_targets.astype(np.float32), + retnet_roi_fg_bbox_locs=_roi_fg_bbox_locs.astype(np.float32), + )) + out_num_fg = np.array([num_fg + 1.0], dtype=np.float32) + out_num_bg = ( + np.array([num_bg + 1.0]) * (cfg.MODEL.NUM_CLASSES - 1) + + out_num_fg * (cfg.MODEL.NUM_CLASSES - 2)) + return blobs_out, out_num_fg, out_num_bg diff --git a/detectron/roi_data/rpn.py b/detectron/roi_data/rpn.py new file mode 100644 index 0000000000000000000000000000000000000000..6adb5a75b53a2c7a71dbd68fded7b52594512276 --- /dev/null +++ b/detectron/roi_data/rpn.py @@ -0,0 +1,280 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +"""Minibatch construction for Region Proposal Networks (RPN).""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import logging +import numpy as np +import numpy.random as npr + +from detectron.core.config import cfg +import detectron.roi_data.data_utils as data_utils +import detectron.utils.blob as blob_utils +import detectron.utils.boxes as box_utils + +logger = logging.getLogger(__name__) + + +def get_rpn_blob_names(is_training=True): + """Blob names used by RPN.""" + # im_info: (height, width, image scale) + blob_names = ['im_info'] + if is_training: + # gt boxes: (batch_idx, x1, y1, x2, y2, cls) + blob_names += ['roidb'] + if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN: + # Same format as RPN blobs, but one per FPN level + for lvl in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1): + blob_names += [ + 'rpn_labels_int32_wide_fpn' + str(lvl), + 'rpn_bbox_targets_wide_fpn' + str(lvl), + 'rpn_bbox_inside_weights_wide_fpn' + str(lvl), + 'rpn_bbox_outside_weights_wide_fpn' + str(lvl) + ] + else: + # Single level RPN blobs + blob_names += [ + 'rpn_labels_int32_wide', + 'rpn_bbox_targets_wide', + 'rpn_bbox_inside_weights_wide', + 'rpn_bbox_outside_weights_wide' + ] + return blob_names + + +def add_rpn_blobs(blobs, im_scales, roidb): + """Add blobs needed training RPN-only and end-to-end Faster R-CNN models.""" + if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN: + # RPN applied to many feature levels, as in the FPN paper + k_max = cfg.FPN.RPN_MAX_LEVEL + k_min = cfg.FPN.RPN_MIN_LEVEL + foas = [] + for lvl in range(k_min, k_max + 1): + field_stride = 2.**lvl + anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ) + anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS + foa = data_utils.get_field_of_anchors( + field_stride, anchor_sizes, anchor_aspect_ratios + ) + 
foas.append(foa) + all_anchors = np.concatenate([f.field_of_anchors for f in foas]) + else: + foa = data_utils.get_field_of_anchors( + cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS + ) + all_anchors = foa.field_of_anchors + + for im_i, entry in enumerate(roidb): + scale = im_scales[im_i] + im_height = np.round(entry['height'] * scale) + im_width = np.round(entry['width'] * scale) + gt_inds = np.where( + (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0) + )[0] + gt_rois = entry['boxes'][gt_inds, :] * scale + im_info = np.array([[im_height, im_width, scale]], dtype=np.float32) + blobs['im_info'].append(im_info) + + # Add RPN targets + if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN: + # RPN applied to many feature levels, as in the FPN paper + rpn_blobs = _get_rpn_blobs( + im_height, im_width, foas, all_anchors, gt_rois + ) + for i, lvl in enumerate(range(k_min, k_max + 1)): + for k, v in rpn_blobs[i].items(): + blobs[k + '_fpn' + str(lvl)].append(v) + else: + # Classical RPN, applied to a single feature level + rpn_blobs = _get_rpn_blobs( + im_height, im_width, [foa], all_anchors, gt_rois + ) + for k, v in rpn_blobs.items(): + blobs[k].append(v) + + for k, v in blobs.items(): + if isinstance(v, list) and len(v) > 0: + blobs[k] = np.concatenate(v) + + valid_keys = [ + 'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes', + 'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints' + ] + minimal_roidb = [{} for _ in range(len(roidb))] + for i, e in enumerate(roidb): + for k in valid_keys: + if k in e: + minimal_roidb[i][k] = e[k] + blobs['roidb'] = blob_utils.serialize(minimal_roidb) + + # Always return valid=True, since RPN minibatches are valid by design + return True + + +def _get_rpn_blobs(im_height, im_width, foas, all_anchors, gt_boxes): + total_anchors = all_anchors.shape[0] + straddle_thresh = cfg.TRAIN.RPN_STRADDLE_THRESH + + if straddle_thresh >= 0: + # Only keep anchors inside the image by a margin of straddle_thresh + # Set 
TRAIN.RPN_STRADDLE_THRESH to -1 (or a large value) to keep all + # anchors + inds_inside = np.where( + (all_anchors[:, 0] >= -straddle_thresh) & + (all_anchors[:, 1] >= -straddle_thresh) & + (all_anchors[:, 2] < im_width + straddle_thresh) & + (all_anchors[:, 3] < im_height + straddle_thresh) + )[0] + # keep only inside anchors + anchors = all_anchors[inds_inside, :] + else: + inds_inside = np.arange(all_anchors.shape[0]) + anchors = all_anchors + num_inside = len(inds_inside) + + logger.debug('total_anchors: {}'.format(total_anchors)) + logger.debug('inds_inside: {}'.format(num_inside)) + logger.debug('anchors.shape: {}'.format(anchors.shape)) + + # Compute anchor labels: + # label=1 is positive, 0 is negative, -1 is don't care (ignore) + labels = np.empty((num_inside, ), dtype=np.int32) + labels.fill(-1) + if len(gt_boxes) > 0: + # Compute overlaps between the anchors and the gt boxes overlaps + anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes) + # Map from anchor to gt box that has highest overlap + anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1) + # For each anchor, amount of overlap with most overlapping gt box + anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_inside), + anchor_to_gt_argmax] + + # Map from gt box to an anchor that has highest overlap + gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0) + # For each gt box, amount of overlap with most overlapping anchor + gt_to_anchor_max = anchor_by_gt_overlap[ + gt_to_anchor_argmax, + np.arange(anchor_by_gt_overlap.shape[1]) + ] + # Find all anchors that share the max overlap amount + # (this includes many ties) + anchors_with_max_overlap = np.where( + anchor_by_gt_overlap == gt_to_anchor_max + )[0] + + # Fg label: for each gt use anchors with highest overlap + # (including ties) + labels[anchors_with_max_overlap] = 1 + # Fg label: above threshold IOU + labels[anchor_to_gt_max >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 + + # subsample positive labels if we have too many + 
num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE_PER_IM) + fg_inds = np.where(labels == 1)[0] + if len(fg_inds) > num_fg: + disable_inds = npr.choice( + fg_inds, size=(len(fg_inds) - num_fg), replace=False + ) + labels[disable_inds] = -1 + fg_inds = np.where(labels == 1)[0] + + # subsample negative labels if we have too many + # (samples with replacement, but since the set of bg inds is large most + # samples will not have repeats) + num_bg = cfg.TRAIN.RPN_BATCH_SIZE_PER_IM - np.sum(labels == 1) + bg_inds = np.where(anchor_to_gt_max < cfg.TRAIN.RPN_NEGATIVE_OVERLAP)[0] + if len(bg_inds) > num_bg: + enable_inds = bg_inds[npr.randint(len(bg_inds), size=num_bg)] + else: + enable_inds = bg_inds + + labels[enable_inds] = 0 + bg_inds = np.where(labels == 0)[0] + + bbox_targets = np.zeros((num_inside, 4), dtype=np.float32) + bbox_targets[fg_inds, :] = data_utils.compute_targets( + anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :] + ) + + # Bbox regression loss has the form: + # loss(x) = weight_outside * L(weight_inside * x) + # Inside weights allow us to set zero loss on an element-wise basis + # Bbox regression is only trained on positive examples so we set their + # weights to 1.0 (or otherwise if config is different) and 0 otherwise + bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32) + bbox_inside_weights[labels == 1, :] = (1.0, 1.0, 1.0, 1.0) + + # The bbox regression loss only averages by the number of images in the + # mini-batch, whereas we need to average by the total number of example + # anchors selected + # Outside weights are used to scale each element-wise loss so the final + # average over the mini-batch is correct + bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32) + # uniform weighting of examples (given non-uniform sampling) + num_examples = np.sum(labels >= 0) + bbox_outside_weights[labels == 1, :] = 1.0 / num_examples + bbox_outside_weights[labels == 0, :] = 1.0 / num_examples + + # Map up 
to original set of anchors + labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1) + bbox_targets = data_utils.unmap( + bbox_targets, total_anchors, inds_inside, fill=0 + ) + bbox_inside_weights = data_utils.unmap( + bbox_inside_weights, total_anchors, inds_inside, fill=0 + ) + bbox_outside_weights = data_utils.unmap( + bbox_outside_weights, total_anchors, inds_inside, fill=0 + ) + + # Split the generated labels, etc. into labels per each field of anchors + blobs_out = [] + start_idx = 0 + for foa in foas: + H = foa.field_size + W = foa.field_size + A = foa.num_cell_anchors + end_idx = start_idx + H * W * A + _labels = labels[start_idx:end_idx] + _bbox_targets = bbox_targets[start_idx:end_idx, :] + _bbox_inside_weights = bbox_inside_weights[start_idx:end_idx, :] + _bbox_outside_weights = bbox_outside_weights[start_idx:end_idx, :] + start_idx = end_idx + + # labels output with shape (1, A, height, width) + _labels = _labels.reshape((1, H, W, A)).transpose(0, 3, 1, 2) + # bbox_targets output with shape (1, 4 * A, height, width) + _bbox_targets = _bbox_targets.reshape( + (1, H, W, A * 4)).transpose(0, 3, 1, 2) + # bbox_inside_weights output with shape (1, 4 * A, height, width) + _bbox_inside_weights = _bbox_inside_weights.reshape( + (1, H, W, A * 4)).transpose(0, 3, 1, 2) + # bbox_outside_weights output with shape (1, 4 * A, height, width) + _bbox_outside_weights = _bbox_outside_weights.reshape( + (1, H, W, A * 4)).transpose(0, 3, 1, 2) + blobs_out.append( + dict( + rpn_labels_int32_wide=_labels, + rpn_bbox_targets_wide=_bbox_targets, + rpn_bbox_inside_weights_wide=_bbox_inside_weights, + rpn_bbox_outside_weights_wide=_bbox_outside_weights + ) + ) + return blobs_out[0] if len(blobs_out) == 1 else blobs_out diff --git a/detectron/tests/data_loader_benchmark.py b/detectron/tests/data_loader_benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..b147a88851e5ffd8b15b6a49a945c1b31d119992 --- /dev/null +++ 
b/detectron/tests/data_loader_benchmark.py @@ -0,0 +1,167 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +# Example usage: +# data_loader_benchmark.par \ +# NUM_GPUS 2 \ +# TRAIN.DATASETS "('voc_2007_trainval',)" \ +# TRAIN.PROPOSAL_FILES /path/to/voc_2007_trainval/proposals.pkl \ +# DATA_LOADER.NUM_THREADS 4 \ +# DATA_LOADER.MINIBATCH_QUEUE_SIZE 64 \ +# DATA_LOADER.BLOBS_QUEUE_CAPACITY 8 + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import logging +import numpy as np +import pprint +import sys +import time + +from caffe2.python import core +from caffe2.python import muji +from caffe2.python import workspace + +from detectron.core.config import assert_and_infer_cfg +from detectron.core.config import cfg +from detectron.core.config import merge_cfg_from_file +from detectron.core.config import merge_cfg_from_list +from detectron.datasets.roidb import combined_roidb_for_training +from detectron.roi_data.loader import RoIDataLoader +from detectron.utils.logging import setup_logging +from detectron.utils.timer import Timer + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + '--num-batches', dest='num_batches', + help='Number of minibatches to run', + default=200, type=int) + 
parser.add_argument( + '--sleep', dest='sleep_time', + help='Seconds sleep to emulate a network running', + default=0.1, type=float) + parser.add_argument( + '--cfg', dest='cfg_file', help='optional config file', default=None, + type=str) + parser.add_argument( + '--x-factor', dest='x_factor', help='simulates x-factor more GPUs', + default=1, type=int) + parser.add_argument( + '--profiler', dest='profiler', help='profile minibatch load time', + action='store_true') + parser.add_argument( + 'opts', help='See detectron/core/config.py for all options', default=None, + nargs=argparse.REMAINDER) + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + args = parser.parse_args() + return args + + +def loader_loop(roi_data_loader): + load_timer = Timer() + iters = 100 + for i in range(iters): + load_timer.tic() + roi_data_loader.get_next_minibatch() + load_timer.toc() + print('{:d}/{:d}: Average get_next_minibatch time: {:.3f}s'.format( + i + 1, iters, load_timer.average_time)) + + +def main(opts): + logger = logging.getLogger(__name__) + roidb = combined_roidb_for_training( + cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES) + logger.info('{:d} roidb entries'.format(len(roidb))) + roi_data_loader = RoIDataLoader( + roidb, + num_loaders=cfg.DATA_LOADER.NUM_THREADS, + minibatch_queue_size=cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE, + blobs_queue_capacity=cfg.DATA_LOADER.BLOBS_QUEUE_CAPACITY + ) + blob_names = roi_data_loader.get_output_names() + + net = core.Net('dequeue_net') + net.type = 'dag' + all_blobs = [] + for gpu_id in range(cfg.NUM_GPUS): + with core.NameScope('gpu_{}'.format(gpu_id)): + with core.DeviceScope(muji.OnGPU(gpu_id)): + for blob_name in blob_names: + blob = core.ScopedName(blob_name) + all_blobs.append(blob) + workspace.CreateBlob(blob) + logger.info('Creating blob: {}'.format(blob)) + net.DequeueBlobs( + roi_data_loader._blobs_queue_name, blob_names) + logger.info("Protobuf:\n" + str(net.Proto())) + + if opts.profiler: + import cProfile + 
cProfile.runctx( + 'loader_loop(roi_data_loader)', globals(), locals(), + sort='cumulative') + else: + loader_loop(roi_data_loader) + + roi_data_loader.register_sigint_handler() + roi_data_loader.start(prefill=True) + total_time = 0 + for i in range(opts.num_batches): + start_t = time.time() + for _ in range(opts.x_factor): + workspace.RunNetOnce(net) + total_time += (time.time() - start_t) / opts.x_factor + logger.info( + '{:d}/{:d}: Averge dequeue time: {:.3f}s [{:d}/{:d}]'.format( + i + 1, opts.num_batches, total_time / (i + 1), + roi_data_loader._minibatch_queue.qsize(), + cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE + ) + ) + # Sleep to simulate the time taken by running a little network + time.sleep(opts.sleep_time) + # To inspect: + # blobs = workspace.FetchBlobs(all_blobs) + # from IPython import embed; embed() + logger.info('Shutting down data loader...') + roi_data_loader.shutdown() + + +if __name__ == '__main__': + workspace.GlobalInit(['caffe2', '--caffe2_log_level=0']) + logger = setup_logging(__name__) + logger.setLevel(logging.DEBUG) + logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO) + np.random.seed(cfg.RNG_SEED) + args = parse_args() + logger.info('Called with args:') + logger.info(args) + if args.cfg_file is not None: + merge_cfg_from_file(args.cfg_file) + if args.opts is not None: + merge_cfg_from_list(args.opts) + assert_and_infer_cfg() + logger.info('Running with config:') + logger.info(pprint.pformat(cfg)) + main(args) diff --git a/detectron/tests/test_batch_permutation_op.py b/detectron/tests/test_batch_permutation_op.py new file mode 100644 index 0000000000000000000000000000000000000000..28aa84f7490ea1b4fe5c6de7ec96af217a03a342 --- /dev/null +++ b/detectron/tests/test_batch_permutation_op.py @@ -0,0 +1,111 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np +import unittest + +from caffe2.proto import caffe2_pb2 +from caffe2.python import core +from caffe2.python import gradient_checker +from caffe2.python import workspace + +import detectron.utils.logging as logging_utils +import detectron.utils.c2 as c2_utils + + +class BatchPermutationOpTest(unittest.TestCase): + def _run_op_test(self, X, I, check_grad=False): + with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)): + op = core.CreateOperator('BatchPermutation', ['X', 'I'], ['Y']) + workspace.FeedBlob('X', X) + workspace.FeedBlob('I', I) + workspace.RunOperatorOnce(op) + Y = workspace.FetchBlob('Y') + + if check_grad: + gc = gradient_checker.GradientChecker( + stepsize=0.1, + threshold=0.001, + device_option=core.DeviceOption(caffe2_pb2.CUDA, 0) + ) + + res, grad, grad_estimated = gc.CheckSimple(op, [X, I], 0, [0]) + self.assertTrue(res, 'Grad check failed') + + Y_ref = X[I] + np.testing.assert_allclose(Y, Y_ref, rtol=1e-5, atol=1e-08) + + def _run_speed_test(self, iters=5, N=1024): + """This function provides an example of how to benchmark custom + operators using the Caffe2 'prof_dag' network execution type. Please + note that for 'prof_dag' to work, Caffe2 must be compiled with profiling + support using the `-DUSE_PROF=ON` option passed to `cmake` when building + Caffe2. 
+ """ + net = core.Net('test') + net.Proto().type = 'prof_dag' + net.Proto().num_workers = 2 + Y = net.BatchPermutation(['X', 'I'], 'Y') + Y_flat = net.FlattenToVec([Y], 'Y_flat') + loss = net.AveragedLoss([Y_flat], 'loss') + net.AddGradientOperators([loss]) + workspace.CreateNet(net) + + X = np.random.randn(N, 256, 14, 14) + for _i in range(iters): + I = np.random.permutation(N) + workspace.FeedBlob('X', X.astype(np.float32)) + workspace.FeedBlob('I', I.astype(np.int32)) + workspace.RunNet(net.Proto().name) + np.testing.assert_allclose( + workspace.FetchBlob('Y'), X[I], rtol=1e-5, atol=1e-08 + ) + + def test_forward_and_gradient(self): + A = np.random.randn(2, 3, 5, 7).astype(np.float32) + I = np.array([0, 1], dtype=np.int32) + self._run_op_test(A, I, check_grad=True) + + A = np.random.randn(2, 3, 5, 7).astype(np.float32) + I = np.array([1, 0], dtype=np.int32) + self._run_op_test(A, I, check_grad=True) + + A = np.random.randn(10, 3, 5, 7).astype(np.float32) + I = np.array(np.random.permutation(10), dtype=np.int32) + self._run_op_test(A, I, check_grad=True) + + def test_size_exceptions(self): + A = np.random.randn(2, 256, 42, 86).astype(np.float32) + I = np.array(np.random.permutation(10), dtype=np.int32) + with self.assertRaises(RuntimeError): + self._run_op_test(A, I) + + # See doc string in _run_speed_test + # def test_perf(self): + # with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)): + # self._run_speed_test() + + +if __name__ == '__main__': + workspace.GlobalInit(['caffe2', '--caffe2_log_level=0']) + c2_utils.import_detectron_ops() + assert 'BatchPermutation' in workspace.RegisteredOperators() + logging_utils.setup_logging(__name__) + unittest.main() diff --git a/detectron/tests/test_bbox_transform.py b/detectron/tests/test_bbox_transform.py new file mode 100644 index 0000000000000000000000000000000000000000..7d204f625ccc49c19e6e46de7e4ab4e227769df5 --- /dev/null +++ b/detectron/tests/test_bbox_transform.py @@ -0,0 +1,107 @@ +# Copyright (c) 
2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np +import unittest + +from pycocotools import mask as COCOmask + +import detectron.utils.boxes as box_utils + + +def random_boxes(mean_box, stdev, N): + boxes = np.random.randn(N, 4) * stdev + mean_box + return boxes.astype(dtype=np.float32) + + +class TestBboxTransform(unittest.TestCase): + def test_bbox_transform_and_inverse(self): + weights = (5, 5, 10, 10) + src_boxes = random_boxes([10, 10, 20, 20], 1, 10) + dst_boxes = random_boxes([10, 10, 20, 20], 1, 10) + deltas = box_utils.bbox_transform_inv( + src_boxes, dst_boxes, weights=weights + ) + dst_boxes_reconstructed = box_utils.bbox_transform( + src_boxes, deltas, weights=weights + ) + np.testing.assert_array_almost_equal( + dst_boxes, dst_boxes_reconstructed, decimal=5 + ) + + def test_bbox_dataset_to_prediction_roundtrip(self): + """Simulate the process of reading a ground-truth box from a dataset, + make predictions from proposals, convert the predictions back to the + dataset format, and then use the COCO API to compute IoU overlap between + the gt box and the predictions. These should have IoU of 1. 
+ """ + weights = (5, 5, 10, 10) + # 1/ "read" a box from a dataset in the default (x1, y1, w, h) format + gt_xywh_box = [10, 20, 100, 150] + # 2/ convert it to our internal (x1, y1, x2, y2) format + gt_xyxy_box = box_utils.xywh_to_xyxy(gt_xywh_box) + # 3/ consider nearby proposal boxes + prop_xyxy_boxes = random_boxes(gt_xyxy_box, 10, 10) + # 4/ compute proposal-to-gt transformation deltas + deltas = box_utils.bbox_transform_inv( + prop_xyxy_boxes, np.array([gt_xyxy_box]), weights=weights + ) + # 5/ use deltas to transform proposals to xyxy predicted box + pred_xyxy_boxes = box_utils.bbox_transform( + prop_xyxy_boxes, deltas, weights=weights + ) + # 6/ convert xyxy predicted box to xywh predicted box + pred_xywh_boxes = box_utils.xyxy_to_xywh(pred_xyxy_boxes) + # 7/ use COCO API to compute IoU + not_crowd = [int(False)] * pred_xywh_boxes.shape[0] + ious = COCOmask.iou(pred_xywh_boxes, np.array([gt_xywh_box]), not_crowd) + np.testing.assert_array_almost_equal(ious, np.ones(ious.shape)) + + def test_cython_bbox_iou_against_coco_api_bbox_iou(self): + """Check that our cython implementation of bounding box IoU overlap + matches the COCO API implementation. 
+ """ + def _do_test(b1, b2): + # Compute IoU overlap with the cython implementation + cython_iou = box_utils.bbox_overlaps(b1, b2) + # Compute IoU overlap with the COCO API implementation + # (requires converting boxes from xyxy to xywh format) + xywh_b1 = box_utils.xyxy_to_xywh(b1) + xywh_b2 = box_utils.xyxy_to_xywh(b2) + not_crowd = [int(False)] * b2.shape[0] + coco_ious = COCOmask.iou(xywh_b1, xywh_b2, not_crowd) + # IoUs should be similar + np.testing.assert_array_almost_equal( + cython_iou, coco_ious, decimal=5 + ) + + # Test small boxes + b1 = random_boxes([10, 10, 20, 20], 5, 10) + b2 = random_boxes([10, 10, 20, 20], 5, 10) + _do_test(b1, b2) + + # Test bigger boxes + b1 = random_boxes([10, 10, 110, 20], 20, 10) + b2 = random_boxes([10, 10, 110, 20], 20, 10) + _do_test(b1, b2) + + +if __name__ == '__main__': + unittest.main() diff --git a/detectron/tests/test_cfg.py b/detectron/tests/test_cfg.py new file mode 100644 index 0000000000000000000000000000000000000000..4f3bee20fdaae99b1957e7e7d54ecc3876bf04c2 --- /dev/null +++ b/detectron/tests/test_cfg.py @@ -0,0 +1,199 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import copy +import tempfile +import unittest + +from detectron.core.config import cfg +from detectron.utils.collections import AttrDict +import detectron.core.config as core_config +import detectron.utils.env as envu +import detectron.utils.logging as logging_utils + + +class TestAttrDict(unittest.TestCase): + def test_immutability(self): + # Top level immutable + a = AttrDict() + a.foo = 0 + a.immutable(True) + with self.assertRaises(AttributeError): + a.foo = 1 + a.bar = 1 + assert a.is_immutable() + assert a.foo == 0 + a.immutable(False) + assert not a.is_immutable() + a.foo = 1 + assert a.foo == 1 + + # Recursively immutable + a.level1 = AttrDict() + a.level1.foo = 0 + a.level1.level2 = AttrDict() + a.level1.level2.foo = 0 + a.immutable(True) + assert a.is_immutable() + with self.assertRaises(AttributeError): + a.level1.level2.foo = 1 + a.level1.bar = 1 + assert a.level1.level2.foo == 0 + + # Serialize immutability state + a.immutable(True) + a2 = core_config.load_cfg(envu.yaml_dump(a)) + assert a.is_immutable() + assert a2.is_immutable() + + +class TestCfg(unittest.TestCase): + def test_copy_cfg(self): + cfg2 = copy.deepcopy(cfg) + s = cfg.MODEL.TYPE + cfg2.MODEL.TYPE = 'dummy' + assert cfg.MODEL.TYPE == s + + def test_merge_cfg_from_cfg(self): + # Test: merge from deepcopy + s = 'dummy0' + cfg2 = copy.deepcopy(cfg) + cfg2.MODEL.TYPE = s + core_config.merge_cfg_from_cfg(cfg2) + assert cfg.MODEL.TYPE == s + + # Test: merge from yaml + s = 'dummy1' + cfg2 = core_config.load_cfg(envu.yaml_dump(cfg)) + cfg2.MODEL.TYPE = s + core_config.merge_cfg_from_cfg(cfg2) + assert cfg.MODEL.TYPE == s + + # Test: merge with a valid key + s = 'dummy2' + cfg2 = AttrDict() + cfg2.MODEL = AttrDict() + cfg2.MODEL.TYPE = s + 
core_config.merge_cfg_from_cfg(cfg2) + assert cfg.MODEL.TYPE == s + + # Test: merge with an invalid key + s = 'dummy3' + cfg2 = AttrDict() + cfg2.FOO = AttrDict() + cfg2.FOO.BAR = s + with self.assertRaises(KeyError): + core_config.merge_cfg_from_cfg(cfg2) + + # Test: merge with converted type + cfg2 = AttrDict() + cfg2.TRAIN = AttrDict() + cfg2.TRAIN.SCALES = [1] + core_config.merge_cfg_from_cfg(cfg2) + assert type(cfg.TRAIN.SCALES) is tuple + assert cfg.TRAIN.SCALES[0] == 1 + + # Test: merge with invalid type + cfg2 = AttrDict() + cfg2.TRAIN = AttrDict() + cfg2.TRAIN.SCALES = 1 + with self.assertRaises(ValueError): + core_config.merge_cfg_from_cfg(cfg2) + + def test_merge_cfg_from_file(self): + with tempfile.NamedTemporaryFile() as f: + envu.yaml_dump(cfg, f) + s = cfg.MODEL.TYPE + cfg.MODEL.TYPE = 'dummy' + assert cfg.MODEL.TYPE != s + core_config.merge_cfg_from_file(f.name) + assert cfg.MODEL.TYPE == s + + def test_merge_cfg_from_list(self): + opts = [ + 'TRAIN.SCALES', '(100, )', 'MODEL.TYPE', u'foobar', 'NUM_GPUS', 2 + ] + assert len(cfg.TRAIN.SCALES) > 0 + assert cfg.TRAIN.SCALES[0] != 100 + assert cfg.MODEL.TYPE != 'foobar' + assert cfg.NUM_GPUS != 2 + core_config.merge_cfg_from_list(opts) + assert type(cfg.TRAIN.SCALES) is tuple + assert len(cfg.TRAIN.SCALES) == 1 + assert cfg.TRAIN.SCALES[0] == 100 + assert cfg.MODEL.TYPE == 'foobar' + assert cfg.NUM_GPUS == 2 + + def test_deprecated_key_from_list(self): + # You should see logger messages like: + # "Deprecated config key (ignoring): MODEL.DILATION" + opts = ['FINAL_MSG', 'foobar', 'MODEL.DILATION', 2] + with self.assertRaises(AttributeError): + _ = cfg.FINAL_MSG # noqa + with self.assertRaises(AttributeError): + _ = cfg.MODEL.DILATION # noqa + core_config.merge_cfg_from_list(opts) + with self.assertRaises(AttributeError): + _ = cfg.FINAL_MSG # noqa + with self.assertRaises(AttributeError): + _ = cfg.MODEL.DILATION # noqa + + def test_deprecated_key_from_file(self): + # You should see logger messages like: 
+ # "Deprecated config key (ignoring): MODEL.DILATION" + with tempfile.NamedTemporaryFile() as f: + cfg2 = copy.deepcopy(cfg) + cfg2.MODEL.DILATION = 2 + envu.yaml_dump(cfg2, f) + with self.assertRaises(AttributeError): + _ = cfg.MODEL.DILATION # noqa + core_config.merge_cfg_from_file(f.name) + with self.assertRaises(AttributeError): + _ = cfg.MODEL.DILATION # noqa + + def test_renamed_key_from_list(self): + # You should see logger messages like: + # "Key EXAMPLE.RENAMED.KEY was renamed to EXAMPLE.KEY; + # please update your config" + opts = ['EXAMPLE.RENAMED.KEY', 'foobar'] + with self.assertRaises(AttributeError): + _ = cfg.EXAMPLE.RENAMED.KEY # noqa + with self.assertRaises(KeyError): + core_config.merge_cfg_from_list(opts) + + def test_renamed_key_from_file(self): + # You should see logger messages like: + # "Key EXAMPLE.RENAMED.KEY was renamed to EXAMPLE.KEY; + # please update your config" + with tempfile.NamedTemporaryFile() as f: + cfg2 = copy.deepcopy(cfg) + cfg2.EXAMPLE = AttrDict() + cfg2.EXAMPLE.RENAMED = AttrDict() + cfg2.EXAMPLE.RENAMED.KEY = 'foobar' + envu.yaml_dump(cfg2, f) + with self.assertRaises(AttributeError): + _ = cfg.EXAMPLE.RENAMED.KEY # noqa + with self.assertRaises(KeyError): + core_config.merge_cfg_from_file(f.name) + + +if __name__ == '__main__': + logging_utils.setup_logging(__name__) + unittest.main() diff --git a/detectron/tests/test_loader.py b/detectron/tests/test_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..ac3a94c2dc04aa5a717a3b917bf86a1c6ac3fea6 --- /dev/null +++ b/detectron/tests/test_loader.py @@ -0,0 +1,121 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np +import logging +import unittest +import mock + +from caffe2.proto import caffe2_pb2 +from caffe2.python import core +from caffe2.python import muji +from caffe2.python import workspace + +from detectron.core.config import assert_and_infer_cfg +from detectron.core.config import cfg +from detectron.roi_data.loader import RoIDataLoader +import detectron.utils.logging as logging_utils + + +def get_roidb_blobs(roidb): + blobs = {} + blobs['data'] = np.stack([entry['data'] for entry in roidb]) + return blobs, True + + +def get_net(data_loader, name): + logger = logging.getLogger(__name__) + blob_names = data_loader.get_output_names() + net = core.Net(name) + net.type = 'dag' + for gpu_id in range(cfg.NUM_GPUS): + with core.NameScope('gpu_{}'.format(gpu_id)): + with core.DeviceScope(muji.OnGPU(gpu_id)): + for blob_name in blob_names: + blob = core.ScopedName(blob_name) + workspace.CreateBlob(blob) + net.DequeueBlobs( + data_loader._blobs_queue_name, blob_names) + logger.info("Protobuf:\n" + str(net.Proto())) + + return net + + +def get_roidb_sample_data(sample_data): + roidb = [] + for _ in range(np.random.randint(4, 10)): + roidb.append({'data': sample_data}) + return roidb + + +def create_loader_and_network(sample_data, name): + roidb = get_roidb_sample_data(sample_data) + loader = RoIDataLoader(roidb) + net 
= get_net(loader, 'dequeue_net_train') + loader.register_sigint_handler() + loader.start(prefill=False) + return loader, net + + +def run_net(net): + workspace.RunNetOnce(net) + gpu_dev = core.DeviceOption(caffe2_pb2.CUDA, 0) + name_scope = 'gpu_{}'.format(0) + with core.NameScope(name_scope): + with core.DeviceScope(gpu_dev): + data = workspace.FetchBlob(core.ScopedName('data')) + return data + + +class TestRoIDataLoader(unittest.TestCase): + @mock.patch( + 'detectron.roi_data.loader.get_minibatch_blob_names', + return_value=[u'data'] + ) + @mock.patch( + 'detectron.roi_data.loader.get_minibatch', + side_effect=get_roidb_blobs + ) + def test_two_parallel_loaders(self, _1, _2): + train_data = np.random.rand(2, 3, 3).astype(np.float32) + train_loader, train_net = create_loader_and_network(train_data, + 'dequeue_net_train') + test_data = np.random.rand(2, 4, 4).astype(np.float32) + test_loader, test_net = create_loader_and_network(test_data, + 'dequeue_net_test') + for _ in range(5): + data = run_net(train_net) + self.assertEqual(data[0].tolist(), train_data.tolist()) + data = run_net(test_net) + self.assertEqual(data[0].tolist(), test_data.tolist()) + test_loader.shutdown() + train_loader.shutdown() + + +if __name__ == '__main__': + workspace.GlobalInit(['caffe2', '--caffe2_log_level=0']) + logger = logging_utils.setup_logging(__name__) + logger.setLevel(logging.DEBUG) + logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO) + np.random.seed(cfg.RNG_SEED) + cfg.TRAIN.ASPECT_GROUPING = False + cfg.NUM_GPUS = 2 + assert_and_infer_cfg() + unittest.main() diff --git a/detectron/tests/test_restore_checkpoint.py b/detectron/tests/test_restore_checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..2694eee95c906565f6bb5ff861329e3b2392b089 --- /dev/null +++ b/detectron/tests/test_restore_checkpoint.py @@ -0,0 +1,132 @@ +# Copyright (c) 2017-present, Facebook, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import logging +import numpy as np +import os +import shutil +import tempfile + +from caffe2.python import workspace + +from detectron.core.config import assert_and_infer_cfg +from detectron.core.config import cfg +from detectron.core.config import get_output_dir +from detectron.datasets.roidb import combined_roidb_for_training +from detectron.modeling import model_builder +from detectron.utils.logging import setup_logging +import detectron.utils.c2 as c2_utils +import detectron.utils.net as nu + +c2_utils.import_detectron_ops() + + +def get_params(model): + blobs = {} # gpu_0 blobs with unscoped_name as key + all_blobs = {} # all blobs with scoped name as key + # Save all parameters + for param in model.params: + scoped_name = str(param) + unscoped_name = c2_utils.UnscopeName(scoped_name) + if 'gpu_0' in scoped_name: + blobs[unscoped_name] = workspace.FetchBlob(scoped_name) + all_blobs[scoped_name] = workspace.FetchBlob(scoped_name) + for param in model.TrainableParams(): + scoped_name = str(param) + '_momentum' + unscoped_name = c2_utils.UnscopeName(scoped_name) + if 'gpu_0' in scoped_name: + blobs[unscoped_name] = workspace.FetchBlob(scoped_name) + all_blobs[scoped_name] = 
workspace.FetchBlob(scoped_name) + return blobs, all_blobs + + +def add_momentum_init_ops(model): + for param in model.TrainableParams(gpu_id=0): + model.param_init_net.GaussianFill( + [param + '_momentum'], param + '_momentum', mean=0.0, std=1.0) + + +def init_weights(model): + # init weights in gpu_id = 0 and then broadcast + workspace.RunNetOnce(model.param_init_net) + nu.broadcast_parameters(model) + + +def test_restore_checkpoint(): + # Create Model + model = model_builder.create(cfg.MODEL.TYPE, train=True) + add_momentum_init_ops(model) + init_weights(model) + # Fill input blobs + roidb = combined_roidb_for_training( + cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES + ) + model_builder.add_training_inputs(model, roidb=roidb) + workspace.CreateNet(model.net) + # Bookkeeping for checkpoint creation + iter_num = 0 + checkpoints = {} + output_dir = get_output_dir(cfg.TRAIN.DATASETS, training=True) + chk_file_path = os.path.join(output_dir, 'model_iter{}.pkl'.format(iter_num)) + checkpoints[iter_num] = chk_file_path + # Save model weights + nu.save_model_to_weights_file(checkpoints[iter_num], model) + orig_gpu_0_params, orig_all_params = get_params(model) + # Change the model weights + init_weights(model) + # Reload the weights in the model + nu.initialize_gpu_from_weights_file(model, chk_file_path, gpu_id=0) + nu.broadcast_parameters(model) + shutil.rmtree(cfg.OUTPUT_DIR) + _, restored_all_params = get_params(model) + # Check if all params are loaded correctly + for scoped_name, blob in orig_all_params.items(): + np.testing.assert_array_equal(blob, restored_all_params[scoped_name]) + # Check if broadcast_parameters works + for scoped_name, blob in restored_all_params.items(): + unscoped_name = c2_utils.UnscopeName(scoped_name) + np.testing.assert_array_equal(blob, orig_gpu_0_params[unscoped_name]) + + +if __name__ == '__main__': + workspace.GlobalInit(['caffe2', '--caffe2_log_level=0']) + logger = setup_logging(__name__) + logger.setLevel(logging.DEBUG) + 
logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO) + np.random.seed(cfg.RNG_SEED) + output_dir = tempfile.mkdtemp() + # Generate config for test + cfg.MODEL.TYPE = 'generalized_rcnn' + cfg.MODEL.CONV_BODY = 'FPN.add_fpn_ResNet50_conv5_body' + cfg.MODEL.NUM_CLASSES = 81 + cfg.MODEL.FASTER_RCNN = True + cfg.FPN.FPN_ON = True + cfg.FPN.MULTILEVEL_ROIS = True + cfg.FPN.MULTILEVEL_RPN = True + cfg.FAST_RCNN.ROI_BOX_HEAD = 'fast_rcnn_heads.add_roi_2mlp_head' + cfg.FAST_RCNN.ROI_XFORM_METHOD = 'RoIAlign' + cfg.OUTPUT_DIR = output_dir + cfg.TRAIN.DATASETS = ('coco_2014_minival',) + cfg.TRAIN.WEIGHTS = b'' + for num_gpu in range(workspace.NumCudaDevices()): + cfg.immutable(False) + cfg.NUM_GPUS = num_gpu + 1 + assert_and_infer_cfg() + test_restore_checkpoint() diff --git a/detectron/tests/test_smooth_l1_loss_op.py b/detectron/tests/test_smooth_l1_loss_op.py new file mode 100644 index 0000000000000000000000000000000000000000..bd0bdb1af1f0b891473e28696ee712f082046c55 --- /dev/null +++ b/detectron/tests/test_smooth_l1_loss_op.py @@ -0,0 +1,80 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np +import unittest + +from caffe2.proto import caffe2_pb2 +from caffe2.python import core +from caffe2.python import gradient_checker +from caffe2.python import workspace + +import detectron.utils.c2 as c2_utils +import detectron.utils.logging as logging_utils + + +class SmoothL1LossTest(unittest.TestCase): + def test_forward_and_gradient(self): + Y = np.random.randn(128, 4 * 21).astype(np.float32) + Y_hat = np.random.randn(128, 4 * 21).astype(np.float32) + inside_weights = np.random.randn(128, 4 * 21).astype(np.float32) + inside_weights[inside_weights < 0] = 0 + outside_weights = np.random.randn(128, 4 * 21).astype(np.float32) + outside_weights[outside_weights < 0] = 0 + scale = np.random.random() + beta = np.random.random() + + op = core.CreateOperator( + 'SmoothL1Loss', ['Y_hat', 'Y', 'inside_weights', 'outside_weights'], + ['loss'], + scale=scale, + beta=beta + ) + + gc = gradient_checker.GradientChecker( + stepsize=0.005, + threshold=0.005, + device_option=core.DeviceOption(caffe2_pb2.CUDA, 0) + ) + + res, grad, grad_estimated = gc.CheckSimple( + op, [Y_hat, Y, inside_weights, outside_weights], 0, [0] + ) + + self.assertTrue( + grad.shape == grad_estimated.shape, + 'Fail check: grad.shape != grad_estimated.shape' + ) + + # To inspect the gradient and estimated gradient: + # np.set_printoptions(precision=3, suppress=True) + # print('grad:') + # print(grad) + # print('grad_estimated:') + # print(grad_estimated) + + self.assertTrue(res) + + +if __name__ == '__main__': + c2_utils.import_detectron_ops() + assert 'SmoothL1Loss' in workspace.RegisteredOperators() + logging_utils.setup_logging(__name__) + unittest.main() diff --git a/detectron/tests/test_spatial_narrow_as_op.py 
b/detectron/tests/test_spatial_narrow_as_op.py new file mode 100644 index 0000000000000000000000000000000000000000..ca101aa9b2715d66f36b7847ed1ed8f83f13c872 --- /dev/null +++ b/detectron/tests/test_spatial_narrow_as_op.py @@ -0,0 +1,91 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np +import unittest + +from caffe2.proto import caffe2_pb2 +from caffe2.python import core +from caffe2.python import gradient_checker +from caffe2.python import workspace + +import detectron.utils.c2 as c2_utils +import detectron.utils.logging as logging_utils + + +class SpatialNarrowAsOpTest(unittest.TestCase): + def _run_test(self, A, B, check_grad=False): + with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)): + op = core.CreateOperator('SpatialNarrowAs', ['A', 'B'], ['C']) + workspace.FeedBlob('A', A) + workspace.FeedBlob('B', B) + workspace.RunOperatorOnce(op) + C = workspace.FetchBlob('C') + + if check_grad: + gc = gradient_checker.GradientChecker( + stepsize=0.005, + threshold=0.005, + device_option=core.DeviceOption(caffe2_pb2.CUDA, 0) + ) + + res, grad, grad_estimated = gc.CheckSimple(op, [A, B], 0, [0]) + self.assertTrue(res, 'Grad check failed') + + dims = C.shape + C_ref = 
A[:dims[0], :dims[1], :dims[2], :dims[3]] + np.testing.assert_allclose(C, C_ref, rtol=1e-5, atol=1e-08) + + def test_small_forward_and_gradient(self): + A = np.random.randn(2, 3, 5, 7).astype(np.float32) + B = np.random.randn(2, 3, 2, 2).astype(np.float32) + self._run_test(A, B, check_grad=True) + + A = np.random.randn(2, 3, 5, 7).astype(np.float32) + B = np.random.randn(2, 3, 5).astype(np.float32) + self._run_test(A, B, check_grad=True) + + def test_large_forward(self): + A = np.random.randn(2, 256, 42, 100).astype(np.float32) + B = np.random.randn(2, 256, 35, 87).astype(np.float32) + self._run_test(A, B) + + A = np.random.randn(2, 256, 42, 87).astype(np.float32) + B = np.random.randn(2, 256, 35, 87).astype(np.float32) + self._run_test(A, B) + + def test_size_exceptions(self): + A = np.random.randn(2, 256, 42, 86).astype(np.float32) + B = np.random.randn(2, 256, 35, 87).astype(np.float32) + with self.assertRaises(RuntimeError): + self._run_test(A, B) + + A = np.random.randn(2, 255, 42, 88).astype(np.float32) + B = np.random.randn(2, 256, 35, 87).astype(np.float32) + with self.assertRaises(RuntimeError): + self._run_test(A, B) + + +if __name__ == '__main__': + workspace.GlobalInit(['caffe2', '--caffe2_log_level=0']) + c2_utils.import_detectron_ops() + assert 'SpatialNarrowAs' in workspace.RegisteredOperators() + logging_utils.setup_logging(__name__) + unittest.main() diff --git a/detectron/tests/test_zero_even_op.py b/detectron/tests/test_zero_even_op.py new file mode 100644 index 0000000000000000000000000000000000000000..82076a8a9043b4c635905a3ff5817765da214beb --- /dev/null +++ b/detectron/tests/test_zero_even_op.py @@ -0,0 +1,127 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np +import unittest + +from caffe2.proto import caffe2_pb2 +from caffe2.python import core +from caffe2.python import workspace + +import detectron.utils.c2 as c2_utils + + +class ZeroEvenOpTest(unittest.TestCase): + + def _run_zero_even_op(self, X): + op = core.CreateOperator('ZeroEven', ['X'], ['Y']) + workspace.FeedBlob('X', X) + workspace.RunOperatorOnce(op) + Y = workspace.FetchBlob('Y') + return Y + + def _run_zero_even_op_gpu(self, X): + with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)): + op = core.CreateOperator('ZeroEven', ['X'], ['Y']) + workspace.FeedBlob('X', X) + workspace.RunOperatorOnce(op) + Y = workspace.FetchBlob('Y') + return Y + + def test_throws_on_non_1D_arrays(self): + X = np.zeros((2, 2), dtype=np.float32) + with self.assertRaisesRegexp(RuntimeError, 'X\.ndim\(\) == 1'): + self._run_zero_even_op(X) + + def test_handles_empty_arrays(self): + X = np.array([], dtype=np.float32) + Y_exp = np.copy(X) + Y_act = self._run_zero_even_op(X) + np.testing.assert_allclose(Y_act, Y_exp) + + def test_sets_vals_at_even_inds_to_zero(self): + X = np.array([0, 1, 2, 3, 4], dtype=np.float32) + Y_exp = np.array([0, 1, 0, 3, 0], dtype=np.float32) + Y_act = self._run_zero_even_op(X) + np.testing.assert_allclose(Y_act[0::2], Y_exp[0::2]) + + def test_preserves_vals_at_odd_inds(self): + X = 
np.array([0, 1, 2, 3, 4], dtype=np.float32) + Y_exp = np.array([0, 1, 0, 3, 0], dtype=np.float32) + Y_act = self._run_zero_even_op(X) + np.testing.assert_allclose(Y_act[1::2], Y_exp[1::2]) + + def test_handles_even_length_arrays(self): + X = np.random.rand(64).astype(np.float32) + Y_exp = np.copy(X) + Y_exp[0::2] = 0.0 + Y_act = self._run_zero_even_op(X) + np.testing.assert_allclose(Y_act, Y_exp) + + def test_handles_odd_length_arrays(self): + X = np.random.randn(77).astype(np.float32) + Y_exp = np.copy(X) + Y_exp[0::2] = 0.0 + Y_act = self._run_zero_even_op(X) + np.testing.assert_allclose(Y_act, Y_exp) + + def test_gpu_throws_on_non_1D_arrays(self): + X = np.zeros((2, 2), dtype=np.float32) + with self.assertRaisesRegexp(RuntimeError, 'X\.ndim\(\) == 1'): + self._run_zero_even_op_gpu(X) + + def test_gpu_handles_empty_arrays(self): + X = np.array([], dtype=np.float32) + Y_exp = np.copy(X) + Y_act = self._run_zero_even_op_gpu(X) + np.testing.assert_allclose(Y_act, Y_exp) + + def test_gpu_sets_vals_at_even_inds_to_zero(self): + X = np.array([0, 1, 2, 3, 4], dtype=np.float32) + Y_exp = np.array([0, 1, 0, 3, 0], dtype=np.float32) + Y_act = self._run_zero_even_op_gpu(X) + np.testing.assert_allclose(Y_act[0::2], Y_exp[0::2]) + + def test_gpu_preserves_vals_at_odd_inds(self): + X = np.array([0, 1, 2, 3, 4], dtype=np.float32) + Y_exp = np.array([0, 1, 0, 3, 0], dtype=np.float32) + Y_act = self._run_zero_even_op_gpu(X) + np.testing.assert_allclose(Y_act[1::2], Y_exp[1::2]) + + def test_gpu_handles_even_length_arrays(self): + X = np.random.rand(64).astype(np.float32) + Y_exp = np.copy(X) + Y_exp[0::2] = 0.0 + Y_act = self._run_zero_even_op_gpu(X) + np.testing.assert_allclose(Y_act, Y_exp) + + def test_gpu_handles_odd_length_arrays(self): + X = np.random.randn(77).astype(np.float32) + Y_exp = np.copy(X) + Y_exp[0::2] = 0.0 + Y_act = self._run_zero_even_op_gpu(X) + np.testing.assert_allclose(Y_act, Y_exp) + + +if __name__ == '__main__': + workspace.GlobalInit(['caffe2', 
'--caffe2_log_level=0']) + c2_utils.import_custom_ops() + assert 'ZeroEven' in workspace.RegisteredOperators() + unittest.main() diff --git a/detectron/utils/__init__.py b/detectron/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/detectron/utils/blob.py b/detectron/utils/blob.py new file mode 100644 index 0000000000000000000000000000000000000000..1cf04baf4f8049d4b1e69f977bf6820b80fba1d1 --- /dev/null +++ b/detectron/utils/blob.py @@ -0,0 +1,181 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## +# +# Based on: +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Caffe2 blob helper functions.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import cv2 +import numpy as np +from six.moves import cPickle as pickle + +from caffe2.proto import caffe2_pb2 + +from detectron.core.config import cfg + + +def get_image_blob(im, target_scale, target_max_size): + """Convert an image into a network input. 
+ + Arguments: + im (ndarray): a color image in BGR order + + Returns: + blob (ndarray): a data blob holding an image pyramid + im_scale (float): image scale (target size) / (original size) + im_info (ndarray) + """ + processed_im, im_scale = prep_im_for_blob( + im, cfg.PIXEL_MEANS, target_scale, target_max_size + ) + blob = im_list_to_blob(processed_im) + # NOTE: this height and width may be larger than actual scaled input image + # due to the FPN.COARSEST_STRIDE related padding in im_list_to_blob. We are + # maintaining this behavior for now to make existing results exactly + # reproducible (in practice using the true input image height and width + # yields nearly the same results, but they are sometimes slightly different + # because predictions near the edge of the image will be pruned more + # aggressively). + height, width = blob.shape[2], blob.shape[3] + im_info = np.hstack((height, width, im_scale))[np.newaxis, :] + return blob, im_scale, im_info.astype(np.float32) + + +def im_list_to_blob(ims): + """Convert a list of images into a network input. Assumes images were + prepared using prep_im_for_blob or equivalent: i.e. + - BGR channel order + - pixel means subtracted + - resized to the desired input size + - float32 numpy ndarray format + Output is a 4D HCHW tensor of the images concatenated along axis 0 with + shape. 
+ """ + if not isinstance(ims, list): + ims = [ims] + max_shape = np.array([im.shape for im in ims]).max(axis=0) + # Pad the image so they can be divisible by a stride + if cfg.FPN.FPN_ON: + stride = float(cfg.FPN.COARSEST_STRIDE) + max_shape[0] = int(np.ceil(max_shape[0] / stride) * stride) + max_shape[1] = int(np.ceil(max_shape[1] / stride) * stride) + + num_images = len(ims) + blob = np.zeros( + (num_images, max_shape[0], max_shape[1], 3), dtype=np.float32 + ) + for i in range(num_images): + im = ims[i] + blob[i, 0:im.shape[0], 0:im.shape[1], :] = im + # Move channels (axis 3) to axis 1 + # Axis order will become: (batch elem, channel, height, width) + channel_swap = (0, 3, 1, 2) + blob = blob.transpose(channel_swap) + return blob + + +def prep_im_for_blob(im, pixel_means, target_size, max_size): + """Prepare an image for use as a network input blob. Specially: + - Subtract per-channel pixel mean + - Convert to float32 + - Rescale to each of the specified target size (capped at max_size) + Returns a list of transformed images, one for each target size. Also returns + the scale factors that were used to compute each returned image. + """ + im = im.astype(np.float32, copy=False) + im -= pixel_means + im_shape = im.shape + im_size_min = np.min(im_shape[0:2]) + im_size_max = np.max(im_shape[0:2]) + im_scale = float(target_size) / float(im_size_min) + # Prevent the biggest axis from being more than max_size + if np.round(im_scale * im_size_max) > max_size: + im_scale = float(max_size) / float(im_size_max) + im = cv2.resize( + im, + None, + None, + fx=im_scale, + fy=im_scale, + interpolation=cv2.INTER_LINEAR + ) + return im, im_scale + + +def zeros(shape, int32=False): + """Return a blob of all zeros of the given shape with the correct float or + int data type. + """ + return np.zeros(shape, dtype=np.int32 if int32 else np.float32) + + +def ones(shape, int32=False): + """Return a blob of all ones of the given shape with the correct float or + int data type. 
+ """ + return np.ones(shape, dtype=np.int32 if int32 else np.float32) + + +def py_op_copy_blob(blob_in, blob_out): + """Copy a numpy ndarray given as blob_in into the Caffe2 CPUTensor blob + given as blob_out. Supports float32 and int32 blob data types. This function + is intended for copying numpy data into a Caffe2 blob in PythonOps. + """ + # Some awkward voodoo required by Caffe2 to support int32 blobs + needs_int32_init = False + try: + _ = blob.data.dtype # noqa + except Exception: + needs_int32_init = blob_in.dtype == np.int32 + if needs_int32_init: + # init can only take a list (failed on tuple) + blob_out.init(list(blob_in.shape), caffe2_pb2.TensorProto.INT32) + else: + blob_out.reshape(blob_in.shape) + blob_out.data[...] = blob_in + + +def get_loss_gradients(model, loss_blobs): + """Generate a gradient of 1 for each loss specified in 'loss_blobs'""" + loss_gradients = {} + for b in loss_blobs: + loss_grad = model.net.ConstantFill(b, [b + '_grad'], value=1.0) + loss_gradients[str(b)] = str(loss_grad) + return loss_gradients + + +def serialize(obj): + """Serialize a Python object using pickle and encode it as an array of + float32 values so that it can be feed into the workspace. See deserialize(). + """ + return np.fromstring(pickle.dumps(obj), dtype=np.uint8).astype(np.float32) + + +def deserialize(arr): + """Unserialize a Python object from an array of float32 values fetched from + a workspace. See serialize(). + """ + return pickle.loads(arr.astype(np.uint8).tobytes()) diff --git a/detectron/utils/boxes.py b/detectron/utils/boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..7564aabc8a492c96099d07a0e62fc6cba7458645 --- /dev/null +++ b/detectron/utils/boxes.py @@ -0,0 +1,338 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## +# +# Based on: +# -------------------------------------------------------- +# Fast/er R-CNN +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Box manipulation functions. The internal Detectron box format is +[x1, y1, x2, y2] where (x1, y1) specify the top-left box corner and (x2, y2) +specify the bottom-right box corner. Boxes from external sources, e.g., +datasets, may be in other formats (such as [x, y, w, h]) and require conversion. + +This module uses a convention that may seem strange at first: the width of a box +is computed as x2 - x1 + 1 (likewise for height). The "+ 1" dates back to old +object detection days when the coordinates were integer pixel indices, rather +than floating point coordinates in a subpixel coordinate frame. A box with x2 = +x1 and y2 = y1 was taken to include a single pixel, having a width of 1, and +hence requiring the "+ 1". Now, most datasets will likely provide boxes with +floating point coordinates and the width should be more reasonably computed as +x2 - x1. + +In practice, as long as a model is trained and tested with a consistent +convention either decision seems to be ok (at least in our experience on COCO). +Since we have a long history of training models with the "+ 1" convention, we +are reluctant to change it even if our modern tastes prefer not to use it. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np + +from detectron.core.config import cfg +import detectron.utils.cython_bbox as cython_bbox +import detectron.utils.cython_nms as cython_nms + +bbox_overlaps = cython_bbox.bbox_overlaps + + +def boxes_area(boxes): + """Compute the area of an array of boxes.""" + w = (boxes[:, 2] - boxes[:, 0] + 1) + h = (boxes[:, 3] - boxes[:, 1] + 1) + areas = w * h + assert np.all(areas >= 0), 'Negative areas founds' + return areas + + +def unique_boxes(boxes, scale=1.0): + """Return indices of unique boxes.""" + v = np.array([1, 1e3, 1e6, 1e9]) + hashes = np.round(boxes * scale).dot(v) + _, index = np.unique(hashes, return_index=True) + return np.sort(index) + + +def xywh_to_xyxy(xywh): + """Convert [x1 y1 w h] box format to [x1 y1 x2 y2] format.""" + if isinstance(xywh, (list, tuple)): + # Single box given as a list of coordinates + assert len(xywh) == 4 + x1, y1 = xywh[0], xywh[1] + x2 = x1 + np.maximum(0., xywh[2] - 1.) + y2 = y1 + np.maximum(0., xywh[3] - 1.) 
+ return (x1, y1, x2, y2) + elif isinstance(xywh, np.ndarray): + # Multiple boxes given as a 2D ndarray + return np.hstack( + (xywh[:, 0:2], xywh[:, 0:2] + np.maximum(0, xywh[:, 2:4] - 1)) + ) + else: + raise TypeError('Argument xywh must be a list, tuple, or numpy array.') + + +def xyxy_to_xywh(xyxy): + """Convert [x1 y1 x2 y2] box format to [x1 y1 w h] format.""" + if isinstance(xyxy, (list, tuple)): + # Single box given as a list of coordinates + assert len(xyxy) == 4 + x1, y1 = xyxy[0], xyxy[1] + w = xyxy[2] - x1 + 1 + h = xyxy[3] - y1 + 1 + return (x1, y1, w, h) + elif isinstance(xyxy, np.ndarray): + # Multiple boxes given as a 2D ndarray + return np.hstack((xyxy[:, 0:2], xyxy[:, 2:4] - xyxy[:, 0:2] + 1)) + else: + raise TypeError('Argument xyxy must be a list, tuple, or numpy array.') + + +def filter_small_boxes(boxes, min_size): + """Keep boxes with width and height both greater than min_size.""" + w = boxes[:, 2] - boxes[:, 0] + 1 + h = boxes[:, 3] - boxes[:, 1] + 1 + keep = np.where((w > min_size) & (h > min_size))[0] + return keep + + +def clip_boxes_to_image(boxes, height, width): + """Clip an array of boxes to an image with the given height and width.""" + boxes[:, [0, 2]] = np.minimum(width - 1., np.maximum(0., boxes[:, [0, 2]])) + boxes[:, [1, 3]] = np.minimum(height - 1., np.maximum(0., boxes[:, [1, 3]])) + return boxes + + +def clip_xyxy_to_image(x1, y1, x2, y2, height, width): + """Clip coordinates to an image with the given height and width.""" + x1 = np.minimum(width - 1., np.maximum(0., x1)) + y1 = np.minimum(height - 1., np.maximum(0., y1)) + x2 = np.minimum(width - 1., np.maximum(0., x2)) + y2 = np.minimum(height - 1., np.maximum(0., y2)) + return x1, y1, x2, y2 + + +def clip_tiled_boxes(boxes, im_shape): + """Clip boxes to image boundaries. 
im_shape is [height, width] and boxes + has shape (N, 4 * num_tiled_boxes).""" + assert boxes.shape[1] % 4 == 0, \ + 'boxes.shape[1] is {:d}, but must be divisible by 4.'.format( + boxes.shape[1] + ) + # x1 >= 0 + boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) + # y1 >= 0 + boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) + # x2 < im_shape[1] + boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) + # y2 < im_shape[0] + boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) + return boxes + + +def bbox_transform(boxes, deltas, weights=(1.0, 1.0, 1.0, 1.0)): + """Forward transform that maps proposal boxes to predicted ground-truth + boxes using bounding-box regression deltas. See bbox_transform_inv for a + description of the weights argument. + """ + if boxes.shape[0] == 0: + return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) + + boxes = boxes.astype(deltas.dtype, copy=False) + + widths = boxes[:, 2] - boxes[:, 0] + 1.0 + heights = boxes[:, 3] - boxes[:, 1] + 1.0 + ctr_x = boxes[:, 0] + 0.5 * widths + ctr_y = boxes[:, 1] + 0.5 * heights + + wx, wy, ww, wh = weights + dx = deltas[:, 0::4] / wx + dy = deltas[:, 1::4] / wy + dw = deltas[:, 2::4] / ww + dh = deltas[:, 3::4] / wh + + # Prevent sending too large values into np.exp() + dw = np.minimum(dw, cfg.BBOX_XFORM_CLIP) + dh = np.minimum(dh, cfg.BBOX_XFORM_CLIP) + + pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] + pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] + pred_w = np.exp(dw) * widths[:, np.newaxis] + pred_h = np.exp(dh) * heights[:, np.newaxis] + + pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) + # x1 + pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w + # y1 + pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h + # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) + pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 + # y2 (note: "- 1" is correct; don't be 
fooled by the asymmetry) + pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 + + return pred_boxes + + +def bbox_transform_inv(boxes, gt_boxes, weights=(1.0, 1.0, 1.0, 1.0)): + """Inverse transform that computes target bounding-box regression deltas + given proposal boxes and ground-truth boxes. The weights argument should be + a 4-tuple of multiplicative weights that are applied to the regression + target. + + In older versions of this code (and in py-faster-rcnn), the weights were set + such that the regression deltas would have unit standard deviation on the + training dataset. Presently, rather than computing these statistics exactly, + we use a fixed set of weights (10., 10., 5., 5.) by default. These are + approximately the weights one would get from COCO using the previous unit + stdev heuristic. + """ + ex_widths = boxes[:, 2] - boxes[:, 0] + 1.0 + ex_heights = boxes[:, 3] - boxes[:, 1] + 1.0 + ex_ctr_x = boxes[:, 0] + 0.5 * ex_widths + ex_ctr_y = boxes[:, 1] + 0.5 * ex_heights + + gt_widths = gt_boxes[:, 2] - gt_boxes[:, 0] + 1.0 + gt_heights = gt_boxes[:, 3] - gt_boxes[:, 1] + 1.0 + gt_ctr_x = gt_boxes[:, 0] + 0.5 * gt_widths + gt_ctr_y = gt_boxes[:, 1] + 0.5 * gt_heights + + wx, wy, ww, wh = weights + targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths + targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights + targets_dw = ww * np.log(gt_widths / ex_widths) + targets_dh = wh * np.log(gt_heights / ex_heights) + + targets = np.vstack((targets_dx, targets_dy, targets_dw, + targets_dh)).transpose() + return targets + + +def expand_boxes(boxes, scale): + """Expand an array of boxes by a given scale.""" + w_half = (boxes[:, 2] - boxes[:, 0]) * .5 + h_half = (boxes[:, 3] - boxes[:, 1]) * .5 + x_c = (boxes[:, 2] + boxes[:, 0]) * .5 + y_c = (boxes[:, 3] + boxes[:, 1]) * .5 + + w_half *= scale + h_half *= scale + + boxes_exp = np.zeros(boxes.shape) + boxes_exp[:, 0] = x_c - w_half + boxes_exp[:, 2] = x_c + w_half + boxes_exp[:, 1] = y_c - h_half + boxes_exp[:, 3] 
= y_c + h_half + + return boxes_exp + + +def flip_boxes(boxes, im_width): + """Flip boxes horizontally.""" + boxes_flipped = boxes.copy() + boxes_flipped[:, 0::4] = im_width - boxes[:, 2::4] - 1 + boxes_flipped[:, 2::4] = im_width - boxes[:, 0::4] - 1 + return boxes_flipped + + +def aspect_ratio(boxes, aspect_ratio): + """Perform width-relative aspect ratio transformation.""" + boxes_ar = boxes.copy() + boxes_ar[:, 0::4] = aspect_ratio * boxes[:, 0::4] + boxes_ar[:, 2::4] = aspect_ratio * boxes[:, 2::4] + return boxes_ar + + +def box_voting(top_dets, all_dets, thresh, scoring_method='ID', beta=1.0): + """Apply bounding-box voting to refine `top_dets` by voting with `all_dets`. + See: https://arxiv.org/abs/1505.01749. Optional score averaging (not in the + referenced paper) can be applied by setting `scoring_method` appropriately. + """ + # top_dets is [N, 5] each row is [x1 y1 x2 y2, sore] + # all_dets is [N, 5] each row is [x1 y1 x2 y2, sore] + top_dets_out = top_dets.copy() + top_boxes = top_dets[:, :4] + all_boxes = all_dets[:, :4] + all_scores = all_dets[:, 4] + top_to_all_overlaps = bbox_overlaps(top_boxes, all_boxes) + for k in range(top_dets_out.shape[0]): + inds_to_vote = np.where(top_to_all_overlaps[k] >= thresh)[0] + boxes_to_vote = all_boxes[inds_to_vote, :] + ws = all_scores[inds_to_vote] + top_dets_out[k, :4] = np.average(boxes_to_vote, axis=0, weights=ws) + if scoring_method == 'ID': + # Identity, nothing to do + pass + elif scoring_method == 'TEMP_AVG': + # Average probabilities (considered as P(detected class) vs. + # P(not the detected class)) after smoothing with a temperature + # hyperparameter. 
+ P = np.vstack((ws, 1.0 - ws)) + P_max = np.max(P, axis=0) + X = np.log(P / P_max) + X_exp = np.exp(X / beta) + P_temp = X_exp / np.sum(X_exp, axis=0) + P_avg = P_temp[0].mean() + top_dets_out[k, 4] = P_avg + elif scoring_method == 'AVG': + # Combine new probs from overlapping boxes + top_dets_out[k, 4] = ws.mean() + elif scoring_method == 'IOU_AVG': + P = ws + ws = top_to_all_overlaps[k, inds_to_vote] + P_avg = np.average(P, weights=ws) + top_dets_out[k, 4] = P_avg + elif scoring_method == 'GENERALIZED_AVG': + P_avg = np.mean(ws**beta)**(1.0 / beta) + top_dets_out[k, 4] = P_avg + elif scoring_method == 'QUASI_SUM': + top_dets_out[k, 4] = ws.sum() / float(len(ws))**beta + else: + raise NotImplementedError( + 'Unknown scoring method {}'.format(scoring_method) + ) + + return top_dets_out + + +def nms(dets, thresh): + """Apply classic DPM-style greedy NMS.""" + if dets.shape[0] == 0: + return [] + return cython_nms.nms(dets, thresh) + + +def soft_nms( + dets, sigma=0.5, overlap_thresh=0.3, score_thresh=0.001, method='linear' +): + """Apply the soft NMS algorithm from https://arxiv.org/abs/1704.04503.""" + if dets.shape[0] == 0: + return dets, [] + + methods = {'hard': 0, 'linear': 1, 'gaussian': 2} + assert method in methods, 'Unknown soft_nms method: {}'.format(method) + + dets, keep = cython_nms.soft_nms( + np.ascontiguousarray(dets, dtype=np.float32), + np.float32(sigma), + np.float32(overlap_thresh), + np.float32(score_thresh), + np.uint8(methods[method]) + ) + return dets, keep diff --git a/detectron/utils/c2.py b/detectron/utils/c2.py new file mode 100644 index 0000000000000000000000000000000000000000..ba0cb3ee8afe728b8c352f0b88e59fb6b261fc8b --- /dev/null +++ b/detectron/utils/c2.py @@ -0,0 +1,166 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Helpful utilities for working with Caffe2.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from six import string_types +import contextlib +import subprocess + +from caffe2.proto import caffe2_pb2 +from caffe2.python import core +from caffe2.python import dyndep +from caffe2.python import scope +from caffe2.python import workspace + +import detectron.utils.env as envu + + +def import_contrib_ops(): + """Import contrib ops needed by Detectron.""" + envu.import_nccl_ops() + + +def import_detectron_ops(): + """Import Detectron ops.""" + detectron_ops_lib = envu.get_detectron_ops_lib() + dyndep.InitOpsLibrary(detectron_ops_lib) + + +def import_custom_ops(): + """Import custom ops.""" + custom_ops_lib = envu.get_custom_ops_lib() + dyndep.InitOpsLibrary(custom_ops_lib) + + +def SuffixNet(name, net, prefix_len, outputs): + """Returns a new Net from the given Net (`net`) that includes only the ops + after removing the first `prefix_len` number of ops. The new Net is thus a + suffix of `net`. Blobs listed in `outputs` are registered as external output + blobs. 
+ """ + outputs = BlobReferenceList(outputs) + for output in outputs: + assert net.BlobIsDefined(output) + new_net = net.Clone(name) + + del new_net.Proto().op[:] + del new_net.Proto().external_input[:] + del new_net.Proto().external_output[:] + + # Add suffix ops + new_net.Proto().op.extend(net.Proto().op[prefix_len:]) + # Add external input blobs + # Treat any undefined blobs as external inputs + input_names = [ + i for op in new_net.Proto().op for i in op.input + if not new_net.BlobIsDefined(i)] + new_net.Proto().external_input.extend(input_names) + # Add external output blobs + output_names = [str(o) for o in outputs] + new_net.Proto().external_output.extend(output_names) + return new_net, [new_net.GetBlobRef(o) for o in output_names] + + +def BlobReferenceList(blob_ref_or_list): + """Ensure that the argument is returned as a list of BlobReferences.""" + if isinstance(blob_ref_or_list, core.BlobReference): + return [blob_ref_or_list] + elif type(blob_ref_or_list) in (list, tuple): + for b in blob_ref_or_list: + assert isinstance(b, core.BlobReference) + return blob_ref_or_list + else: + raise TypeError( + 'blob_ref_or_list must be a BlobReference or a list/tuple of ' + 'BlobReferences' + ) + + +def UnscopeName(possibly_scoped_name): + """Remove any name scoping from a (possibly) scoped name. For example, + convert the name 'gpu_0/foo' to 'foo'.""" + assert isinstance(possibly_scoped_name, string_types) + return possibly_scoped_name[ + possibly_scoped_name.rfind(scope._NAMESCOPE_SEPARATOR) + 1:] + + +@contextlib.contextmanager +def NamedCudaScope(gpu_id): + """Creates a GPU name scope and CUDA device scope. 
This function is provided + to reduce `with ...` nesting levels.""" + with GpuNameScope(gpu_id): + with CudaScope(gpu_id): + yield + + +@contextlib.contextmanager +def GpuNameScope(gpu_id): + """Create a name scope for GPU device `gpu_id`.""" + with core.NameScope('gpu_{:d}'.format(gpu_id)): + yield + + +@contextlib.contextmanager +def CudaScope(gpu_id): + """Create a CUDA device scope for GPU device `gpu_id`.""" + gpu_dev = CudaDevice(gpu_id) + with core.DeviceScope(gpu_dev): + yield + + +@contextlib.contextmanager +def CpuScope(): + """Create a CPU device scope.""" + cpu_dev = core.DeviceOption(caffe2_pb2.CPU) + with core.DeviceScope(cpu_dev): + yield + + +def CudaDevice(gpu_id): + """Create a Cuda device.""" + return core.DeviceOption(caffe2_pb2.CUDA, gpu_id) + + +def gauss_fill(std): + """Gaussian fill helper to reduce verbosity.""" + return ('GaussianFill', {'std': std}) + + +def const_fill(value): + """Constant fill helper to reduce verbosity.""" + return ('ConstantFill', {'value': value}) + + +def get_nvidia_info(): + return ( + get_nvidia_smi_output(), + workspace.GetCUDAVersion(), + workspace.GetCuDNNVersion(), + ) + + +def get_nvidia_smi_output(): + try: + info = subprocess.check_output(["nvidia-smi"], stderr=subprocess.STDOUT) + info = info.decode("utf8") + except Exception as e: + info = "Executing nvidia-smi failed: " + str(e) + return info.strip() diff --git a/detectron/utils/collections.py b/detectron/utils/collections.py new file mode 100644 index 0000000000000000000000000000000000000000..e62ce79f80512407d2beed1657be1a889fd037bd --- /dev/null +++ b/detectron/utils/collections.py @@ -0,0 +1,66 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""A simple attribute dictionary used for representing configuration options.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + + +class AttrDict(dict): + + IMMUTABLE = '__immutable__' + + def __init__(self, *args, **kwargs): + super(AttrDict, self).__init__(*args, **kwargs) + self.__dict__[AttrDict.IMMUTABLE] = False + + def __getattr__(self, name): + if name in self.__dict__: + return self.__dict__[name] + elif name in self: + return self[name] + else: + raise AttributeError(name) + + def __setattr__(self, name, value): + if not self.__dict__[AttrDict.IMMUTABLE]: + if name in self.__dict__: + self.__dict__[name] = value + else: + self[name] = value + else: + raise AttributeError( + 'Attempted to set "{}" to "{}", but AttrDict is immutable'. + format(name, value) + ) + + def immutable(self, is_immutable): + """Set immutability to is_immutable and recursively apply the setting + to all nested AttrDicts. 
+ """ + self.__dict__[AttrDict.IMMUTABLE] = is_immutable + # Recursively set immutable state + for v in self.__dict__.values(): + if isinstance(v, AttrDict): + v.immutable(is_immutable) + for v in self.values(): + if isinstance(v, AttrDict): + v.immutable(is_immutable) + + def is_immutable(self): + return self.__dict__[AttrDict.IMMUTABLE] diff --git a/detectron/utils/colormap.py b/detectron/utils/colormap.py new file mode 100644 index 0000000000000000000000000000000000000000..bc6869f289a9c47519ca69bdddba3dd4fa82ea27 --- /dev/null +++ b/detectron/utils/colormap.py @@ -0,0 +1,113 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +"""An awesome colormap for really neat visualizations.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np + + +def colormap(rgb=False): + color_list = np.array( + [ + 0.000, 0.447, 0.741, + 0.850, 0.325, 0.098, + 0.929, 0.694, 0.125, + 0.494, 0.184, 0.556, + 0.466, 0.674, 0.188, + 0.301, 0.745, 0.933, + 0.635, 0.078, 0.184, + 0.300, 0.300, 0.300, + 0.600, 0.600, 0.600, + 1.000, 0.000, 0.000, + 1.000, 0.500, 0.000, + 0.749, 0.749, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 1.000, + 0.667, 0.000, 1.000, + 0.333, 0.333, 0.000, + 0.333, 0.667, 0.000, + 0.333, 1.000, 0.000, + 0.667, 0.333, 0.000, + 0.667, 0.667, 0.000, + 0.667, 1.000, 0.000, + 1.000, 0.333, 0.000, + 1.000, 0.667, 0.000, + 1.000, 1.000, 0.000, + 0.000, 0.333, 0.500, + 0.000, 0.667, 0.500, + 0.000, 1.000, 0.500, + 0.333, 0.000, 0.500, + 0.333, 0.333, 0.500, + 0.333, 0.667, 0.500, + 0.333, 1.000, 0.500, + 0.667, 0.000, 0.500, + 0.667, 0.333, 0.500, + 0.667, 0.667, 0.500, + 0.667, 1.000, 0.500, + 1.000, 0.000, 0.500, + 1.000, 0.333, 0.500, + 1.000, 0.667, 0.500, + 1.000, 1.000, 0.500, + 0.000, 0.333, 1.000, + 0.000, 0.667, 1.000, + 0.000, 1.000, 1.000, + 0.333, 0.000, 1.000, + 0.333, 0.333, 1.000, + 0.333, 0.667, 1.000, + 0.333, 1.000, 1.000, + 0.667, 0.000, 1.000, + 0.667, 0.333, 1.000, + 0.667, 0.667, 1.000, + 0.667, 1.000, 1.000, + 1.000, 0.000, 1.000, + 1.000, 0.333, 1.000, + 1.000, 0.667, 1.000, + 0.167, 0.000, 0.000, + 0.333, 0.000, 0.000, + 0.500, 0.000, 0.000, + 0.667, 0.000, 0.000, + 0.833, 0.000, 0.000, + 1.000, 0.000, 0.000, + 0.000, 0.167, 0.000, + 0.000, 0.333, 0.000, + 0.000, 0.500, 0.000, + 0.000, 0.667, 0.000, + 0.000, 0.833, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 0.167, + 0.000, 0.000, 0.333, + 0.000, 0.000, 0.500, + 0.000, 0.000, 0.667, + 0.000, 0.000, 0.833, + 0.000, 
0.000, 1.000, + 0.000, 0.000, 0.000, + 0.143, 0.143, 0.143, + 0.286, 0.286, 0.286, + 0.429, 0.429, 0.429, + 0.571, 0.571, 0.571, + 0.714, 0.714, 0.714, + 0.857, 0.857, 0.857, + 1.000, 1.000, 1.000 + ] + ).astype(np.float32) + color_list = color_list.reshape((-1, 3)) * 255 + if not rgb: + color_list = color_list[:, ::-1] + return color_list diff --git a/detectron/utils/coordinator.py b/detectron/utils/coordinator.py new file mode 100644 index 0000000000000000000000000000000000000000..62eb25be2ec9a2635a186e215257eb1a53d1fa53 --- /dev/null +++ b/detectron/utils/coordinator.py @@ -0,0 +1,73 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +"""Coordinated access to a shared multithreading/processing queue.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import contextlib +import logging +import threading +import traceback +from six.moves import queue as Queue + +log = logging.getLogger(__name__) + + +class Coordinator(object): + + def __init__(self): + self._event = threading.Event() + + def request_stop(self): + log.debug('Coordinator stopping') + self._event.set() + + def should_stop(self): + return self._event.is_set() + + def wait_for_stop(self): + return self._event.wait() + + @contextlib.contextmanager + def stop_on_exception(self): + try: + yield + except Exception: + if not self.should_stop(): + traceback.print_exc() + self.request_stop() + + +def coordinated_get(coordinator, queue): + while not coordinator.should_stop(): + try: + return queue.get(block=True, timeout=1.0) + except Queue.Empty: + continue + raise Exception('Coordinator stopped during get()') + + +def coordinated_put(coordinator, queue, element): + while not coordinator.should_stop(): + try: + queue.put(element, block=True, timeout=1.0) + return + except Queue.Full: + continue + raise Exception('Coordinator stopped during put()') diff --git a/detectron/utils/cython_bbox.pyx b/detectron/utils/cython_bbox.pyx new file mode 100644 index 0000000000000000000000000000000000000000..4c1f015f96fd08441f3e90835767e3ea9adfa25d --- /dev/null +++ b/detectron/utils/cython_bbox.pyx @@ -0,0 +1,73 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## +# +# Based on: +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Sergey Karayev +# -------------------------------------------------------- + +cimport cython +import numpy as np +cimport numpy as np + +DTYPE = np.float32 +ctypedef np.float32_t DTYPE_t + +@cython.boundscheck(False) +def bbox_overlaps( + np.ndarray[DTYPE_t, ndim=2] boxes, + np.ndarray[DTYPE_t, ndim=2] query_boxes): + """ + Parameters + ---------- + boxes: (N, 4) ndarray of float + query_boxes: (K, 4) ndarray of float + Returns + ------- + overlaps: (N, K) ndarray of overlap between boxes and query_boxes + """ + cdef unsigned int N = boxes.shape[0] + cdef unsigned int K = query_boxes.shape[0] + cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) + cdef DTYPE_t iw, ih, box_area + cdef DTYPE_t ua + cdef unsigned int k, n + with nogil: + for k in range(K): + box_area = ( + (query_boxes[k, 2] - query_boxes[k, 0] + 1) * + (query_boxes[k, 3] - query_boxes[k, 1] + 1) + ) + for n in range(N): + iw = ( + min(boxes[n, 2], query_boxes[k, 2]) - + max(boxes[n, 0], query_boxes[k, 0]) + 1 + ) + if iw > 0: + ih = ( + min(boxes[n, 3], query_boxes[k, 3]) - + max(boxes[n, 1], query_boxes[k, 1]) + 1 + ) + if ih > 0: + ua = float( + (boxes[n, 2] - boxes[n, 0] + 1) * + (boxes[n, 3] - boxes[n, 1] + 1) + + box_area - iw * ih + ) + overlaps[n, k] = iw * ih / ua + return overlaps diff --git 
a/detectron/utils/cython_nms.pyx b/detectron/utils/cython_nms.pyx new file mode 100644 index 0000000000000000000000000000000000000000..0c1785b4f97d1b315b16700ef0cd59cc77af3fe2 --- /dev/null +++ b/detectron/utils/cython_nms.pyx @@ -0,0 +1,203 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## +# +# Based on: +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +cimport cython +import numpy as np +cimport numpy as np + +cdef inline np.float32_t max(np.float32_t a, np.float32_t b) nogil: + return a if a >= b else b + +cdef inline np.float32_t min(np.float32_t a, np.float32_t b) nogil: + return a if a <= b else b + +@cython.boundscheck(False) +@cython.cdivision(True) +@cython.wraparound(False) +def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float32_t thresh): + cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] + cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] + cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] + cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] + cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] + + cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) + cdef np.ndarray[np.int_t, 
ndim=1] order = scores.argsort()[::-1] + + cdef int ndets = dets.shape[0] + cdef np.ndarray[np.int_t, ndim=1] suppressed = \ + np.zeros((ndets), dtype=np.int) + + # nominal indices + cdef int _i, _j + # sorted indices + cdef int i, j + # temp variables for box i's (the box currently under consideration) + cdef np.float32_t ix1, iy1, ix2, iy2, iarea + # variables for computing overlap with box j (lower scoring box) + cdef np.float32_t xx1, yy1, xx2, yy2 + cdef np.float32_t w, h + cdef np.float32_t inter, ovr + + with nogil: + for _i in range(ndets): + i = order[_i] + if suppressed[i] == 1: + continue + ix1 = x1[i] + iy1 = y1[i] + ix2 = x2[i] + iy2 = y2[i] + iarea = areas[i] + for _j in range(_i + 1, ndets): + j = order[_j] + if suppressed[j] == 1: + continue + xx1 = max(ix1, x1[j]) + yy1 = max(iy1, y1[j]) + xx2 = min(ix2, x2[j]) + yy2 = min(iy2, y2[j]) + w = max(0.0, xx2 - xx1 + 1) + h = max(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (iarea + areas[j] - inter) + if ovr >= thresh: + suppressed[j] = 1 + + return np.where(suppressed == 0)[0] + +# ---------------------------------------------------------- +# Soft-NMS: Improving Object Detection With One Line of Code +# Copyright (c) University of Maryland, College Park +# Licensed under The MIT License [see LICENSE for details] +# Written by Navaneeth Bodla and Bharat Singh +# ---------------------------------------------------------- +@cython.boundscheck(False) +@cython.cdivision(True) +@cython.wraparound(False) +def soft_nms( + np.ndarray[float, ndim=2] boxes_in, + float sigma=0.5, + float Nt=0.3, + float threshold=0.001, + unsigned int method=0 +): + boxes = boxes_in.copy() + cdef unsigned int N = boxes.shape[0] + cdef float iw, ih, box_area + cdef float ua + cdef int pos = 0 + cdef float maxscore = 0 + cdef int maxpos = 0 + cdef float x1, x2, y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov + inds = np.arange(N) + + for i in range(N): + maxscore = boxes[i, 4] + maxpos = i + + tx1 = boxes[i,0] + ty1 = 
boxes[i,1] + tx2 = boxes[i,2] + ty2 = boxes[i,3] + ts = boxes[i,4] + ti = inds[i] + + pos = i + 1 + # get max box + while pos < N: + if maxscore < boxes[pos, 4]: + maxscore = boxes[pos, 4] + maxpos = pos + pos = pos + 1 + + # add max box as a detection + boxes[i,0] = boxes[maxpos,0] + boxes[i,1] = boxes[maxpos,1] + boxes[i,2] = boxes[maxpos,2] + boxes[i,3] = boxes[maxpos,3] + boxes[i,4] = boxes[maxpos,4] + inds[i] = inds[maxpos] + + # swap ith box with position of max box + boxes[maxpos,0] = tx1 + boxes[maxpos,1] = ty1 + boxes[maxpos,2] = tx2 + boxes[maxpos,3] = ty2 + boxes[maxpos,4] = ts + inds[maxpos] = ti + + tx1 = boxes[i,0] + ty1 = boxes[i,1] + tx2 = boxes[i,2] + ty2 = boxes[i,3] + ts = boxes[i,4] + + pos = i + 1 + # NMS iterations, note that N changes if detection boxes fall below + # threshold + while pos < N: + x1 = boxes[pos, 0] + y1 = boxes[pos, 1] + x2 = boxes[pos, 2] + y2 = boxes[pos, 3] + s = boxes[pos, 4] + + area = (x2 - x1 + 1) * (y2 - y1 + 1) + iw = (min(tx2, x2) - max(tx1, x1) + 1) + if iw > 0: + ih = (min(ty2, y2) - max(ty1, y1) + 1) + if ih > 0: + ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) + ov = iw * ih / ua #iou between max box and detection box + + if method == 1: # linear + if ov > Nt: + weight = 1 - ov + else: + weight = 1 + elif method == 2: # gaussian + weight = np.exp(-(ov * ov)/sigma) + else: # original NMS + if ov > Nt: + weight = 0 + else: + weight = 1 + + boxes[pos, 4] = weight*boxes[pos, 4] + + # if box score falls below threshold, discard the box by + # swapping with last box update N + if boxes[pos, 4] < threshold: + boxes[pos,0] = boxes[N-1, 0] + boxes[pos,1] = boxes[N-1, 1] + boxes[pos,2] = boxes[N-1, 2] + boxes[pos,3] = boxes[N-1, 3] + boxes[pos,4] = boxes[N-1, 4] + inds[pos] = inds[N-1] + N = N - 1 + pos = pos - 1 + + pos = pos + 1 + + return boxes[:N], inds[:N] diff --git a/detectron/utils/env.py b/detectron/utils/env.py new file mode 100644 index 
0000000000000000000000000000000000000000..128162fcf6929a3f40eae577dcb8560c31e29a2b --- /dev/null +++ b/detectron/utils/env.py @@ -0,0 +1,91 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Environment helper functions.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import os +import sys +import yaml + +# Default value of the CMake install prefix +_CMAKE_INSTALL_PREFIX = '/usr/local' +# Detectron ops lib +_DETECTRON_OPS_LIB = 'libcaffe2_detectron_ops_gpu.so' + + +def get_runtime_dir(): + """Retrieve the path to the runtime directory.""" + return sys.path[0] + + +def get_py_bin_ext(): + """Retrieve python binary extension.""" + return '.py' + + +def set_up_matplotlib(): + """Set matplotlib up.""" + import matplotlib + # Use a non-interactive backend + matplotlib.use('Agg') + + +def exit_on_error(): + """Exit from a detectron tool when there's an error.""" + sys.exit(1) + + +def import_nccl_ops(): + """Import NCCL ops.""" + # There is no need to load NCCL ops since the + # NCCL dependency is built into the Caffe2 gpu lib + pass + + +def get_detectron_ops_lib(): + """Retrieve Detectron ops library.""" + # Candidate prefixes for detectron ops lib path + prefixes = [_CMAKE_INSTALL_PREFIX, sys.prefix, sys.exec_prefix] + sys.path + # 
Candidate subdirs for detectron ops lib + subdirs = ['lib', 'torch/lib'] + # Try to find detectron ops lib + for prefix in prefixes: + for subdir in subdirs: + ops_path = os.path.join(prefix, subdir, _DETECTRON_OPS_LIB) + if os.path.exists(ops_path): + print('Found Detectron ops lib: {}'.format(ops_path)) + return ops_path + raise Exception('Detectron ops lib not found') + + +def get_custom_ops_lib(): + """Retrieve custom ops library.""" + det_dir, _ = os.path.split(os.path.dirname(__file__)) + root_dir, _ = os.path.split(det_dir) + custom_ops_lib = os.path.join( + root_dir, 'build/libcaffe2_detectron_custom_ops_gpu.so') + assert os.path.exists(custom_ops_lib), \ + 'Custom ops lib not found at \'{}\''.format(custom_ops_lib) + return custom_ops_lib + + +# YAML load/dump function aliases +yaml_load = yaml.load +yaml_dump = yaml.dump diff --git a/detectron/utils/image.py b/detectron/utils/image.py new file mode 100644 index 0000000000000000000000000000000000000000..f7a5d3652075da44a377be4243d5885732e7ab0f --- /dev/null +++ b/detectron/utils/image.py @@ -0,0 +1,45 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +"""Image helper functions.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import cv2 +import numpy as np + + +def aspect_ratio_rel(im, aspect_ratio): + """Performs width-relative aspect ratio transformation.""" + im_h, im_w = im.shape[:2] + im_ar_w = int(round(aspect_ratio * im_w)) + im_ar = cv2.resize(im, dsize=(im_ar_w, im_h)) + return im_ar + + +def aspect_ratio_abs(im, aspect_ratio): + """Performs absolute aspect ratio transformation.""" + im_h, im_w = im.shape[:2] + im_area = im_h * im_w + + im_ar_w = np.sqrt(im_area * aspect_ratio) + im_ar_h = np.sqrt(im_area / aspect_ratio) + assert np.isclose(im_ar_w / im_ar_h, aspect_ratio) + + im_ar = cv2.resize(im, dsize=(int(im_ar_w), int(im_ar_h))) + return im_ar diff --git a/detectron/utils/io.py b/detectron/utils/io.py new file mode 100644 index 0000000000000000000000000000000000000000..4501b0a3f10ccf92a4e31d8fea1ac5b1bb4c680c --- /dev/null +++ b/detectron/utils/io.py @@ -0,0 +1,192 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +"""IO utilities.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import errno +import hashlib +import logging +import os +import re +import six +import sys +from six.moves import cPickle as pickle +from six.moves import urllib +from uuid import uuid4 + +logger = logging.getLogger(__name__) + +_DETECTRON_S3_BASE_URL = 'https://dl.fbaipublicfiles.com/detectron' + + +def save_object(obj, file_name, pickle_format=2): + """Save a Python object by pickling it. + +Unless specifically overridden, we want to save it in Pickle format=2 since this +will allow other Python2 executables to load the resulting Pickle. When we want +to completely remove Python2 backward-compatibility, we can bump it up to 3. We +should never use pickle.HIGHEST_PROTOCOL as far as possible if the resulting +file is manifested or used, external to the system. + """ + file_name = os.path.abspath(file_name) + # Avoid filesystem race conditions (particularly on network filesystems) + # by saving to a random tmp file on the same filesystem, and then + # atomically rename to the target filename. + tmp_file_name = file_name + ".tmp." + uuid4().hex + try: + with open(tmp_file_name, 'wb') as f: + pickle.dump(obj, f, pickle_format) + f.flush() # make sure it's written to disk + os.fsync(f.fileno()) + os.rename(tmp_file_name, file_name) + finally: + # Clean up the temp file on failure. Rather than using os.path.exists(), + # which can be unreliable on network filesystems, attempt to delete and + # ignore os errors. 
+ try: + os.remove(tmp_file_name) + except EnvironmentError as e: # parent class of IOError, OSError + if getattr(e, 'errno', None) != errno.ENOENT: # We expect ENOENT + logger.info("Could not delete temp file %r", + tmp_file_name, exc_info=True) + # pass through since we don't want the job to crash + + +def load_object(file_name): + with open(file_name, 'rb') as f: + # The default encoding used while unpickling is 7-bit (ASCII.) However, + # the blobs are arbitrary 8-bit bytes which don't agree. The absolute + # correct way to do this is to use `encoding="bytes"` and then interpret + # the blob names either as ASCII, or better, as unicode utf-8. A + # reasonable fix, however, is to treat it the encoding as 8-bit latin1 + # (which agrees with the first 256 characters of Unicode anyway.) + if six.PY2: + return pickle.load(f) + else: + return pickle.load(f, encoding='latin1') + + +def cache_url(url_or_file, cache_dir): + """Download the file specified by the URL to the cache_dir and return the + path to the cached file. If the argument is not a URL, simply return it as + is. 
+ """ + is_url = re.match( + r'^(?:http)s?://', url_or_file, re.IGNORECASE + ) is not None + + if not is_url: + return url_or_file + + url = url_or_file + assert url.startswith(_DETECTRON_S3_BASE_URL), \ + ('Detectron only automatically caches URLs in the Detectron S3 ' + 'bucket: {}').format(_DETECTRON_S3_BASE_URL) + + cache_file_path = url.replace(_DETECTRON_S3_BASE_URL, cache_dir) + if os.path.exists(cache_file_path): + assert_cache_file_is_ok(url, cache_file_path) + return cache_file_path + + cache_file_dir = os.path.dirname(cache_file_path) + if not os.path.exists(cache_file_dir): + os.makedirs(cache_file_dir) + + logger.info('Downloading remote file {} to {}'.format(url, cache_file_path)) + download_url(url, cache_file_path) + assert_cache_file_is_ok(url, cache_file_path) + return cache_file_path + + +def assert_cache_file_is_ok(url, file_path): + """Check that cache file has the correct hash.""" + # File is already in the cache, verify that the md5sum matches and + # return local path + cache_file_md5sum = _get_file_md5sum(file_path) + ref_md5sum = _get_reference_md5sum(url) + assert cache_file_md5sum == ref_md5sum, \ + ('Target URL {} appears to be downloaded to the local cache file ' + '{}, but the md5 hash of the local file does not match the ' + 'reference (actual: {} vs. expected: {}). You may wish to delete ' + 'the cached file and try again to trigger automatic ' + 'download.').format(url, file_path, cache_file_md5sum, ref_md5sum) + + +def _progress_bar(count, total): + """Report download progress. + Credit: + https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console/27871113 + """ + bar_len = 60 + filled_len = int(round(bar_len * count / float(total))) + + percents = round(100.0 * count / float(total), 1) + bar = '=' * filled_len + '-' * (bar_len - filled_len) + + sys.stdout.write( + ' [{}] {}% of {:.1f}MB file \r'. 
+ format(bar, percents, total / 1024 / 1024) + ) + sys.stdout.flush() + if count >= total: + sys.stdout.write('\n') + + +def download_url( + url, dst_file_path, chunk_size=8192, progress_hook=_progress_bar +): + """Download url and write it to dst_file_path. + Credit: + https://stackoverflow.com/questions/2028517/python-urllib2-progress-hook + """ + response = urllib.request.urlopen(url) + if six.PY2: + total_size = response.info().getheader('Content-Length').strip() + else: + total_size = response.info().get('Content-Length').strip() + total_size = int(total_size) + bytes_so_far = 0 + + with open(dst_file_path, 'wb') as f: + while 1: + chunk = response.read(chunk_size) + bytes_so_far += len(chunk) + if not chunk: + break + if progress_hook: + progress_hook(bytes_so_far, total_size) + f.write(chunk) + + return bytes_so_far + + +def _get_file_md5sum(file_name): + """Compute the md5 hash of a file.""" + hash_obj = hashlib.md5() + with open(file_name, 'rb') as f: + hash_obj.update(f.read()) + return hash_obj.hexdigest().encode('utf-8') + + +def _get_reference_md5sum(url): + """By convention the md5 hash for url is stored in url + '.md5sum'.""" + url_md5sum = url + '.md5sum' + md5sum = urllib.request.urlopen(url_md5sum).read().strip() + return md5sum diff --git a/detectron/utils/keypoints.py b/detectron/utils/keypoints.py new file mode 100644 index 0000000000000000000000000000000000000000..b305cea2d6d4e527da226645d44cf34328f02cc6 --- /dev/null +++ b/detectron/utils/keypoints.py @@ -0,0 +1,266 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Keypoint utilities (somewhat specific to COCO keypoints).""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import cv2 +import numpy as np + +from detectron.core.config import cfg +import detectron.utils.blob as blob_utils + + +def get_keypoints(): + """Get the COCO keypoints and their left/right flip coorespondence map.""" + # Keypoints are not available in the COCO json for the test split, so we + # provide them here. + keypoints = [ + 'nose', + 'left_eye', + 'right_eye', + 'left_ear', + 'right_ear', + 'left_shoulder', + 'right_shoulder', + 'left_elbow', + 'right_elbow', + 'left_wrist', + 'right_wrist', + 'left_hip', + 'right_hip', + 'left_knee', + 'right_knee', + 'left_ankle', + 'right_ankle' + ] + keypoint_flip_map = { + 'left_eye': 'right_eye', + 'left_ear': 'right_ear', + 'left_shoulder': 'right_shoulder', + 'left_elbow': 'right_elbow', + 'left_wrist': 'right_wrist', + 'left_hip': 'right_hip', + 'left_knee': 'right_knee', + 'left_ankle': 'right_ankle' + } + return keypoints, keypoint_flip_map + + +def get_person_class_index(): + """Index of the person class in COCO.""" + return 1 + + +def flip_keypoints(keypoints, keypoint_flip_map, keypoint_coords, width): + """Left/right flip keypoint_coords. keypoints and keypoint_flip_map are + accessible from get_keypoints(). 
+ """ + flipped_kps = keypoint_coords.copy() + for lkp, rkp in keypoint_flip_map.items(): + lid = keypoints.index(lkp) + rid = keypoints.index(rkp) + flipped_kps[:, :, lid] = keypoint_coords[:, :, rid] + flipped_kps[:, :, rid] = keypoint_coords[:, :, lid] + + # Flip x coordinates + flipped_kps[:, 0, :] = width - flipped_kps[:, 0, :] - 1 + # Maintain COCO convention that if visibility == 0, then x, y = 0 + inds = np.where(flipped_kps[:, 2, :] == 0) + flipped_kps[inds[0], 0, inds[1]] = 0 + return flipped_kps + + +def flip_heatmaps(heatmaps): + """Flip heatmaps horizontally.""" + keypoints, flip_map = get_keypoints() + heatmaps_flipped = heatmaps.copy() + for lkp, rkp in flip_map.items(): + lid = keypoints.index(lkp) + rid = keypoints.index(rkp) + heatmaps_flipped[:, rid, :, :] = heatmaps[:, lid, :, :] + heatmaps_flipped[:, lid, :, :] = heatmaps[:, rid, :, :] + heatmaps_flipped = heatmaps_flipped[:, :, :, ::-1] + return heatmaps_flipped + + +def heatmaps_to_keypoints(maps, rois): + """Extract predicted keypoint locations from heatmaps. Output has shape + (#rois, 4, #keypoints) with the 4 rows corresponding to (x, y, logit, prob) + for each keypoint. + """ + # This function converts a discrete image coordinate in a HEATMAP_SIZE x + # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain + # consistency with keypoints_to_heatmap_labels by using the conversion from + # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a + # continuous coordinate. 
+ offset_x = rois[:, 0] + offset_y = rois[:, 1] + + widths = rois[:, 2] - rois[:, 0] + heights = rois[:, 3] - rois[:, 1] + widths = np.maximum(widths, 1) + heights = np.maximum(heights, 1) + widths_ceil = np.ceil(widths) + heights_ceil = np.ceil(heights) + + # NCHW to NHWC for use with OpenCV + maps = np.transpose(maps, [0, 2, 3, 1]) + min_size = cfg.KRCNN.INFERENCE_MIN_SIZE + xy_preds = np.zeros( + (len(rois), 4, cfg.KRCNN.NUM_KEYPOINTS), dtype=np.float32) + for i in range(len(rois)): + if min_size > 0: + roi_map_width = int(np.maximum(widths_ceil[i], min_size)) + roi_map_height = int(np.maximum(heights_ceil[i], min_size)) + else: + roi_map_width = widths_ceil[i] + roi_map_height = heights_ceil[i] + width_correction = widths[i] / roi_map_width + height_correction = heights[i] / roi_map_height + roi_map = cv2.resize( + maps[i], (roi_map_width, roi_map_height), + interpolation=cv2.INTER_CUBIC) + # Bring back to CHW + roi_map = np.transpose(roi_map, [2, 0, 1]) + roi_map_probs = scores_to_probs(roi_map.copy()) + w = roi_map.shape[2] + for k in range(cfg.KRCNN.NUM_KEYPOINTS): + pos = roi_map[k, :, :].argmax() + x_int = pos % w + y_int = (pos - x_int) // w + assert (roi_map_probs[k, y_int, x_int] == + roi_map_probs[k, :, :].max()) + x = (x_int + 0.5) * width_correction + y = (y_int + 0.5) * height_correction + xy_preds[i, 0, k] = x + offset_x[i] + xy_preds[i, 1, k] = y + offset_y[i] + xy_preds[i, 2, k] = roi_map[k, y_int, x_int] + xy_preds[i, 3, k] = roi_map_probs[k, y_int, x_int] + + return xy_preds + + +def keypoints_to_heatmap_labels(keypoints, rois): + """Encode keypoint location in the target heatmap for use in + SoftmaxWithLoss. + """ + # Maps keypoints from the half-open interval [x1, x2) on continuous image + # coordinates to the closed interval [0, HEATMAP_SIZE - 1] on discrete image + # coordinates. 
We use the continuous <-> discrete conversion from Heckbert + # 1990 ("What is the coordinate of a pixel?"): d = floor(c) and c = d + 0.5, + # where d is a discrete coordinate and c is a continuous coordinate. + assert keypoints.shape[2] == cfg.KRCNN.NUM_KEYPOINTS + + shape = (len(rois), cfg.KRCNN.NUM_KEYPOINTS) + heatmaps = blob_utils.zeros(shape) + weights = blob_utils.zeros(shape) + + offset_x = rois[:, 0] + offset_y = rois[:, 1] + scale_x = cfg.KRCNN.HEATMAP_SIZE / (rois[:, 2] - rois[:, 0]) + scale_y = cfg.KRCNN.HEATMAP_SIZE / (rois[:, 3] - rois[:, 1]) + + for kp in range(keypoints.shape[2]): + vis = keypoints[:, 2, kp] > 0 + x = keypoints[:, 0, kp].astype(np.float32) + y = keypoints[:, 1, kp].astype(np.float32) + # Since we use floor below, if a keypoint is exactly on the roi's right + # or bottom boundary, we shift it in by eps (conceptually) to keep it in + # the ground truth heatmap. + x_boundary_inds = np.where(x == rois[:, 2])[0] + y_boundary_inds = np.where(y == rois[:, 3])[0] + x = (x - offset_x) * scale_x + x = np.floor(x) + if len(x_boundary_inds) > 0: + x[x_boundary_inds] = cfg.KRCNN.HEATMAP_SIZE - 1 + + y = (y - offset_y) * scale_y + y = np.floor(y) + if len(y_boundary_inds) > 0: + y[y_boundary_inds] = cfg.KRCNN.HEATMAP_SIZE - 1 + + valid_loc = np.logical_and( + np.logical_and(x >= 0, y >= 0), + np.logical_and( + x < cfg.KRCNN.HEATMAP_SIZE, y < cfg.KRCNN.HEATMAP_SIZE)) + + valid = np.logical_and(valid_loc, vis) + valid = valid.astype(np.int32) + + lin_ind = y * cfg.KRCNN.HEATMAP_SIZE + x + heatmaps[:, kp] = lin_ind * valid + weights[:, kp] = valid + + return heatmaps, weights + + +def scores_to_probs(scores): + """Transforms CxHxW of scores to probabilities spatially.""" + channels = scores.shape[0] + for c in range(channels): + temp = scores[c, :, :] + max_score = temp.max() + temp = np.exp(temp - max_score) / np.sum(np.exp(temp - max_score)) + scores[c, :, :] = temp + return scores + + +def nms_oks(kp_predictions, rois, thresh): + """Nms based on 
kp predictions.""" + scores = np.mean(kp_predictions[:, 2, :], axis=1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + ovr = compute_oks( + kp_predictions[i], rois[i], kp_predictions[order[1:]], + rois[order[1:]]) + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep + + +def compute_oks(src_keypoints, src_roi, dst_keypoints, dst_roi): + """Compute OKS for predicted keypoints wrt gt_keypoints. + src_keypoints: 4xK + src_roi: 4x1 + dst_keypoints: Nx4xK + dst_roi: Nx4 + """ + + sigmas = np.array([ + .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, + .87, .89, .89]) / 10.0 + vars = (sigmas * 2)**2 + + # area + src_area = (src_roi[2] - src_roi[0] + 1) * (src_roi[3] - src_roi[1] + 1) + + # measure the per-keypoint distance if keypoints visible + dx = dst_keypoints[:, 0, :] - src_keypoints[0, :] + dy = dst_keypoints[:, 1, :] - src_keypoints[1, :] + + e = (dx**2 + dy**2) / vars / (src_area + np.spacing(1)) / 2 + e = np.sum(np.exp(-e), axis=1) / e.shape[1] + + return e diff --git a/detectron/utils/logging.py b/detectron/utils/logging.py new file mode 100644 index 0000000000000000000000000000000000000000..eba6dfb84c30cb3c3c01850d7efc4af72f30447c --- /dev/null +++ b/detectron/utils/logging.py @@ -0,0 +1,83 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +"""Utilities for logging.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from collections import deque +from email.mime.text import MIMEText +import json +import logging +import numpy as np +import smtplib +import sys + + +def log_json_stats(stats, sort_keys=True): + # hack to control precision of top-level floats + stats = { + k: '{:.6f}'.format(v) if isinstance(v, float) else v + for k, v in stats.items() + } + print('json_stats: {:s}'.format(json.dumps(stats, sort_keys=sort_keys))) + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size): + self.deque = deque(maxlen=window_size) + self.series = [] + self.total = 0.0 + self.count = 0 + + def AddValue(self, value): + self.deque.append(value) + self.series.append(value) + self.count += 1 + self.total += value + + def GetMedianValue(self): + return np.median(self.deque) + + def GetAverageValue(self): + return np.mean(self.deque) + + def GetGlobalAverageValue(self): + return self.total / self.count + + +def send_email(subject, body, to): + s = smtplib.SMTP('localhost') + mime = MIMEText(body) + mime['Subject'] = subject + mime['To'] = to + s.sendmail('detectron', to, mime.as_string()) + + +def setup_logging(name): + FORMAT = '%(levelname)s %(filename)s:%(lineno)4d: %(message)s' + # Manually clear root loggers to prevent any module that may have called + # logging.basicConfig() from blocking our logging setup + logging.root.handlers = [] + logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout) + logger = logging.getLogger(name) + return logger diff --git a/detectron/utils/lr_policy.py b/detectron/utils/lr_policy.py new file mode 100644 index 
0000000000000000000000000000000000000000..92391b18e179b915c2b8e72b165cf422c1235c04 --- /dev/null +++ b/detectron/utils/lr_policy.py @@ -0,0 +1,131 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Learning rate policies.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np + +from detectron.core.config import cfg + + +def get_lr_at_iter(it): + """Get the learning rate at iteration it according to the cfg.SOLVER + settings. + """ + lr = get_lr_func()(it) + if it < cfg.SOLVER.WARM_UP_ITERS: + method = cfg.SOLVER.WARM_UP_METHOD + if method == 'constant': + warmup_factor = cfg.SOLVER.WARM_UP_FACTOR + elif method == 'linear': + alpha = it / cfg.SOLVER.WARM_UP_ITERS + warmup_factor = cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha + else: + raise KeyError('Unknown SOLVER.WARM_UP_METHOD: {}'.format(method)) + lr *= warmup_factor + return np.float32(lr) + + +# ---------------------------------------------------------------------------- # +# Learning rate policy functions +# ---------------------------------------------------------------------------- # + +def lr_func_steps_with_lrs(cur_iter): + """For cfg.SOLVER.LR_POLICY = 'steps_with_lrs' + + Change the learning rate to specified values at specified iterations. 
+ + Example: + cfg.SOLVER.MAX_ITER: 90 + cfg.SOLVER.STEPS: [0, 60, 80] + cfg.SOLVER.LRS: [0.02, 0.002, 0.0002] + for cur_iter in [0, 59] use 0.02 + in [60, 79] use 0.002 + in [80, inf] use 0.0002 + """ + ind = get_step_index(cur_iter) + return cfg.SOLVER.LRS[ind] + + +def lr_func_steps_with_decay(cur_iter): + """For cfg.SOLVER.LR_POLICY = 'steps_with_decay' + + Change the learning rate specified iterations based on the formula + lr = base_lr * gamma ** lr_step_count. + + Example: + cfg.SOLVER.MAX_ITER: 90 + cfg.SOLVER.STEPS: [0, 60, 80] + cfg.SOLVER.BASE_LR: 0.02 + cfg.SOLVER.GAMMA: 0.1 + for cur_iter in [0, 59] use 0.02 = 0.02 * 0.1 ** 0 + in [60, 79] use 0.002 = 0.02 * 0.1 ** 1 + in [80, inf] use 0.0002 = 0.02 * 0.1 ** 2 + """ + ind = get_step_index(cur_iter) + return cfg.SOLVER.BASE_LR * cfg.SOLVER.GAMMA ** ind + + +def lr_func_step(cur_iter): + """For cfg.SOLVER.LR_POLICY = 'step' + """ + return ( + cfg.SOLVER.BASE_LR * + cfg.SOLVER.GAMMA ** (cur_iter // cfg.SOLVER.STEP_SIZE)) + + +def lr_func_cosine_decay(cur_iter): + """For cfg.SOLVER.LR_POLICY = 'cosine_decay' + """ + iter_frac = float(cur_iter) / cfg.SOLVER.MAX_ITER + cos_frac = 0.5 * (np.cos(np.pi * iter_frac) + 1) + return cfg.SOLVER.BASE_LR * cos_frac + + +def lr_func_exp_decay(cur_iter): + """For cfg.SOLVER.LR_POLICY = 'exp_decay' + """ + # GAMMA is final/initial learning rate ratio + iter_frac = float(cur_iter) / cfg.SOLVER.MAX_ITER + exp_frac = np.exp(iter_frac * np.log(cfg.SOLVER.GAMMA)) + return cfg.SOLVER.BASE_LR * exp_frac + + +# ---------------------------------------------------------------------------- # +# Helpers +# ---------------------------------------------------------------------------- # + +def get_step_index(cur_iter): + """Given an iteration, find which learning rate step we're at.""" + assert cfg.SOLVER.STEPS[0] == 0, 'The first step should always start at 0.' 
+ steps = cfg.SOLVER.STEPS + [cfg.SOLVER.MAX_ITER] + for ind, step in enumerate(steps): # NoQA + if cur_iter < step: + break + return ind - 1 + + +def get_lr_func(): + policy = 'lr_func_' + cfg.SOLVER.LR_POLICY + if policy not in globals(): + raise NotImplementedError( + 'Unknown LR policy: {}'.format(cfg.SOLVER.LR_POLICY)) + else: + return globals()[policy] diff --git a/detectron/utils/model_convert_utils.py b/detectron/utils/model_convert_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7c2b2738dcf9161fa943f45ec195b93cb380c92c --- /dev/null +++ b/detectron/utils/model_convert_utils.py @@ -0,0 +1,406 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +'''Helper functions for model conversion to pb''' + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from functools import wraps +import copy +import numpy as np + +from caffe2.python import core, workspace +from caffe2.proto import caffe2_pb2 + + +class OpFilter(object): + def __init__(self, **kwargs): + self.type = None + self.type_in = None + self.inputs = None + self.outputs = None + self.input_has = None + self.output_has = None + self.cond = None + self.reverse = False + + assert all([x in self.__dict__ for x in kwargs]) + self.__dict__.update(kwargs) + + def check(self, op): + ret = self.reverse + if self.type and op.type != self.type: + return ret + if self.type_in and op.type not in self.type_in: + return ret + if self.inputs and set(op.input) != set(self.inputs): + return ret + if self.outputs and set(op.output) != set(self.outputs): + return ret + if self.input_has and self.input_has not in op.input: + return ret + if self.output_has and self.output_has not in op.output: + return ret + if self.cond is not None and not self.cond: + return ret + return not ret + + +def filter_op(op, **kwargs): + ''' Returns true if passed all checks ''' + return OpFilter(**kwargs).check(op) + + +def op_filter(**filter_args): + ''' Returns None if no condition is satisfied ''' + def actual_decorator(f): + @wraps(f) + def wrapper(op, **params): + if not filter_op(op, **filter_args): + return None + return f(op, **params) + return wrapper + return actual_decorator + + +def op_func_chain(convert_func_list): + ''' Run funcs one by one until func return is not None ''' + assert isinstance(convert_func_list, list) + + def _chain(op): + for x in convert_func_list: + ret = x(op) + if ret is not None: + return ret + return None + + return _chain + + +def convert_op_in_ops(ops_ref, func_or_list): 
+ func = func_or_list + if isinstance(func_or_list, list): + func = op_func_chain(func_or_list) + ops = [op for op in ops_ref] + converted_ops = [] + for op in ops: + new_ops = func(op) + if new_ops is not None and not isinstance(new_ops, list): + new_ops = [new_ops] + converted_ops.extend(new_ops if new_ops is not None else [op]) + del ops_ref[:] + # ops_ref maybe of type RepeatedCompositeFieldContainer + # which does not have append() + ops_ref.extend(converted_ops) + + +def convert_op_in_proto(proto, func_or_list): + convert_op_in_ops(proto.op, func_or_list) + + +def get_op_arg(op, arg_name): + for x in op.arg: + if x.name == arg_name: + return x + return None + + +def get_op_arg_valf(op, arg_name, default_val): + arg = get_op_arg(op, arg_name) + return arg.f if arg is not None else default_val + + +def update_mobile_engines(net): + for op in net.op: + if op.type == "Conv": + op.engine = "NNPACK" + if op.type == "ConvTranspose": + op.engine = "BLOCK" + + +def pairwise(iterable): + "s -> (s0,s1), (s1,s2), (s2, s3), ..." 
+ from itertools import tee + a, b = tee(iterable) + next(b, None) + return zip(a, b) + + +def blob_uses(net, blob): + u = [] + for i, op in enumerate(net.op): + if blob in op.input or blob in op.control_input: + u.append(i) + return u + + +def fuse_first_affine(net, params, removed_tensors): + net = copy.deepcopy(net) + params = copy.deepcopy(params) + + for ((i, current), (j, next_)) in pairwise(enumerate(net.op)): + if next_.input[0] != current.output[0]: + continue + + if current.type not in ("Conv", "ConvTranspose") \ + or next_.type != "AffineChannel": + continue + if current.output[0] != next_.output[0] and \ + len(blob_uses(net, current.output[0])) != 1: + # Can't fuse if more than one user unless AffineChannel is inplace + continue + + # else, can fuse + conv = current + affine = next_ + fused_conv = copy.deepcopy(conv) + fused_conv.output[0] = affine.output[0] + conv_weight = params[conv.input[1]] + conv_has_bias = len(conv.input) > 2 + conv_bias = params[conv.input[2]] if conv_has_bias else 0 + + A = params[affine.input[1]] + B = params[affine.input[2]] + + # Thus, can just have the affine transform + # X * A + B + # where + # A = bn_scale * 1.0 / (sqrt(running_var + eps)) + # B = (bias - running_mean * (1.0 / sqrt(running_var + eps)) + # * bn_scale) + + # This identify should hold if we have correctly fused + # np.testing.assert_array_equal( + # params[conv.output[0]] * A + B, + # params[bn.output[0]]) + + # Now, we have that the computation made is the following: + # ((X `conv` W) + b) * A + B + # Then, we can simply fuse this as follows: + # (X `conv` (W * A)) + b * A + B + # which is simply + # (X `conv` Q) + C + # where + + # Q = W * A + # C = b * A + B + + # For ConvTranspose, from the view of convolutions as a + # Toepeliz multiplication, we have W_ = W^T, so the weights + # are laid out as (R, S, K, K) (vs (S, R, K, K) for a Conv), + # so the weights broadcast slightly differently. 
Remember, our + # BN scale 'B' is of size (S,) + + A_ = A.reshape(-1, 1, 1, 1) if conv.type == "Conv" else \ + A.reshape(1, -1, 1, 1) + + C = conv_bias * A + B + Q = conv_weight * A_ + + assert params[conv.input[1]].shape == Q.shape + + params[conv.input[1]] = Q + if conv_has_bias: + assert params[conv.input[2]].shape == C.shape + params[conv.input[2]] = C + else: + # make af_bias to be bias of the conv layer + fused_conv.input.append(affine.input[2]) + params[affine.input[2]] = B + + new_ops = net.op[:i] + [fused_conv] + net.op[j + 1:] + del net.op[:] + if conv_has_bias: + del params[affine.input[2]] + removed_tensors.append(affine.input[2]) + removed_tensors.append(affine.input[1]) + del params[affine.input[1]] + net.op.extend(new_ops) + break + return net, params, removed_tensors + + +def fuse_affine(net, params, ignore_failure): + # Run until we hit a fixed point + removed_tensors = [] + while True: + (next_net, next_params, removed_tensors) = \ + fuse_first_affine(net, params, removed_tensors) + if len(next_net.op) == len(net.op): + if ( + any(op.type == "AffineChannel" for op in next_net.op) and + not ignore_failure + ): + raise Exception( + "Model contains AffineChannel op after fusion: %s", next_net) + return (next_net, next_params, removed_tensors) + net, params, removed_tensors = (next_net, next_params, removed_tensors) + + +def fuse_net(fuse_func, net, blobs, ignore_failure=False): + is_core_net = isinstance(net, core.Net) + if is_core_net: + net = net.Proto() + + net, params, removed_tensors = fuse_func(net, blobs, ignore_failure) + for rt in removed_tensors: + net.external_input.remove(rt) + + if is_core_net: + net = core.Net(net) + + return net, params + + +def fuse_net_affine(net, blobs): + return fuse_net(fuse_affine, net, blobs) + + +def add_tensor(net, name, blob): + ''' Create an operator to store the tensor 'blob', + run the operator to put the blob to workspace. + uint8 is stored as an array of string with one element. 
+ ''' + kTypeNameMapper = { + np.dtype('float32'): "GivenTensorFill", + np.dtype('int32'): "GivenTensorIntFill", + np.dtype('int64'): "GivenTensorInt64Fill", + np.dtype('uint8'): "GivenTensorStringFill", + } + + shape = blob.shape + values = blob + # pass array of uint8 as a string to save storage + # storing uint8_t has a large overhead for now + if blob.dtype == np.dtype('uint8'): + shape = [1] + values = [str(blob.data)] + + op = core.CreateOperator( + kTypeNameMapper[blob.dtype], + [], [name], + shape=shape, + values=values, + # arg=[ + # putils.MakeArgument("shape", shape), + # putils.MakeArgument("values", values), + # ] + ) + net.op.extend([op]) + + +def gen_init_net_from_blobs(blobs, blobs_to_use=None, excluded_blobs=None): + ''' Generate an initialization net based on a blob dict ''' + ret = caffe2_pb2.NetDef() + if blobs_to_use is None: + blobs_to_use = {x for x in blobs} + else: + blobs_to_use = copy.deepcopy(blobs_to_use) + if excluded_blobs is not None: + blobs_to_use = [x for x in blobs_to_use if x not in excluded_blobs] + for name in blobs_to_use: + blob = blobs[name] + if isinstance(blob, str): + print('Blob {} with type {} is not supported in generating init net,' + ' skipped.'.format(name, type(blob))) + continue + add_tensor(ret, name, blob) + + return ret + + +def get_ws_blobs(blob_names=None): + ''' Get blobs in 'blob_names' in the default workspace, + get all blobs if blob_names is None ''' + blobs = {} + if blob_names is None: + blob_names = workspace.Blobs() + blobs = {x: workspace.FetchBlob(x) for x in blob_names} + + return blobs + + +def get_device_option_cpu(): + device_option = core.DeviceOption(caffe2_pb2.CPU) + return device_option + + +def get_device_option_cuda(gpu_id=0): + device_option = caffe2_pb2.DeviceOption() + device_option.device_type = caffe2_pb2.CUDA + device_option.device_id = gpu_id + return device_option + + +def create_input_blobs_for_net(net_def): + for op in net_def.op: + for blob_in in op.input: + if not 
workspace.HasBlob(blob_in): + workspace.CreateBlob(blob_in) + + +def compare_model(model1_func, model2_func, test_image, check_blobs): + ''' model_func(test_image, check_blobs) + ''' + cb1, cb2 = check_blobs, check_blobs + if isinstance(check_blobs, dict): + cb1 = check_blobs.keys() + cb2 = check_blobs.values() + print('Running the first model...') + res1 = model1_func(test_image, check_blobs) + print('Running the second model...') + res2 = model2_func(test_image, check_blobs) + for idx in range(len(cb1)): + print('Checking {} -> {}...'.format(cb1[idx], cb2[idx])) + n1, n2 = cb1[idx], cb2[idx] + r1 = res1[n1] if n1 in res1 else None + r2 = res2[n2] if n2 in res2 else None + assert r1 is not None or r2 is None, \ + "Blob {} in model1 is None".format(n1) + assert r2 is not None or r1 is None, \ + "Blob {} in model2 is None".format(n2) + assert r1.shape == r2.shape, \ + "Blob {} and {} shape mismatched: {} vs {}".format( + n1, n2, r1.shape, r2.shape) + + np.testing.assert_array_almost_equal( + r1, r2, decimal=3, + err_msg='{} and {} not matched. Max diff: {}'.format( + n1, n2, np.amax(np.absolute(r1 - r2)))) + + return True + + +# graph_name could not contain word 'graph' +def save_graph(net, file_name, graph_name="net", op_only=True): + from caffe2.python import net_drawer + graph = None + ops = net.op + if not op_only: + graph = net_drawer.GetPydotGraph( + ops, graph_name, + rankdir="TB") + else: + graph = net_drawer.GetPydotGraphMinimal( + ops, graph_name, + rankdir="TB", minimal_dependency=True) + + try: + graph.write_png(file_name) + except Exception as e: + print('Error when writing graph to image {}'.format(e)) diff --git a/detectron/utils/net.py b/detectron/utils/net.py new file mode 100644 index 0000000000000000000000000000000000000000..f98f02f4028e988f00d5c9fcc64fa0c1ecc292b7 --- /dev/null +++ b/detectron/utils/net.py @@ -0,0 +1,298 @@ +# Copyright (c) 2017-present, Facebook, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Helper functions for working with Caffe2 networks (i.e., operator graphs).""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from collections import OrderedDict +import logging +import numpy as np +import os +import pprint + +from caffe2.python import core +from caffe2.python import workspace + +from detectron.core.config import cfg +from detectron.core.config import load_cfg +from detectron.utils.io import load_object +from detectron.utils.io import save_object +import detectron.utils.c2 as c2_utils +import detectron.utils.env as envu + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +def initialize_from_weights_file(model, weights_file, broadcast=True): + """Initialize a model from weights stored in a pickled dictionary. If + multiple GPUs are used, the loaded weights are synchronized on all GPUs, + unless 'broadcast' is False. + """ + initialize_gpu_from_weights_file(model, weights_file, gpu_id=0) + if broadcast: + broadcast_parameters(model) + + +def initialize_gpu_from_weights_file(model, weights_file, gpu_id=0): + """Initialize a network with ops on a specific GPU. 
+ + If you use CUDA_VISIBLE_DEVICES to target specific GPUs, Caffe2 will + automatically map logical GPU ids (starting from 0) to the physical GPUs + specified in CUDA_VISIBLE_DEVICES. + """ + logger.info('Loading weights from: {}'.format(weights_file)) + ws_blobs = workspace.Blobs() + src_blobs = load_object(weights_file) + + if 'cfg' in src_blobs: + saved_cfg = load_cfg(src_blobs['cfg']) + configure_bbox_reg_weights(model, saved_cfg) + if 'blobs' in src_blobs: + # Backwards compat--dictionary used to be only blobs, now they are + # stored under the 'blobs' key + src_blobs = src_blobs['blobs'] + # Initialize weights on GPU gpu_id only + unscoped_param_names = OrderedDict() # Print these out in model order + for blob in model.params: + unscoped_param_names[c2_utils.UnscopeName(str(blob))] = True + with c2_utils.NamedCudaScope(gpu_id): + for unscoped_param_name in unscoped_param_names.keys(): + if (unscoped_param_name.find(']_') >= 0 and + unscoped_param_name not in src_blobs): + # Special case for sharing initialization from a pretrained + # model: + # If a blob named '_[xyz]_foo' is in model.params and not in + # the initialization blob dictionary, then load source blob + # 'foo' into destination blob '_[xyz]_foo' + src_name = unscoped_param_name[ + unscoped_param_name.find(']_') + 2:] + else: + src_name = unscoped_param_name + if src_name not in src_blobs: + logger.info('{:s} not found'.format(src_name)) + continue + dst_name = core.ScopedName(unscoped_param_name) + has_momentum = src_name + '_momentum' in src_blobs + has_momentum_str = ' [+ momentum]' if has_momentum else '' + logger.info( + '{:s}{:} loaded from weights file into {:s}: {}'.format( + src_name, has_momentum_str, dst_name, src_blobs[src_name] + .shape + ) + ) + if dst_name in ws_blobs: + # If the blob is already in the workspace, make sure that it + # matches the shape of the loaded blob + ws_blob = workspace.FetchBlob(dst_name) + assert ws_blob.shape == src_blobs[src_name].shape, \ + ('Workspace 
blob {} with shape {} does not match ' + 'weights file shape {}').format( + src_name, + ws_blob.shape, + src_blobs[src_name].shape) + workspace.FeedBlob( + dst_name, + src_blobs[src_name].astype(np.float32, copy=False)) + if has_momentum: + workspace.FeedBlob( + dst_name + '_momentum', + src_blobs[src_name + '_momentum'].astype( + np.float32, copy=False)) + + # We preserve blobs that are in the weights file but not used by the current + # model. We load these into CPU memory under the '__preserve__/' namescope. + # These blobs will be stored when saving a model to a weights file. This + # feature allows for alternating optimization of Faster R-CNN in which blobs + # unused by one step can still be preserved forward and used to initialize + # another step. + for src_name in src_blobs.keys(): + if (src_name not in unscoped_param_names and + not src_name.endswith('_momentum') and + src_blobs[src_name] is not None): + with c2_utils.CpuScope(): + workspace.FeedBlob( + '__preserve__/{:s}'.format(src_name), src_blobs[src_name]) + logger.info( + '{:s} preserved in workspace (unused)'.format(src_name)) + + +def save_model_to_weights_file(weights_file, model): + """Stash model weights in a dictionary and pickle them to a file. We map + GPU device scoped names to unscoped names (e.g., 'gpu_0/conv1_w' -> + 'conv1_w'). 
+ """ + logger.info( + 'Saving parameters and momentum to {}'.format( + os.path.abspath(weights_file))) + blobs = {} + # Save all parameters + for param in model.params: + scoped_name = str(param) + unscoped_name = c2_utils.UnscopeName(scoped_name) + if unscoped_name not in blobs: + logger.debug(' {:s} -> {:s}'.format(scoped_name, unscoped_name)) + blobs[unscoped_name] = workspace.FetchBlob(scoped_name) + # Save momentum + for param in model.TrainableParams(): + scoped_name = str(param) + '_momentum' + unscoped_name = c2_utils.UnscopeName(scoped_name) + if unscoped_name not in blobs: + logger.debug(' {:s} -> {:s}'.format(scoped_name, unscoped_name)) + blobs[unscoped_name] = workspace.FetchBlob(scoped_name) + # Save preserved blobs + for scoped_name in workspace.Blobs(): + if scoped_name.startswith('__preserve__/'): + unscoped_name = c2_utils.UnscopeName(scoped_name) + if unscoped_name not in blobs: + logger.debug( + ' {:s} -> {:s} (preserved)'.format( + scoped_name, unscoped_name)) + blobs[unscoped_name] = workspace.FetchBlob(scoped_name) + cfg_yaml = envu.yaml_dump(cfg) + save_object(dict(blobs=blobs, cfg=cfg_yaml), weights_file) + + +def broadcast_parameters(model): + """Copy parameter blobs from GPU 0 over the corresponding parameter blobs + on GPUs 1 through cfg.NUM_GPUS - 1. + """ + if cfg.NUM_GPUS == 1: + # no-op if only running on a single GPU + return + + def _do_broadcast(all_blobs): + assert len(all_blobs) % cfg.NUM_GPUS == 0, \ + ('Unexpected value for NUM_GPUS. 
Make sure you are not ' + 'running single-GPU inference with NUM_GPUS > 1.') + blobs_per_gpu = int(len(all_blobs) / cfg.NUM_GPUS) + for i in range(blobs_per_gpu): + blobs = [p for p in all_blobs[i::blobs_per_gpu]] + data = workspace.FetchBlob(blobs[0]) + logger.debug('Broadcasting {} to'.format(str(blobs[0]))) + for i, p in enumerate(blobs[1:]): + logger.debug(' |-> {}'.format(str(p))) + with c2_utils.CudaScope(i + 1): + workspace.FeedBlob(p, data) + + _do_broadcast(model.params) + _do_broadcast([b + '_momentum' for b in model.TrainableParams()]) + + +def sum_multi_gpu_blob(blob_name): + """Return the sum of a scalar blob held on multiple GPUs.""" + val = 0 + for i in range(cfg.NUM_GPUS): + val += float(workspace.FetchBlob('gpu_{}/{}'.format(i, blob_name))) + return val + + +def average_multi_gpu_blob(blob_name): + """Return the average of a scalar blob held on multiple GPUs.""" + return sum_multi_gpu_blob(blob_name) / cfg.NUM_GPUS + + +def print_net(model, namescope='gpu_0'): + """Print the model network.""" + logger.info('Printing model: {}'.format(model.net.Name())) + op_list = model.net.Proto().op + for op in op_list: + input_name = op.input + # For simplicity: only print the first output + # Not recommended if there are split layers + output_name = str(op.output[0]) + op_type = op.type + op_name = op.name + + if namescope is None or output_name.startswith(namescope): + # Only print the forward pass network + if output_name.find('grad') >= 0 or output_name.find('__m') >= 0: + continue + + try: + # Under some conditions (e.g., dynamic memory optimization) + # it is possible that the network frees some blobs when they are + # no longer needed. Handle this case... 
+ output_shape = workspace.FetchBlob(output_name).shape + except BaseException: + output_shape = '' + + first_blob = True + op_label = op_type + (op_name if op_name == '' else ':' + op_name) + suffix = ' ------- (op: {})'.format(op_label) + for j in range(len(input_name)): + if input_name[j] in model.params: + continue + input_blob = workspace.FetchBlob(input_name[j]) + if isinstance(input_blob, np.ndarray): + input_shape = input_blob.shape + logger.info('{:28s}: {:20s} => {:28s}: {:20s}{}'.format( + c2_utils.UnscopeName(str(input_name[j])), + '{}'.format(input_shape), + c2_utils.UnscopeName(str(output_name)), + '{}'.format(output_shape), + suffix)) + if first_blob: + first_blob = False + suffix = ' ------|' + logger.info('End of model: {}'.format(model.net.Name())) + + +def configure_bbox_reg_weights(model, saved_cfg): + """Compatibility for old models trained with bounding box regression + mean/std normalization (instead of fixed weights). + """ + if 'MODEL' not in saved_cfg or 'BBOX_REG_WEIGHTS' not in saved_cfg.MODEL: + logger.warning('Model from weights file was trained before config key ' + 'MODEL.BBOX_REG_WEIGHTS was added. Forcing ' + 'MODEL.BBOX_REG_WEIGHTS = (1., 1., 1., 1.) to ensure ' + 'correct **inference** behavior.') + # Generally we don't allow modifying the config, but this is a one-off + # hack to support some very old models + is_immutable = cfg.is_immutable() + cfg.immutable(False) + cfg.MODEL.BBOX_REG_WEIGHTS = (1., 1., 1., 1.) + cfg.immutable(is_immutable) + logger.info('New config:') + logger.info(pprint.pformat(cfg)) + assert not model.train, ( + 'This model was trained with an older version of the code that ' + 'used bounding box regression mean/std normalization. It can no ' + 'longer be used for training. To upgrade it to a trainable model ' + 'please use fb/compat/convert_bbox_reg_normalized_model.py.' 
+ ) + + +def get_group_gn(dim): + """ + get number of groups used by GroupNorm, based on number of channels + """ + dim_per_gp = cfg.GROUP_NORM.DIM_PER_GP + num_groups = cfg.GROUP_NORM.NUM_GROUPS + + assert dim_per_gp == -1 or num_groups == -1, \ + "GroupNorm: can only specify G or C/G." + + if dim_per_gp > 0: + assert dim % dim_per_gp == 0 + group_gn = dim // dim_per_gp + else: + assert dim % num_groups == 0 + group_gn = num_groups + return group_gn diff --git a/detectron/utils/segms.py b/detectron/utils/segms.py new file mode 100644 index 0000000000000000000000000000000000000000..4620a3592a5a7c6e3de4e2b4af05ed90ca14a5ec --- /dev/null +++ b/detectron/utils/segms.py @@ -0,0 +1,279 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Functions for interacting with segmentation masks in the COCO format. 
+ +The following terms are used in this module + mask: a binary mask encoded as a 2D numpy array + segm: a segmentation mask in one of the two COCO formats (polygon or RLE) + polygon: COCO's polygon format + RLE: COCO's run length encoding format +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np + +import pycocotools.mask as mask_util + +# Type used for storing masks in polygon format +_POLY_TYPE = list +# Type used for storing masks in RLE format +_RLE_TYPE = dict + + +def is_poly(segm): + """Determine if segm is a polygon. Valid segm expected (polygon or RLE).""" + assert isinstance(segm, (_POLY_TYPE, _RLE_TYPE)), \ + 'Invalid segm type: {}'.format(type(segm)) + return isinstance(segm, _POLY_TYPE) + + +def flip_segms(segms, height, width): + """Left/right flip each mask in a list of masks.""" + def _flip_poly(poly, width): + flipped_poly = np.array(poly) + flipped_poly[0::2] = width - np.array(poly[0::2]) - 1 + return flipped_poly.tolist() + + def _flip_rle(rle, height, width): + if 'counts' in rle and type(rle['counts']) == list: + # Magic RLE format handling painfully discovered by looking at the + # COCO API showAnns function. + rle = mask_util.frPyObjects([rle], height, width) + mask = mask_util.decode(rle) + mask = mask[:, ::-1, :] + rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8)) + return rle + + flipped_segms = [] + for segm in segms: + if is_poly(segm): + # Polygon format + flipped_segms.append([_flip_poly(poly, width) for poly in segm]) + else: + # RLE format + flipped_segms.append(_flip_rle(segm, height, width)) + return flipped_segms + + +def polys_to_mask(polygons, height, width): + """Convert from the COCO polygon segmentation format to a binary mask + encoded as a 2D array of data type numpy.float32. The polygon segmentation + is understood to be enclosed inside a height x width image. 
The resulting + mask is therefore of shape (height, width). + """ + rle = mask_util.frPyObjects(polygons, height, width) + mask = np.array(mask_util.decode(rle), dtype=np.float32) + # Flatten in case polygons was a list + mask = np.sum(mask, axis=2) + mask = np.array(mask > 0, dtype=np.float32) + return mask + + +def mask_to_bbox(mask): + """Compute the tight bounding box of a binary mask.""" + xs = np.where(np.sum(mask, axis=0) > 0)[0] + ys = np.where(np.sum(mask, axis=1) > 0)[0] + + if len(xs) == 0 or len(ys) == 0: + return None + + x0 = xs[0] + x1 = xs[-1] + y0 = ys[0] + y1 = ys[-1] + return np.array((x0, y0, x1, y1), dtype=np.float32) + + +def polys_to_mask_wrt_box(polygons, box, M): + """Convert from the COCO polygon segmentation format to a binary mask + encoded as a 2D array of data type numpy.float32. The polygon segmentation + is understood to be enclosed in the given box and rasterized to an M x M + mask. The resulting mask is therefore of shape (M, M). + """ + w = box[2] - box[0] + h = box[3] - box[1] + + w = np.maximum(w, 1) + h = np.maximum(h, 1) + + polygons_norm = [] + for poly in polygons: + p = np.array(poly, dtype=np.float32) + p[0::2] = (p[0::2] - box[0]) * M / w + p[1::2] = (p[1::2] - box[1]) * M / h + polygons_norm.append(p) + + rle = mask_util.frPyObjects(polygons_norm, M, M) + mask = np.array(mask_util.decode(rle), dtype=np.float32) + # Flatten in case polygons was a list + mask = np.sum(mask, axis=2) + mask = np.array(mask > 0, dtype=np.float32) + return mask + + +def polys_to_boxes(polys): + """Convert a list of polygons into an array of tight bounding boxes.""" + boxes_from_polys = np.zeros((len(polys), 4), dtype=np.float32) + for i in range(len(polys)): + poly = polys[i] + x0 = min(min(p[::2]) for p in poly) + x1 = max(max(p[::2]) for p in poly) + y0 = min(min(p[1::2]) for p in poly) + y1 = max(max(p[1::2]) for p in poly) + boxes_from_polys[i, :] = [x0, y0, x1, y1] + + return boxes_from_polys + + +def rle_mask_voting( + top_masks, 
all_masks, all_dets, iou_thresh, binarize_thresh, method='AVG' +): + """Returns new masks (in correspondence with `top_masks`) by combining + multiple overlapping masks coming from the pool of `all_masks`. Two methods + for combining masks are supported: 'AVG' uses a weighted average of + overlapping mask pixels; 'UNION' takes the union of all mask pixels. + """ + if len(top_masks) == 0: + return + + all_not_crowd = [False] * len(all_masks) + top_to_all_overlaps = mask_util.iou(top_masks, all_masks, all_not_crowd) + decoded_all_masks = [ + np.array(mask_util.decode(rle), dtype=np.float32) for rle in all_masks + ] + decoded_top_masks = [ + np.array(mask_util.decode(rle), dtype=np.float32) for rle in top_masks + ] + all_boxes = all_dets[:, :4].astype(np.int32) + all_scores = all_dets[:, 4] + + # Fill box support with weights + mask_shape = decoded_all_masks[0].shape + mask_weights = np.zeros((len(all_masks), mask_shape[0], mask_shape[1])) + for k in range(len(all_masks)): + ref_box = all_boxes[k] + x_0 = max(ref_box[0], 0) + x_1 = min(ref_box[2] + 1, mask_shape[1]) + y_0 = max(ref_box[1], 0) + y_1 = min(ref_box[3] + 1, mask_shape[0]) + mask_weights[k, y_0:y_1, x_0:x_1] = all_scores[k] + mask_weights = np.maximum(mask_weights, 1e-5) + + top_segms_out = [] + for k in range(len(top_masks)): + # Corner case of empty mask + if decoded_top_masks[k].sum() == 0: + top_segms_out.append(top_masks[k]) + continue + + inds_to_vote = np.where(top_to_all_overlaps[k] >= iou_thresh)[0] + # Only matches itself + if len(inds_to_vote) == 1: + top_segms_out.append(top_masks[k]) + continue + + masks_to_vote = [decoded_all_masks[i] for i in inds_to_vote] + if method == 'AVG': + ws = mask_weights[inds_to_vote] + soft_mask = np.average(masks_to_vote, axis=0, weights=ws) + mask = np.array(soft_mask > binarize_thresh, dtype=np.uint8) + elif method == 'UNION': + # Any pixel that's on joins the mask + soft_mask = np.sum(masks_to_vote, axis=0) + mask = np.array(soft_mask > 1e-5, dtype=np.uint8) + 
else: + raise NotImplementedError('Method {} is unknown'.format(method)) + rle = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0] + top_segms_out.append(rle) + + return top_segms_out + + +def rle_mask_nms(masks, dets, thresh, mode='IOU'): + """Performs greedy non-maximum suppression based on an overlap measurement + between masks. The type of measurement is determined by `mode` and can be + either 'IOU' (standard intersection over union) or 'IOMA' (intersection over + mininum area). + """ + if len(masks) == 0: + return [] + if len(masks) == 1: + return [0] + + if mode == 'IOU': + # Computes ious[m1, m2] = area(intersect(m1, m2)) / area(union(m1, m2)) + all_not_crowds = [False] * len(masks) + ious = mask_util.iou(masks, masks, all_not_crowds) + elif mode == 'IOMA': + # Computes ious[m1, m2] = area(intersect(m1, m2)) / min(area(m1), area(m2)) + all_crowds = [True] * len(masks) + # ious[m1, m2] = area(intersect(m1, m2)) / area(m2) + ious = mask_util.iou(masks, masks, all_crowds) + # ... 
= max(area(intersect(m1, m2)) / area(m2), + # area(intersect(m2, m1)) / area(m1)) + ious = np.maximum(ious, ious.transpose()) + elif mode == 'CONTAINMENT': + # Computes ious[m1, m2] = area(intersect(m1, m2)) / area(m2) + # Which measures how much m2 is contained inside m1 + all_crowds = [True] * len(masks) + ious = mask_util.iou(masks, masks, all_crowds) + else: + raise NotImplementedError('Mode {} is unknown'.format(mode)) + + scores = dets[:, 4] + order = np.argsort(-scores) + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + ovr = ious[i, order[1:]] + inds_to_keep = np.where(ovr <= thresh)[0] + order = order[inds_to_keep + 1] + + return keep + + +def rle_masks_to_boxes(masks): + """Computes the bounding box of each mask in a list of RLE encoded masks.""" + if len(masks) == 0: + return [] + + decoded_masks = [ + np.array(mask_util.decode(rle), dtype=np.float32) for rle in masks + ] + + def get_bounds(flat_mask): + inds = np.where(flat_mask > 0)[0] + return inds.min(), inds.max() + + boxes = np.zeros((len(decoded_masks), 4)) + keep = [True] * len(decoded_masks) + for i, mask in enumerate(decoded_masks): + if mask.sum() == 0: + keep[i] = False + continue + flat_mask = mask.sum(axis=0) + x0, x1 = get_bounds(flat_mask) + flat_mask = mask.sum(axis=1) + y0, y1 = get_bounds(flat_mask) + boxes[i, :] = (x0, y0, x1, y1) + + return boxes, np.where(keep)[0] diff --git a/detectron/utils/subprocess.py b/detectron/utils/subprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..c7911886a32d43bfbb41d9356ffcfa9d40d7feb4 --- /dev/null +++ b/detectron/utils/subprocess.py @@ -0,0 +1,133 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +"""Primitives for running multiple single-GPU jobs in parallel over subranges of +data. These are used for running multi-GPU inference. Subprocesses are used to +avoid the GIL since inference may involve non-trivial amounts of Python code. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import os +import numpy as np +import subprocess +from six.moves import shlex_quote + +from detectron.core.config import cfg +from detectron.utils.io import load_object +import detectron.utils.env as envu + +import logging +logger = logging.getLogger(__name__) + + +def process_in_parallel( + tag, total_range_size, binary, output_dir, opts='' +): + """Run the specified binary cfg.NUM_GPUS times in parallel, each time as a + subprocess that uses one GPU. The binary must accept the command line + arguments `--range {start} {end}` that specify a data processing range. 
+ """ + # Snapshot the current cfg state in order to pass to the inference + # subprocesses + cfg_file = os.path.join(output_dir, '{}_range_config.yaml'.format(tag)) + with open(cfg_file, 'w') as f: + envu.yaml_dump(cfg, stream=f) + subprocess_env = os.environ.copy() + processes = [] + subinds = np.array_split(range(total_range_size), cfg.NUM_GPUS) + # Determine GPUs to use + cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES') + if cuda_visible_devices: + gpu_inds = map(int, cuda_visible_devices.split(',')) + assert -1 not in gpu_inds, \ + 'Hiding GPU indices using the \'-1\' index is not supported' + else: + gpu_inds = range(cfg.NUM_GPUS) + # Run the binary in cfg.NUM_GPUS subprocesses + for i, gpu_ind in enumerate(gpu_inds): + start = subinds[i][0] + end = subinds[i][-1] + 1 + subprocess_env['CUDA_VISIBLE_DEVICES'] = str(gpu_ind) + cmd = '{binary} --range {start} {end} --cfg {cfg_file} NUM_GPUS 1 {opts}' + cmd = cmd.format( + binary=shlex_quote(binary), + start=int(start), + end=int(end), + cfg_file=shlex_quote(cfg_file), + opts=' '.join([shlex_quote(opt) for opt in opts]) + ) + logger.info('{} range command {}: {}'.format(tag, i, cmd)) + if i == 0: + subprocess_stdout = subprocess.PIPE + else: + filename = os.path.join( + output_dir, '%s_range_%s_%s.stdout' % (tag, start, end) + ) + subprocess_stdout = open(filename, 'w') # NOQA (close below) + p = subprocess.Popen( + cmd, + shell=True, + env=subprocess_env, + stdout=subprocess_stdout, + stderr=subprocess.STDOUT, + bufsize=1 + ) + processes.append((i, p, start, end, subprocess_stdout)) + # Log output from inference processes and collate their results + outputs = [] + for i, p, start, end, subprocess_stdout in processes: + log_subprocess_output(i, p, output_dir, tag, start, end) + if i > 0: + subprocess_stdout.close() + range_file = os.path.join( + output_dir, '%s_range_%s_%s.pkl' % (tag, start, end) + ) + range_data = load_object(range_file) + outputs.append(range_data) + return outputs + + +def 
log_subprocess_output(i, p, output_dir, tag, start, end): + """Capture the output of each subprocess and log it in the parent process. + The first subprocess's output is logged in realtime. The output from the + other subprocesses is buffered and then printed all at once (in order) when + subprocesses finish. + """ + outfile = os.path.join( + output_dir, '%s_range_%s_%s.stdout' % (tag, start, end) + ) + logger.info('# ' + '-' * 76 + ' #') + logger.info( + 'stdout of subprocess %s with range [%s, %s]' % (i, start + 1, end) + ) + logger.info('# ' + '-' * 76 + ' #') + if i == 0: + # Stream the piped stdout from the first subprocess in realtime + with open(outfile, 'wb') as f: + for line in iter(p.stdout.readline, b''): + print(line.rstrip().decode("utf8")) + f.write(line) + p.stdout.close() + ret = p.wait() + else: + # For subprocesses >= 1, wait and dump their log file + ret = p.wait() + with open(outfile, 'r') as f: + print(''.join(f.readlines())) + assert ret == 0, 'Range subprocess failed (exit code: {})'.format(ret) diff --git a/detectron/utils/timer.py b/detectron/utils/timer.py new file mode 100644 index 0000000000000000000000000000000000000000..69a20dbde18434ff3b4015102efb5a1c4f95d53b --- /dev/null +++ b/detectron/utils/timer.py @@ -0,0 +1,60 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+##############################################################################
+#
+# Based on:
+# --------------------------------------------------------
+# Fast R-CNN
+# Copyright (c) 2015 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Ross Girshick
+# --------------------------------------------------------
+
+"""Timing related functions."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import time
+
+
+class Timer(object):
+    """A simple timer."""
+
+    def __init__(self):
+        self.reset()
+
+    def tic(self):
+        """Start (or restart) the timer."""
+        # using time.time instead of time.clock because time.clock
+        # does not normalize for multithreading
+        self.start_time = time.time()
+
+    def toc(self, average=True):
+        """Stop timing, accumulate the elapsed interval, and return either the
+        running average over all calls (average=True) or just this interval.
+        """
+        self.diff = time.time() - self.start_time
+        self.total_time += self.diff
+        self.calls += 1
+        self.average_time = self.total_time / self.calls
+        if average:
+            return self.average_time
+        else:
+            return self.diff
+
+    def reset(self):
+        """Zero all accumulated timing state."""
+        self.total_time = 0.
+        self.calls = 0
+        self.start_time = 0.
+        self.diff = 0.
+        self.average_time = 0.
diff --git a/detectron/utils/train.py b/detectron/utils/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5f739334146acd6a88e40b360143f4fc997e476
--- /dev/null
+++ b/detectron/utils/train.py
@@ -0,0 +1,206 @@
+# Copyright (c) 2017-present, Facebook, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## +# +# Based on: +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +"""Utilities driving the train_net binary""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from shutil import copyfile +import cv2 # NOQA (Must import before importing caffe2 due to bug in cv2) +import logging +import numpy as np +import os +import re + +from caffe2.python import memonger +from caffe2.python import workspace + +from detectron.core.config import cfg +from detectron.core.config import get_output_dir +from detectron.datasets.roidb import combined_roidb_for_training +from detectron.modeling import model_builder +from detectron.utils import lr_policy +from detectron.utils.training_stats import TrainingStats +import detectron.utils.env as envu +import detectron.utils.net as nu + + +def train_model(): + """Model training loop.""" + model, weights_file, start_iter, checkpoints, output_dir = create_model() + if 'final' in checkpoints: + # The final model was found in the output directory, so nothing to do + return checkpoints + + setup_model_for_training(model, weights_file, output_dir) + training_stats = TrainingStats(model) + CHECKPOINT_PERIOD = int(cfg.TRAIN.SNAPSHOT_ITERS / cfg.NUM_GPUS) + + for cur_iter in range(start_iter, cfg.SOLVER.MAX_ITER): + if model.roi_data_loader.has_stopped(): + handle_critical_error(model, 'roi_data_loader failed') + training_stats.IterTic() + lr = model.UpdateWorkspaceLr(cur_iter, lr_policy.get_lr_at_iter(cur_iter)) + workspace.RunNet(model.net.Proto().name) + if 
cur_iter == start_iter: + nu.print_net(model) + training_stats.IterToc() + training_stats.UpdateIterStats() + training_stats.LogIterStats(cur_iter, lr) + + if (cur_iter + 1) % CHECKPOINT_PERIOD == 0 and cur_iter > start_iter: + checkpoints[cur_iter] = os.path.join( + output_dir, 'model_iter{}.pkl'.format(cur_iter) + ) + nu.save_model_to_weights_file(checkpoints[cur_iter], model) + + if cur_iter == start_iter + training_stats.LOG_PERIOD: + # Reset the iteration timer to remove outliers from the first few + # SGD iterations + training_stats.ResetIterTimer() + + if np.isnan(training_stats.iter_total_loss): + handle_critical_error(model, 'Loss is NaN') + + # Save the final model + checkpoints['final'] = os.path.join(output_dir, 'model_final.pkl') + nu.save_model_to_weights_file(checkpoints['final'], model) + # Shutdown data loading threads + model.roi_data_loader.shutdown() + return checkpoints + + +def handle_critical_error(model, msg): + logger = logging.getLogger(__name__) + logger.critical(msg) + model.roi_data_loader.shutdown() + raise Exception(msg) + + +def create_model(): + """Build the model and look for saved model checkpoints in case we can + resume from one. 
+ """ + logger = logging.getLogger(__name__) + start_iter = 0 + checkpoints = {} + output_dir = get_output_dir(cfg.TRAIN.DATASETS, training=True) + weights_file = cfg.TRAIN.WEIGHTS + if cfg.TRAIN.AUTO_RESUME: + # Check for the final model (indicates training already finished) + final_path = os.path.join(output_dir, 'model_final.pkl') + if os.path.exists(final_path): + logger.info('model_final.pkl exists; no need to train!') + return None, None, None, {'final': final_path}, output_dir + + if cfg.TRAIN.COPY_WEIGHTS: + copyfile( + weights_file, + os.path.join(output_dir, os.path.basename(weights_file))) + logger.info('Copy {} to {}'.format(weights_file, output_dir)) + + # Find the most recent checkpoint (highest iteration number) + files = os.listdir(output_dir) + for f in files: + iter_string = re.findall(r'(?<=model_iter)\d+(?=\.pkl)', f) + if len(iter_string) > 0: + checkpoint_iter = int(iter_string[0]) + if checkpoint_iter > start_iter: + # Start one iteration immediately after the checkpoint iter + start_iter = checkpoint_iter + 1 + resume_weights_file = f + + if start_iter > 0: + # Override the initialization weights with the found checkpoint + weights_file = os.path.join(output_dir, resume_weights_file) + logger.info( + '========> Resuming from checkpoint {} at start iter {}'. 
+ format(weights_file, start_iter) + ) + + logger.info('Building model: {}'.format(cfg.MODEL.TYPE)) + model = model_builder.create(cfg.MODEL.TYPE, train=True) + if cfg.MEMONGER: + optimize_memory(model) + # Performs random weight initialization as defined by the model + workspace.RunNetOnce(model.param_init_net) + return model, weights_file, start_iter, checkpoints, output_dir + + +def optimize_memory(model): + """Save GPU memory through blob sharing.""" + for device in range(cfg.NUM_GPUS): + namescope = 'gpu_{}/'.format(device) + losses = [namescope + l for l in model.losses] + model.net._net = memonger.share_grad_blobs( + model.net, + losses, + set(model.param_to_grad.values()), + namescope, + share_activations=cfg.MEMONGER_SHARE_ACTIVATIONS + ) + + +def setup_model_for_training(model, weights_file, output_dir): + """Loaded saved weights and create the network in the C2 workspace.""" + logger = logging.getLogger(__name__) + add_model_training_inputs(model) + + if weights_file: + # Override random weight initialization with weights from a saved model + nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0) + # Even if we're randomly initializing we still need to synchronize + # parameters across GPUs + nu.broadcast_parameters(model) + workspace.CreateNet(model.net) + + logger.info('Outputs saved to: {:s}'.format(os.path.abspath(output_dir))) + dump_proto_files(model, output_dir) + + # Start loading mini-batches and enqueuing blobs + model.roi_data_loader.register_sigint_handler() + model.roi_data_loader.start(prefill=True) + return output_dir + + +def add_model_training_inputs(model): + """Load the training dataset and attach the training inputs to the model.""" + logger = logging.getLogger(__name__) + logger.info('Loading dataset: {}'.format(cfg.TRAIN.DATASETS)) + roidb = combined_roidb_for_training( + cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES + ) + logger.info('{:d} roidb entries'.format(len(roidb))) + model_builder.add_training_inputs(model, 
roidb=roidb) + + +def dump_proto_files(model, output_dir): + """Save prototxt descriptions of the training network and parameter + initialization network.""" + with open(os.path.join(output_dir, 'net.pbtxt'), 'w') as fid: + fid.write(str(model.net.Proto())) + with open(os.path.join(output_dir, 'param_init_net.pbtxt'), 'w') as fid: + fid.write(str(model.param_init_net.Proto())) diff --git a/detectron/utils/training_stats.py b/detectron/utils/training_stats.py new file mode 100644 index 0000000000000000000000000000000000000000..36e586ef83d2a7656bd055053391fa651e62cc90 --- /dev/null +++ b/detectron/utils/training_stats.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python + +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +"""Utilities for training.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import datetime +import numpy as np + +from caffe2.python import utils as c2_py_utils + +from detectron.core.config import cfg +from detectron.utils.logging import log_json_stats +from detectron.utils.logging import SmoothedValue +from detectron.utils.timer import Timer +import detectron.utils.net as nu + + +class TrainingStats(object): + """Track vital training statistics.""" + + def __init__(self, model): + # Window size for smoothing tracked values (with median filtering) + self.WIN_SZ = 20 + # Output logging period in SGD iterations + self.LOG_PERIOD = 20 + self.smoothed_losses_and_metrics = { + key: SmoothedValue(self.WIN_SZ) + for key in model.losses + model.metrics + } + self.losses_and_metrics = { + key: 0 + for key in model.losses + model.metrics + } + self.smoothed_total_loss = SmoothedValue(self.WIN_SZ) + self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ) + self.iter_total_loss = np.nan + self.iter_timer = Timer() + self.model = model + + def IterTic(self): + self.iter_timer.tic() + + def IterToc(self): + return self.iter_timer.toc(average=False) + + def ResetIterTimer(self): + self.iter_timer.reset() + + def UpdateIterStats(self): + """Update tracked iteration statistics.""" + for k in self.losses_and_metrics.keys(): + if k in self.model.losses: + self.losses_and_metrics[k] = nu.sum_multi_gpu_blob(k) + else: + self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k) + for k, v in self.smoothed_losses_and_metrics.items(): + v.AddValue(self.losses_and_metrics[k]) + self.iter_total_loss = np.sum( + np.array([self.losses_and_metrics[k] for k in self.model.losses]) + ) + self.smoothed_total_loss.AddValue(self.iter_total_loss) + self.smoothed_mb_qsize.AddValue( + 
self.model.roi_data_loader._minibatch_queue.qsize() + ) + + def LogIterStats(self, cur_iter, lr): + """Log the tracked statistics.""" + if (cur_iter % self.LOG_PERIOD == 0 or + cur_iter == cfg.SOLVER.MAX_ITER - 1): + stats = self.GetStats(cur_iter, lr) + log_json_stats(stats) + + def GetStats(self, cur_iter, lr): + eta_seconds = self.iter_timer.average_time * ( + cfg.SOLVER.MAX_ITER - cur_iter + ) + eta = str(datetime.timedelta(seconds=int(eta_seconds))) + mem_stats = c2_py_utils.GetGPUMemoryUsageStats() + mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS]) + stats = dict( + iter=cur_iter, + lr=float(lr), + time=self.iter_timer.average_time, + loss=self.smoothed_total_loss.GetMedianValue(), + eta=eta, + mb_qsize=int( + np.round(self.smoothed_mb_qsize.GetMedianValue()) + ), + mem=int(np.ceil(mem_usage / 1024 / 1024)) + ) + for k, v in self.smoothed_losses_and_metrics.items(): + stats[k] = v.GetMedianValue() + return stats diff --git a/detectron/utils/vis.py b/detectron/utils/vis.py new file mode 100644 index 0000000000000000000000000000000000000000..0234a1dd68821040160e91c4a4be5c392857e7aa --- /dev/null +++ b/detectron/utils/vis.py @@ -0,0 +1,394 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +"""Detection output visualization module.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import cv2 +import numpy as np +import os + +import pycocotools.mask as mask_util + +from detectron.utils.colormap import colormap +import detectron.utils.env as envu +import detectron.utils.keypoints as keypoint_utils + +# Matplotlib requires certain adjustments in some environments +# Must happen before importing matplotlib +envu.set_up_matplotlib() +import matplotlib.pyplot as plt +from matplotlib.patches import Polygon + +plt.rcParams['pdf.fonttype'] = 42 # For editing in Adobe Illustrator + + +_GRAY = (218, 227, 218) +_GREEN = (18, 127, 15) +_WHITE = (255, 255, 255) + + +def kp_connections(keypoints): + kp_lines = [ + [keypoints.index('left_eye'), keypoints.index('right_eye')], + [keypoints.index('left_eye'), keypoints.index('nose')], + [keypoints.index('right_eye'), keypoints.index('nose')], + [keypoints.index('right_eye'), keypoints.index('right_ear')], + [keypoints.index('left_eye'), keypoints.index('left_ear')], + [keypoints.index('right_shoulder'), keypoints.index('right_elbow')], + [keypoints.index('right_elbow'), keypoints.index('right_wrist')], + [keypoints.index('left_shoulder'), keypoints.index('left_elbow')], + [keypoints.index('left_elbow'), keypoints.index('left_wrist')], + [keypoints.index('right_hip'), keypoints.index('right_knee')], + [keypoints.index('right_knee'), keypoints.index('right_ankle')], + [keypoints.index('left_hip'), keypoints.index('left_knee')], + [keypoints.index('left_knee'), keypoints.index('left_ankle')], + [keypoints.index('right_shoulder'), keypoints.index('left_shoulder')], + [keypoints.index('right_hip'), keypoints.index('left_hip')], + ] + return kp_lines + + +def convert_from_cls_format(cls_boxes, cls_segms, cls_keyps): + """Convert from 
the class boxes/segms/keyps format generated by the testing + code. + """ + box_list = [b for b in cls_boxes if len(b) > 0] + if len(box_list) > 0: + boxes = np.concatenate(box_list) + else: + boxes = None + if cls_segms is not None: + segms = [s for slist in cls_segms for s in slist] + else: + segms = None + if cls_keyps is not None: + keyps = [k for klist in cls_keyps for k in klist] + else: + keyps = None + classes = [] + for j in range(len(cls_boxes)): + classes += [j] * len(cls_boxes[j]) + return boxes, segms, keyps, classes + + +def get_class_string(class_index, score, dataset): + class_text = dataset.classes[class_index] if dataset is not None else \ + 'id{:d}'.format(class_index) + return class_text + ' {:0.2f}'.format(score).lstrip('0') + + +def vis_mask(img, mask, col, alpha=0.4, show_border=True, border_thick=1): + """Visualizes a single binary mask.""" + + img = img.astype(np.float32) + idx = np.nonzero(mask) + + img[idx[0], idx[1], :] *= 1.0 - alpha + img[idx[0], idx[1], :] += alpha * col + + if show_border: + contours = cv2.findContours( + mask.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)[-2] + cv2.drawContours(img, contours, -1, _WHITE, border_thick, cv2.LINE_AA) + + return img.astype(np.uint8) + + +def vis_class(img, pos, class_str, font_scale=0.35): + """Visualizes the class.""" + img = img.astype(np.uint8) + x0, y0 = int(pos[0]), int(pos[1]) + # Compute text size. + txt = class_str + font = cv2.FONT_HERSHEY_SIMPLEX + ((txt_w, txt_h), _) = cv2.getTextSize(txt, font, font_scale, 1) + # Place text background. + back_tl = x0, y0 - int(1.3 * txt_h) + back_br = x0 + txt_w, y0 + cv2.rectangle(img, back_tl, back_br, _GREEN, -1) + # Show text. 
+ txt_tl = x0, y0 - int(0.3 * txt_h) + cv2.putText(img, txt, txt_tl, font, font_scale, _GRAY, lineType=cv2.LINE_AA) + return img + + +def vis_bbox(img, bbox, thick=1): + """Visualizes a bounding box.""" + img = img.astype(np.uint8) + (x0, y0, w, h) = bbox + x1, y1 = int(x0 + w), int(y0 + h) + x0, y0 = int(x0), int(y0) + cv2.rectangle(img, (x0, y0), (x1, y1), _GREEN, thickness=thick) + return img + + +def vis_keypoints(img, kps, kp_thresh=2, alpha=0.7): + """Visualizes keypoints (adapted from vis_one_image). + kps has shape (4, #keypoints) where 4 rows are (x, y, logit, prob). + """ + dataset_keypoints, _ = keypoint_utils.get_keypoints() + kp_lines = kp_connections(dataset_keypoints) + + # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv. + cmap = plt.get_cmap('rainbow') + colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)] + colors = [(c[2] * 255, c[1] * 255, c[0] * 255) for c in colors] + + # Perform the drawing on a copy of the image, to allow for blending. + kp_mask = np.copy(img) + + # Draw mid shoulder / mid hip first for better visualization. 
+ mid_shoulder = ( + kps[:2, dataset_keypoints.index('right_shoulder')] + + kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0 + sc_mid_shoulder = np.minimum( + kps[2, dataset_keypoints.index('right_shoulder')], + kps[2, dataset_keypoints.index('left_shoulder')]) + mid_hip = ( + kps[:2, dataset_keypoints.index('right_hip')] + + kps[:2, dataset_keypoints.index('left_hip')]) / 2.0 + sc_mid_hip = np.minimum( + kps[2, dataset_keypoints.index('right_hip')], + kps[2, dataset_keypoints.index('left_hip')]) + nose_idx = dataset_keypoints.index('nose') + if sc_mid_shoulder > kp_thresh and kps[2, nose_idx] > kp_thresh: + cv2.line( + kp_mask, tuple(mid_shoulder), tuple(kps[:2, nose_idx]), + color=colors[len(kp_lines)], thickness=2, lineType=cv2.LINE_AA) + if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh: + cv2.line( + kp_mask, tuple(mid_shoulder), tuple(mid_hip), + color=colors[len(kp_lines) + 1], thickness=2, lineType=cv2.LINE_AA) + + # Draw the keypoints. + for l in range(len(kp_lines)): + i1 = kp_lines[l][0] + i2 = kp_lines[l][1] + p1 = kps[0, i1], kps[1, i1] + p2 = kps[0, i2], kps[1, i2] + if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh: + cv2.line( + kp_mask, p1, p2, + color=colors[l], thickness=2, lineType=cv2.LINE_AA) + if kps[2, i1] > kp_thresh: + cv2.circle( + kp_mask, p1, + radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA) + if kps[2, i2] > kp_thresh: + cv2.circle( + kp_mask, p2, + radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA) + + # Blend the keypoints. 
+ return cv2.addWeighted(img, 1.0 - alpha, kp_mask, alpha, 0) + + +def vis_one_image_opencv( + im, boxes, segms=None, keypoints=None, thresh=0.9, kp_thresh=2, + show_box=False, dataset=None, show_class=False): + """Constructs a numpy array with the detections visualized.""" + + if isinstance(boxes, list): + boxes, segms, keypoints, classes = convert_from_cls_format( + boxes, segms, keypoints) + + if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh: + return im + + if segms is not None and len(segms) > 0: + masks = mask_util.decode(segms) + color_list = colormap() + mask_color_id = 0 + + # Display in largest to smallest order to reduce occlusion + areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) + sorted_inds = np.argsort(-areas) + + for i in sorted_inds: + bbox = boxes[i, :4] + score = boxes[i, -1] + if score < thresh: + continue + + # show box (off by default) + if show_box: + im = vis_bbox( + im, (bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1])) + + # show class (off by default) + if show_class: + class_str = get_class_string(classes[i], score, dataset) + im = vis_class(im, (bbox[0], bbox[1] - 2), class_str) + + # show mask + if segms is not None and len(segms) > i: + color_mask = color_list[mask_color_id % len(color_list), 0:3] + mask_color_id += 1 + im = vis_mask(im, masks[..., i], color_mask) + + # show keypoints + if keypoints is not None and len(keypoints) > i: + im = vis_keypoints(im, keypoints[i], kp_thresh) + + return im + + +def vis_one_image( + im, im_name, output_dir, boxes, segms=None, keypoints=None, thresh=0.9, + kp_thresh=2, dpi=200, box_alpha=0.0, dataset=None, show_class=False, + ext='pdf', out_when_no_box=False): + """Visual debugging of detections.""" + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + if isinstance(boxes, list): + boxes, segms, keypoints, classes = convert_from_cls_format( + boxes, segms, keypoints) + + if (boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < 
thresh) and not out_when_no_box: + return + + dataset_keypoints, _ = keypoint_utils.get_keypoints() + + if segms is not None and len(segms) > 0: + masks = mask_util.decode(segms) + + color_list = colormap(rgb=True) / 255 + + kp_lines = kp_connections(dataset_keypoints) + cmap = plt.get_cmap('rainbow') + colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)] + + fig = plt.figure(frameon=False) + fig.set_size_inches(im.shape[1] / dpi, im.shape[0] / dpi) + ax = plt.Axes(fig, [0., 0., 1., 1.]) + ax.axis('off') + fig.add_axes(ax) + ax.imshow(im) + + if boxes is None: + sorted_inds = [] # avoid crash when 'boxes' is None + else: + # Display in largest to smallest order to reduce occlusion + areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) + sorted_inds = np.argsort(-areas) + + mask_color_id = 0 + for i in sorted_inds: + bbox = boxes[i, :4] + score = boxes[i, -1] + if score < thresh: + continue + + # show box (off by default) + ax.add_patch( + plt.Rectangle((bbox[0], bbox[1]), + bbox[2] - bbox[0], + bbox[3] - bbox[1], + fill=False, edgecolor='g', + linewidth=0.5, alpha=box_alpha)) + + if show_class: + ax.text( + bbox[0], bbox[1] - 2, + get_class_string(classes[i], score, dataset), + fontsize=3, + family='serif', + bbox=dict( + facecolor='g', alpha=0.4, pad=0, edgecolor='none'), + color='white') + + # show mask + if segms is not None and len(segms) > i: + img = np.ones(im.shape) + color_mask = color_list[mask_color_id % len(color_list), 0:3] + mask_color_id += 1 + + w_ratio = .4 + for c in range(3): + color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio + for c in range(3): + img[:, :, c] = color_mask[c] + e = masks[:, :, i] + + contour = cv2.findContours( + e.copy(), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)[-2] + + for c in contour: + polygon = Polygon( + c.reshape((-1, 2)), + fill=True, facecolor=color_mask, + edgecolor='w', linewidth=1.2, + alpha=0.5) + ax.add_patch(polygon) + + # show keypoints + if keypoints is not None and len(keypoints) > 
i: + kps = keypoints[i] + plt.autoscale(False) + for l in range(len(kp_lines)): + i1 = kp_lines[l][0] + i2 = kp_lines[l][1] + if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh: + x = [kps[0, i1], kps[0, i2]] + y = [kps[1, i1], kps[1, i2]] + line = plt.plot(x, y) + plt.setp(line, color=colors[l], linewidth=1.0, alpha=0.7) + if kps[2, i1] > kp_thresh: + plt.plot( + kps[0, i1], kps[1, i1], '.', color=colors[l], + markersize=3.0, alpha=0.7) + + if kps[2, i2] > kp_thresh: + plt.plot( + kps[0, i2], kps[1, i2], '.', color=colors[l], + markersize=3.0, alpha=0.7) + + # add mid shoulder / mid hip for better visualization + mid_shoulder = ( + kps[:2, dataset_keypoints.index('right_shoulder')] + + kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0 + sc_mid_shoulder = np.minimum( + kps[2, dataset_keypoints.index('right_shoulder')], + kps[2, dataset_keypoints.index('left_shoulder')]) + mid_hip = ( + kps[:2, dataset_keypoints.index('right_hip')] + + kps[:2, dataset_keypoints.index('left_hip')]) / 2.0 + sc_mid_hip = np.minimum( + kps[2, dataset_keypoints.index('right_hip')], + kps[2, dataset_keypoints.index('left_hip')]) + if (sc_mid_shoulder > kp_thresh and + kps[2, dataset_keypoints.index('nose')] > kp_thresh): + x = [mid_shoulder[0], kps[0, dataset_keypoints.index('nose')]] + y = [mid_shoulder[1], kps[1, dataset_keypoints.index('nose')]] + line = plt.plot(x, y) + plt.setp( + line, color=colors[len(kp_lines)], linewidth=1.0, alpha=0.7) + if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh: + x = [mid_shoulder[0], mid_hip[0]] + y = [mid_shoulder[1], mid_hip[1]] + line = plt.plot(x, y) + plt.setp( + line, color=colors[len(kp_lines) + 1], linewidth=1.0, + alpha=0.7) + + output_name = os.path.basename(im_name) + '.' 
+ ext + fig.savefig(os.path.join(output_dir, '{}'.format(output_name)), dpi=dpi) + plt.close('all') diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..38cc0eaf6e2bbba79fef786dc7b35791688142bd --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,28 @@ +# Use Caffe2 image as parent image +FROM caffe2/caffe2:snapshot-py2-cuda9.0-cudnn7-ubuntu16.04 + +RUN mv /usr/local/caffe2 /usr/local/caffe2_build +ENV Caffe2_DIR /usr/local/caffe2_build + +ENV PYTHONPATH /usr/local/caffe2_build:${PYTHONPATH} +ENV LD_LIBRARY_PATH /usr/local/caffe2_build/lib:${LD_LIBRARY_PATH} + +# Clone the Detectron repository +RUN git clone https://github.com/facebookresearch/detectron /detectron + +# Install Python dependencies +RUN pip install -r /detectron/requirements.txt + +# Install the COCO API +RUN git clone https://github.com/cocodataset/cocoapi.git /cocoapi +WORKDIR /cocoapi/PythonAPI +RUN make install + +# Go to Detectron root +WORKDIR /detectron + +# Set up Python modules +RUN make + +# [Optional] Build custom ops +RUN make ops diff --git a/projects/GN/README.md b/projects/GN/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5757189a6f2971be4414570e33d53b9c3202e7f9 --- /dev/null +++ b/projects/GN/README.md @@ -0,0 +1,304 @@ +# Group Normalization for Mask R-CNN + +
+ +
+ +## Introduction + +This file provides Mask R-CNN baseline results and models trained with [Group Normalization](https://arxiv.org/abs/1803.08494): + +``` +@article{GroupNorm2018, + title={Group Normalization}, + author={Yuxin Wu and Kaiming He}, + journal={arXiv:1803.08494}, + year={2018} +} +``` + +**Note:** This code uses the GroupNorm op implemented in CUDA, included in the Caffe2 repo. When writing this document, Caffe2 is being merged into PyTorch, and the GroupNorm op is located [here](https://github.com/pytorch/pytorch/blob/master/caffe2/operators/group_norm_op.cu). Make sure your Caffe2 is up to date. + +## Pretrained Models with GN + +These models are trained in Caffe2 on the standard ImageNet-1k dataset, using GroupNorm with 32 groups (G=32). + +- [R-50-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl): ResNet-50 with GN, 24.0\% top-1 error (center-crop). +- [R-101-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl): ResNet-101 with GN, 22.6\% top-1 error (center-crop). + +## Results + +### Baselines with BN + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
         case          typelr
schd
im/
gpu
train
mem
(GB)
train
time
(s/iter)
train
time
total
(hr)
inference
time
(s/im)
box
AP
mask
AP
model id
R-50-FPN, BN*Mask R-CNN2x28.60.89744.90.099 + 0.01838.634.535859007
R-101-FPN, BN*Mask R-CNN2x210.20.99349.70.126 + 0.01740.936.435861858
+ +**Notes:** + +- This table is copied from [Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#end-to-end-faster--mask-r-cnn-baselines). +- BN* means that BatchNorm (BN) is used for pre-training and is frozen and turned into a per-channel linear layer when fine-tuning. This is the default of Faster/Mask R-CNN and Detectron. + +### Mask R-CNN with GN + +#### Standard Mask R-CNN recipe + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
         case          typelr
schd
im/
gpu
train
mem
(GB)
train
time
(s/iter)
train
time
total
(hr)
inference
time
(s/im)
box
AP
mask
AP
model iddownload
links
R-50-FPN, GNMask R-CNN2x210.51.01750.80.146 + 0.01740.335.748616381 + model +  |  + boxes +  |  + masks
R-101-FPN, GNMask R-CNN2x212.41.15157.50.180 + 0.01541.836.848616724 + model +  |  + boxes +  |  + masks
+ +**Notes:** +- GN is applied on: (i) ResNet layers inherited from pre-training, (ii) the FPN-specific layers, (iii) the RoI bbox head, and (iv) the RoI mask head. +- These GN models use a 4conv+1fc RoI box head. The BN* counterpart with this head performs similarly with the default 2fc head: using this codebase, R-50-FPN BN\* with 4conv+1fc has 38.8/34.4 box/mask AP. +- 2x is the default schedule (180k) in Detectron. + +#### Longer training schedule + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
         case          typelr
schd
im/
gpu
train
mem
(GB)
train
time
(s/iter)
train
time
total
(hr)
inference
time
(s/im)
box
AP
mask
AP
model iddownload
links
R-50-FPN, GNMask R-CNN3x210.51.03377.40.145 + 0.01540.836.148734751 + model +  |  + boxes +  |  + masks
R-101-FPN, GNMask R-CNN3x212.41.17187.90.180 + 0.01442.337.248734779 + model +  |  + boxes +  |  + masks
+ +**Notes:** +- 3x is a longer schedule (270k). GN can improve further when using the longer schedule, but its BN* counterpart remains similar (R-50-FPN BN\*: 38.9/34.3) with the longer schedule. +- These models are **without** any scale augmentation that can further [improve results](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#mask-r-cnn-with-bells--whistles). + + +### Explorations + +#### Training Mask R-CNN from scratch + +GN enables to train Mask R-CNN *from scratch* without ImageNet pre-training, despite the small batch size. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
         case          typelr
schd
im/
gpu
train
mem
(GB)
train
time
(s/iter)
train
time
total
(hr)
inference
time
(s/im)
box
AP
mask
AP
model id
R-50-FPN, GN, scratchMask R-CNN3x210.81.08781.50.140 + 0.01939.535.256421872
R-101-FPN, GN, scratchMask R-CNN3x212.71.24393.20.177 + 0.01941.036.456421911
+ +**Notes:** +- To reproduce these results, see the config yaml files starting with ```scratch ```. +- These are results using ```freeze_at=0```. See this [commit](https://github.com/facebookresearch/Detectron/commit/f8ffc87ca442d8f6bd2b9aad11029b5db56d7260) about the related issue. + +  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
R-50-FPN, GN, scratchMask R-CNN3x210.50.99074.30.146 + 0.02036.232.549025460
R-101-FPN, GN, scratchMask R-CNN3x212.41.12484.30.180 + 0.01937.533.349024951
+ +**Notes:** +- These are early results that followed the default training using ```freeze_at=2```. This means the layers of conv1 and res2 were simply random weights in the case of training from-scratch. See this [commit](https://github.com/facebookresearch/Detectron/commit/f8ffc87ca442d8f6bd2b9aad11029b5db56d7260) about the related issue. diff --git a/projects/GN/gn.jpg b/projects/GN/gn.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5f04015ab94ef3c8576f3f79e09544479458b961 Binary files /dev/null and b/projects/GN/gn.jpg differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..23a0ae0bf62dbf1f3be2b62964d4865ae4334dbc --- /dev/null +++ b/requirements.txt @@ -0,0 +1,11 @@ +numpy>=1.13 +pyyaml==3.12 +matplotlib +opencv-python>=3.2 +setuptools +Cython +mock +scipy +six +future +protobuf diff --git a/setup.py b/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..234e736c140575d81410e6d888cb2000cd840974 --- /dev/null +++ b/setup.py @@ -0,0 +1,61 @@ +# Copyright (c) 2017-present, Facebook, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+##############################################################################
+
+# setup.py: builds Detectron's two Cython extension modules
+# (detectron.utils.cython_bbox and detectron.utils.cython_nms) and registers
+# the 'detectron' package with setuptools.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from Cython.Build import cythonize
+from setuptools import Extension
+from setuptools import setup
+
+import numpy as np
+
+# Directory containing the NumPy C headers; passed via include_dirs below so
+# the C code generated from the .pyx sources can find them.
+_NP_INCLUDE_DIRS = np.get_include()
+
+
+# Extension modules
+ext_modules = [
+    Extension(
+        name='detectron.utils.cython_bbox',
+        sources=[
+            'detectron/utils/cython_bbox.pyx'
+        ],
+        # -Wno-cpp silences GCC's "#warning" preprocessor notices
+        # (presumably NumPy's deprecated-C-API warning emitted from its
+        # headers -- TODO confirm against the build log).
+        extra_compile_args=[
+            '-Wno-cpp'
+        ],
+        include_dirs=[
+            _NP_INCLUDE_DIRS
+        ]
+    ),
+    Extension(
+        name='detectron.utils.cython_nms',
+        sources=[
+            'detectron/utils/cython_nms.pyx'
+        ],
+        # Same warning suppression as for cython_bbox above.
+        extra_compile_args=[
+            '-Wno-cpp'
+        ],
+        include_dirs=[
+            _NP_INCLUDE_DIRS
+        ]
+    )
+]
+
+# cythonize() transpiles the .pyx sources to C; setuptools then compiles the
+# result into the detectron.utils.* shared libraries.
+setup(
+    name='Detectron',
+    packages=['detectron'],
+    ext_modules=cythonize(ext_modules)
+)
diff --git "a/\346\226\260\345\273\272\346\226\207\346\234\254\346\226\207\346\241\243.txt" "b/\346\226\260\345\273\272\346\226\207\346\234\254\346\226\207\346\241\243.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391