diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..e43b0f988953ae3a84b00331d0ccf5f7d51cb3cf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.DS_Store
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..6a6f414015af8bf5f0131c814a3251aad57061dc
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,377 @@
+# Copyright 2009 The RE2 Authors.  All Rights Reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# To build against ICU for full Unicode properties support,
+# uncomment the next two lines:
+# CCICU=$(shell pkg-config icu-uc --cflags) -DRE2_USE_ICU
+# LDICU=$(shell pkg-config icu-uc --libs)
+
+# To build against PCRE for testing or benchmarking,
+# uncomment the next two lines:
+# CCPCRE=-I/usr/local/include -DUSEPCRE
+# LDPCRE=-L/usr/local/lib -lpcre
+
+CXX?=g++
+# Overridable defaults; overriding LDFLAGS drops -lrure unless it is re-added.
+CXXFLAGS?=-O3 -g
+LDFLAGS?=-lrure
+# Project flags, passed alongside the above on the library and test build lines.
+RE2_CXXFLAGS?=-std=c++11 -pthread -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCICU) $(CCPCRE)
+RE2_LDFLAGS?=-pthread $(LDICU) $(LDPCRE)
+AR?=ar
+ARFLAGS?=rsc
+NM?=nm
+NMFLAGS?=-p
+
+# Variables mandated by GNU, the arbiter of all good taste on the internet.
+# http://www.gnu.org/prep/standards/standards.html
+prefix=/usr/local
+exec_prefix=$(prefix)
+includedir=$(prefix)/include
+libdir=$(exec_prefix)/lib
+INSTALL=install
+INSTALL_DATA=$(INSTALL) -m 644
+
+# sed -i is not portable: Darwin's sed(1) wants an explicit backup suffix
+# (empty here, meaning no backup file), whereas every other platform,
+# SunOS included, accepts a bare -i.
+ifeq ($(shell uname),Darwin)
+SED_INPLACE=sed -i ''
+else
+SED_INPLACE=sed -i
+endif
+
+# ABI version
+# http://tldp.org/HOWTO/Program-Library-HOWTO/shared-libraries.html
+SONAME=9
+
+# To rebuild the Tables generated by Perl and Python scripts (requires Internet
+# access for Unicode data), uncomment the following line:
+# REBUILD_TABLES=1
+
+# Per-platform shared-library naming and link command. The SunOS branch omits the version script because its linker does not support wildcards.
+ifeq ($(shell uname),Darwin)
+SOEXT=dylib
+SOEXTVER=$(SONAME).$(SOEXT)
+SOEXTVER00=$(SONAME).0.0.$(SOEXT)
+MAKE_SHARED_LIBRARY=$(CXX) -dynamiclib -Wl,-compatibility_version,$(SONAME),-current_version,$(SONAME).0.0,-install_name,$(libdir)/libre2.$(SOEXTVER),-exported_symbols_list,libre2.symbols.darwin $(RE2_LDFLAGS) $(LDFLAGS)
+else ifeq ($(shell uname),SunOS)
+SOEXT=so
+SOEXTVER=$(SOEXT).$(SONAME)
+SOEXTVER00=$(SOEXT).$(SONAME).0.0
+MAKE_SHARED_LIBRARY=$(CXX) -shared -Wl,-soname,libre2.$(SOEXTVER) $(RE2_LDFLAGS) $(LDFLAGS)
+else
+SOEXT=so
+SOEXTVER=$(SOEXT).$(SONAME)
+SOEXTVER00=$(SOEXT).$(SONAME).0.0
+MAKE_SHARED_LIBRARY=$(CXX) -shared -Wl,-soname,libre2.$(SOEXTVER),--version-script,libre2.symbols $(RE2_LDFLAGS) $(LDFLAGS)
+endif
+
+.PHONY: all
+all: obj/libre2.a obj/so/libre2.$(SOEXT)
+
+INSTALL_HFILES=\
+	re2/filtered_re2.h\
+	re2/re2.h\
+	re2/set.h\
+	re2/stringpiece.h\
+
+HFILES=\
+	util/benchmark.h\
+	util/flags.h\
+	util/logging.h\
+	util/malloc_counter.h\
+	util/mix.h\
+	util/mutex.h\
+	util/pcre.h\
+	util/strutil.h\
+	util/test.h\
+	util/utf.h\
+	util/util.h\
+	re2/filtered_re2.h\
+	re2/re2.h\
+	re2/set.h\
+	re2/stringpiece.h\
+	# re2/testing/exhaustive_tester.h\
+	# re2/testing/regexp_generator.h\
+	# re2/testing/string_generator.h\
+	# re2/testing/tester.h\
+
+# Keep only the interface stubs; the original engine objects are commented out below.
+OFILES=obj/re2/re2.o\
+	obj/re2/stringpiece.o\
+	obj/re2/set.o\
+	obj/re2/filtered_re2.o\
+
+	# obj/util/rune.o\
+	# obj/util/strutil.o\
+	# obj/re2/bitstate.o\
+	# obj/re2/compile.o\
+	# obj/re2/dfa.o\
+	# obj/re2/filtered_re2.o\
+	# obj/re2/mimics_pcre.o\
+	# obj/re2/nfa.o\
+	# obj/re2/onepass.o\
+	# obj/re2/parse.o\
+	# obj/re2/perl_groups.o\
+	# obj/re2/prefilter.o\
+	# obj/re2/prefilter_tree.o\
+	# obj/re2/prog.o\
+	# obj/re2/re2.o\
+	# obj/re2/regexp.o\
+	# obj/re2/set.o\
+	# obj/re2/simplify.o\
+	# obj/re2/stringpiece.o\
+	# obj/re2/tostring.o\
+	# obj/re2/unicode_casefold.o\
+	# obj/re2/unicode_groups.o\
+
+TESTOFILES=\
+	obj/util/pcre.o\
+	obj/util/strutil.o\
+
+	#obj/re2/testing/string_generator.o\
+	# obj/re2/testing/backtrack.o\
+	# obj/re2/testing/dump.o\
+	# obj/re2/testing/exhaustive_tester.o\
+	# obj/re2/testing/null_walker.o\
+	# obj/re2/testing/regexp_generator.o\
+	# obj/re2/testing/tester.o\
+
+TESTS=\
+	obj/test/re2_test\
+	obj/test/re2_arg_test\
+
+	#obj/test/set_test\
+	#obj/test/filtered_re2_test\
+
+	# obj/test/charclass_test\
+	# obj/test/compile_test\
+	# obj/test/mimics_pcre_test\
+	# obj/test/parse_test\
+	# obj/test/possible_match_test\
+	# obj/test/regexp_test\
+	# obj/test/required_prefix_test\
+	# obj/test/search_test\
+	# obj/test/simplify_test\
+	# obj/test/string_generator_test\
+
+BIGTESTS=\
+	obj/test/dfa_test\
+	obj/test/exhaustive1_test\
+	obj/test/exhaustive2_test\
+	obj/test/exhaustive3_test\
+	obj/test/exhaustive_test\
+	obj/test/random_test\
+
+SOFILES=$(patsubst obj/%,obj/so/%,$(OFILES))
+# We use TESTOFILES for testing the shared lib, only it is built differently.
+STESTS=$(patsubst obj/%,obj/so/%,$(TESTS))
+SBIGTESTS=$(patsubst obj/%,obj/so/%,$(BIGTESTS))
+
+DOFILES=$(patsubst obj/%,obj/dbg/%,$(OFILES))
+DTESTOFILES=$(patsubst obj/%,obj/dbg/%,$(TESTOFILES))
+DTESTS=$(patsubst obj/%,obj/dbg/%,$(TESTS))
+DBIGTESTS=$(patsubst obj/%,obj/dbg/%,$(BIGTESTS))
+
+.PRECIOUS: obj/%.o
+obj/%.o: %.cc $(HFILES)
+	@mkdir -p $(@D)
+	$(CXX) -c -o $@ $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) -DNDEBUG $<
+# Debug objects: same flags as above, but built without -DNDEBUG.
+.PRECIOUS: obj/dbg/%.o
+obj/dbg/%.o: %.cc $(HFILES)
+	@mkdir -p $(@D)
+	$(CXX) -c -o $@ $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) $<
+# Shared-library objects: position-independent (-fPIC), NDEBUG build.
+.PRECIOUS: obj/so/%.o
+obj/so/%.o: %.cc $(HFILES)
+	@mkdir -p $(@D)
+	$(CXX) -c -o $@ -fPIC $(CPPFLAGS) $(RE2_CXXFLAGS) $(CXXFLAGS) -DNDEBUG $<
+
+.PRECIOUS: obj/libre2.a
+obj/libre2.a: $(OFILES)
+	@mkdir -p $(@D)
+	$(AR) $(ARFLAGS) $@ $^
+# Debug flavour of the static archive, built from the obj/dbg objects.
+.PRECIOUS: obj/dbg/libre2.a
+obj/dbg/libre2.a: $(DOFILES)
+	@mkdir -p $(@D)
+	$(AR) $(ARFLAGS) $@ $^
+# Link the versioned shared library, then point the unversioned name at it.
+.PRECIOUS: obj/so/libre2.$(SOEXT)
+obj/so/libre2.$(SOEXT): $(SOFILES) libre2.symbols libre2.symbols.darwin
+	@mkdir -p $(@D)
+	$(MAKE_SHARED_LIBRARY) -o $(@D)/libre2.$(SOEXTVER) $(SOFILES)
+	ln -sf libre2.$(SOEXTVER) $@
+
+.PRECIOUS: obj/dbg/test/%
+obj/dbg/test/%: obj/dbg/libre2.a obj/dbg/re2/testing/%.o $(DTESTOFILES) obj/dbg/util/test.o
+	@mkdir -p $(@D)
+	$(CXX) -o $@ $(filter %.o,$^) $< $(RE2_LDFLAGS) $(LDFLAGS)
+# Optimized test binaries, linked against the static archive.
+.PRECIOUS: obj/test/%
+obj/test/%: obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o
+	@mkdir -p $(@D)
+	$(CXX) -o $@ $(filter %.o,$^) $< $(RE2_LDFLAGS) $(LDFLAGS)
+# Shared-library test binaries: -lre2 comes first on the link line and the
+# static archive after it is the fallback for private symbols.
+.PRECIOUS: obj/so/test/%
+obj/so/test/%: obj/so/libre2.$(SOEXT) obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o
+	@mkdir -p $(@D)
+	$(CXX) -o $@ $(filter %.o,$^) -Lobj/so -lre2 obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
+
+# Filter out dump.o because testing::TempDir() isn't available for it.
+obj/test/regexp_benchmark: obj/libre2.a obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o
+	@mkdir -p obj/test
+	$(CXX) -o $@ obj/re2/testing/regexp_benchmark.o $(filter-out obj/re2/testing/dump.o, $(TESTOFILES)) obj/util/benchmark.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
+
+# re2_fuzzer is a target for fuzzers like libFuzzer and AFL. This fake fuzzing
+# is simply a way to check that the target builds and then to run it against a
+# fixed set of inputs. To perform real fuzzing, refer to the documentation for
+# libFuzzer (llvm.org/docs/LibFuzzer.html) and AFL (lcamtuf.coredump.cx/afl/).
+obj/test/re2_fuzzer: CXXFLAGS:=-I./re2/fuzzing/compiler-rt/include $(CXXFLAGS)
+obj/test/re2_fuzzer: obj/libre2.a obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o
+	@mkdir -p obj/test
+	$(CXX) -o $@ obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
+
+ifdef REBUILD_TABLES
+.PRECIOUS: re2/perl_groups.cc
+re2/perl_groups.cc: re2/make_perl_groups.pl
+	perl $< > $@
+
+.PRECIOUS: re2/unicode_%.cc
+re2/unicode_%.cc: re2/make_unicode_%.py re2/unicode.py
+	python3 $< > $@
+endif
+
+.PHONY: distclean
+distclean: clean
+	rm -f re2/perl_groups.cc re2/unicode_casefold.cc re2/unicode_groups.cc
+
+.PHONY: clean
+clean:
+	rm -rf obj
+	rm -f re2/*.pyc
+
+.PHONY: testofiles
+testofiles: $(TESTOFILES)
+
+.PHONY: test
+test: $(DTESTS) $(TESTS) $(STESTS) debug-test static-test shared-test
+
+.PHONY: debug-test
+debug-test: $(DTESTS)
+	@./runtests $(DTESTS)
+
+.PHONY: static-test
+static-test: $(TESTS)
+	@./runtests $(TESTS)
+
+.PHONY: shared-test
+shared-test: $(STESTS)
+	@./runtests -shared-library-path obj/so $(STESTS)
+
+.PHONY: debug-bigtest
+debug-bigtest: $(DTESTS) $(DBIGTESTS)
+	@./runtests $(DTESTS) $(DBIGTESTS)
+
+.PHONY: static-bigtest
+static-bigtest: $(TESTS) $(BIGTESTS)
+	@./runtests $(TESTS) $(BIGTESTS)
+
+.PHONY: shared-bigtest
+shared-bigtest: $(STESTS) $(SBIGTESTS)
+	@./runtests -shared-library-path obj/so $(STESTS) $(SBIGTESTS)
+
+.PHONY: benchmark
+benchmark: obj/test/regexp_benchmark
+
+.PHONY: fuzz
+fuzz: obj/test/re2_fuzzer
+
+.PHONY: install
+install: static-install shared-install
+
+.PHONY: static
+static: obj/libre2.a
+
+.PHONY: static-install
+static-install: obj/libre2.a common-install
+	$(INSTALL) $< $(DESTDIR)$(libdir)/libre2.a
+
+.PHONY: shared
+shared: obj/so/libre2.$(SOEXT)
+# Install under the fully versioned name; both shorter names are symlinks to it.
+.PHONY: shared-install
+shared-install: obj/so/libre2.$(SOEXT) common-install
+	$(INSTALL) $< $(DESTDIR)$(libdir)/libre2.$(SOEXTVER00)
+	ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXTVER)
+	ln -sf libre2.$(SOEXTVER00) $(DESTDIR)$(libdir)/libre2.$(SOEXT)
+
+.PHONY: common-install
+common-install:
+	mkdir -p $(DESTDIR)$(includedir)/re2 $(DESTDIR)$(libdir)/pkgconfig
+	$(INSTALL_DATA) $(INSTALL_HFILES) $(DESTDIR)$(includedir)/re2
+	$(INSTALL_DATA) re2.pc $(DESTDIR)$(libdir)/pkgconfig/re2.pc
+	$(SED_INPLACE) -e "s#@includedir@#$(includedir)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
+	$(SED_INPLACE) -e "s#@libdir@#$(libdir)#" $(DESTDIR)$(libdir)/pkgconfig/re2.pc
+
+.PHONY: testinstall
+testinstall: static-testinstall shared-testinstall
+	@echo
+	@echo Install tests passed.
+	@echo
+
+.PHONY: static-testinstall
+static-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS)
+static-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -l:libre2.a $(LDICU) $(LDFLAGS)
+static-testinstall:
+	@mkdir -p obj
+	@cp testinstall.cc obj
+ifeq ($(shell uname),Darwin)
+	@echo Skipping test for libre2.a on Darwin.
+else ifeq ($(shell uname),SunOS)
+	@echo Skipping test for libre2.a on SunOS.
+else
+	(cd obj && $(CXX) testinstall.cc -o testinstall $(CXXFLAGS) $(LDFLAGS))
+	obj/testinstall
+endif
+
+.PHONY: shared-testinstall
+shared-testinstall: CXXFLAGS:=-std=c++11 -pthread -I$(DESTDIR)$(includedir) $(CXXFLAGS)
+shared-testinstall: LDFLAGS:=-pthread -L$(DESTDIR)$(libdir) -lre2 $(LDICU) $(LDFLAGS)
+shared-testinstall:
+	@mkdir -p obj
+	@cp testinstall.cc obj
+	(cd obj && $(CXX) testinstall.cc -o testinstall $(CXXFLAGS) $(LDFLAGS))
+ifeq ($(shell uname),Darwin)
+	DYLD_LIBRARY_PATH="$(DESTDIR)$(libdir):$(DYLD_LIBRARY_PATH)" obj/testinstall
+else
+	LD_LIBRARY_PATH="$(DESTDIR)$(libdir):$(LD_LIBRARY_PATH)" obj/testinstall
+endif
+
+.PHONY: benchlog
+benchlog: obj/test/regexp_benchmark
+	(echo '==BENCHMARK==' `hostname` `date`; \
+	  (uname -a; $(CXX) --version; git rev-parse --short HEAD; file obj/test/regexp_benchmark) | sed 's/^/# /'; \
+	  echo; \
+	  ./obj/test/regexp_benchmark 'PCRE|RE2') | tee -a benchlog.$$(hostname | sed 's/\..*//')
+
+.PHONY: log
+log:
+	$(MAKE) clean
+	$(MAKE) CXXFLAGS="$(CXXFLAGS) -DLOGGING=1" \
+		$(filter obj/test/exhaustive%_test,$(BIGTESTS))
+	echo '#' RE2 exhaustive tests built by make log >re2-exhaustive.txt
+	echo '#' $$(date) >>re2-exhaustive.txt
+	obj/test/exhaustive_test |grep -v '^PASS$$' >>re2-exhaustive.txt
+	obj/test/exhaustive1_test |grep -v '^PASS$$' >>re2-exhaustive.txt
+	obj/test/exhaustive2_test |grep -v '^PASS$$' >>re2-exhaustive.txt
+	obj/test/exhaustive3_test |grep -v '^PASS$$' >>re2-exhaustive.txt
+
+	$(MAKE) CXXFLAGS="$(CXXFLAGS) -DLOGGING=1" obj/test/search_test
+	echo '#' RE2 basic search tests built by make $@ >re2-search.txt
+	echo '#' $$(date) >>re2-search.txt
+	obj/test/search_test |grep -v '^PASS$$' >>re2-search.txt
diff --git a/README.en.md b/README.en.md
deleted file mode 100644
index 73d7961c0f9208a2c939b3dd6684c5c392bac0e3..0000000000000000000000000000000000000000
--- a/README.en.md
+++ /dev/null
@@ -1,36 +0,0 @@
-# re2-rust
-
-#### Description
-a compatible RE2 API  by calling Rust library regex(rure)
-
-#### Software Architecture
-Software architecture description
-
-#### Installation
-
-1.  xxxx
-2.  xxxx
-3.  xxxx
-
-#### Instructions
-
-1.  xxxx
-2.  xxxx
-3.  xxxx
-
-#### Contribution
-
-1.  Fork the repository
-2.  Create Feat_xxx branch
-3.  Commit your code
-4.  Create Pull Request
-
-
-#### Gitee Feature
-
-1.  You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md
-2.  Gitee blog [blog.gitee.com](https://blog.gitee.com)
-3.  Explore open source project [https://gitee.com/explore](https://gitee.com/explore)
-4.  The most valuable open source project [GVP](https://gitee.com/gvp)
-5.  The manual of Gitee [https://gitee.com/help](https://gitee.com/help)
-6.  The most popular members  [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/)
diff --git a/README.md b/README.md
index a459819322a568889d1a57acd7c04d73d2269189..b14f434d9bf05142a3276506d7402a6e962631c9 100644
--- a/README.md
+++ b/README.md
@@ -1,37 +1,55 @@
 # re2-rust
 
-#### 介绍
-a compatible RE2 API  by calling Rust library regex(rure)
+A compatible RE2 API (2021-11-01) implemented by calling the Rust library
+[regex (rure)](https://github.com/rust-lang/regex).
 
-#### 软件架构
-软件架构说明
 
+``` Shell
 
-#### 安装教程
+$ git clone https://gitee.com/openeuler/re2-rust.git
+$ cd re2-rust
+```
 
-1.  xxxx
-2.  xxxx
-3.  xxxx
 
-#### 使用说明
+###  安装rure库
+安装过程如下：
+``` Shell
+$ git clone https://github.com/rust-lang/regex
+$ cd regex/regex-capi
+$ cargo build --verbose
+```
+对于编译完成的`librure.a`和`librure.so`文件需要进行手工安装
+``` Shell
+# Copy librure.a and librure.so into /usr/lib
 
-1.  xxxx
-2.  xxxx
-3.  xxxx
+$ sudo cp regex/target/debug/librure.a /usr/lib
+$ sudo cp regex/target/debug/librure.so /usr/lib
+```
+手工安装rure.h文件
+``` Shell
+# copy the rure.h
+$ sudo cp regex/regex-capi/include/rure.h /usr/include
+```
 
-#### 参与贡献
+使用rure库:
+使用regex/regex-capi/ctest/目录下的 test.c文件进行测试
+``` Shell
+$ gcc test.c -o test -lrure
+$ ./test
+```
 
-1.  Fork 本仓库
-2.  新建 Feat_xxx 分支
-3.  提交代码
-4.  新建 Pull Request
+### 编译、安装re2-rust
+
+``` Shell
+$ make
+$ sudo make install
+```
+
+# Links
+
+* https://github.com/rust-lang/regex
+* https://gitee.com/src-openeuler/re2
+* https://github.com/google/re2
 
 
-#### 特技
 
-1.  使用 Readme\_XXX.md 来支持不同的语言，例如 Readme\_en.md, Readme\_zh.md
-2.  Gitee 官方博客 [blog.gitee.com](https://blog.gitee.com)
-3.  你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解 Gitee 上的优秀开源项目
-4.  [GVP](https://gitee.com/gvp) 全称是 Gitee 最有价值开源项目，是综合评定出的优秀开源项目
-5.  Gitee 官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help)
-6.  Gitee 封面人物是一档用来展示 Gitee 会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/)
diff --git a/libre2.symbols b/libre2.symbols
new file mode 100644
index 0000000000000000000000000000000000000000..93b71b486233e22a440feeae7175b784cf06f151
--- /dev/null
+++ b/libre2.symbols
@@ -0,0 +1,19 @@
+{
+	global:
+		# re2::RE2*
+		_ZN3re23RE2*;
+		_ZNK3re23RE2*;
+		# re2::StringPiece*
+		_ZN3re211StringPiece*;
+		_ZNK3re211StringPiece*;
+		# re2::operator<<*
+		_ZN3re2ls*;
+		# re2::FilteredRE2*
+		_ZN3re211FilteredRE2*;
+		_ZNK3re211FilteredRE2*;
+		# re2::re2_internal*
+		_ZN3re212re2_internal*;
+		_ZNK3re212re2_internal*;
+	local:
+		*;
+};
diff --git a/libre2.symbols.darwin b/libre2.symbols.darwin
new file mode 100644
index 0000000000000000000000000000000000000000..41ac96f93b10cafc08f091dbc0eee6191566775a
--- /dev/null
+++ b/libre2.symbols.darwin
@@ -0,0 +1,15 @@
+# Linker doesn't like these unmangled:
+# re2::RE2*
+__ZN3re23RE2*
+__ZNK3re23RE2*
+# re2::StringPiece*
+__ZN3re211StringPiece*
+__ZNK3re211StringPiece*
+# re2::operator<<*
+__ZN3re2ls*
+# re2::FilteredRE2*
+__ZN3re211FilteredRE2*
+__ZNK3re211FilteredRE2*
+# re2::re2_internal*
+__ZN3re212re2_internal*
+__ZNK3re212re2_internal*
diff --git a/re2.pc b/re2.pc
new file mode 100644
index 0000000000000000000000000000000000000000..50fd637d4eebe977a607ba446000f6aaaf427365
--- /dev/null
+++ b/re2.pc
@@ -0,0 +1,8 @@
+includedir=@includedir@
+libdir=@libdir@
+# libre2 calls into the Rust regex C API, so consumers must also link librure.
+Name: re2
+Description: RE2 is a fast, safe, thread-friendly regular expression engine.
+Version: 0.0.0
+Cflags: -std=c++11 -pthread -I${includedir}
+Libs: -pthread -L${libdir} -lre2 -lrure
diff --git a/re2/filtered_re2.cc b/re2/filtered_re2.cc
new file mode 100644
index 0000000000000000000000000000000000000000..01b9e49ad450735fc06c1e2ebd2d05320863d275
--- /dev/null
+++ b/re2/filtered_re2.cc
@@ -0,0 +1,148 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "re2/filtered_re2.h"
+
+#include <stddef.h>
+#include <string>
+#include <utility>
+
+#include "util/util.h"
+#include "util/logging.h"
+// #include "re2/prefilter.h"
+namespace re2 {
+class Prefilter {};  // empty stand-in for re2/prefilter.h
+// Empty stand-in for re2/prefilter_tree.h: constructible, but holds no state.
+class PrefilterTree {
+ public:
+  PrefilterTree() {}
+  explicit PrefilterTree(int /*min_atom_len*/) {}
+  ~PrefilterTree() {}
+};
+}  // namespace re2
+
+namespace re2 {
+
+// Starts uncompiled, with no regexps and a default-constructed PrefilterTree.
+FilteredRE2::FilteredRE2()
+    : compiled_(false), prefilter_tree_(new PrefilterTree()) {}
+
+// Same as the default constructor, but forwards min_atom_len to the tree.
+FilteredRE2::FilteredRE2(int min_atom_len)
+    : compiled_(false), prefilter_tree_(new PrefilterTree(min_atom_len)) {}
+
+// Deletes every RE2 object stored in re2_vec_.
+FilteredRE2::~FilteredRE2() {
+  for (RE2* owned : re2_vec_) {
+    delete owned;
+  }
+}
+
+FilteredRE2::FilteredRE2(FilteredRE2&& other)
+    : re2_vec_(std::move(other.re2_vec_)), compiled_(other.compiled_),
+      prefilter_tree_(std::move(other.prefilter_tree_)) {
+  other.re2_vec_.clear();
+  other.re2_vec_.shrink_to_fit();
+  other.compiled_ = false;
+  other.prefilter_tree_.reset(new PrefilterTree());  // other stays usable
+}
+// Move assignment: destroy *this, then move-construct from other in place.
+FilteredRE2& FilteredRE2::operator=(FilteredRE2&& other) {
+  if (this == &other) return *this;  // self-move would destroy the source too
+  this->~FilteredRE2();
+  (void) new (this) FilteredRE2(std::move(other));
+  return *this;
+}
+
+// Compiles pattern with options. If the RE2 is ok() it is stored and its
+// index is written to *id; otherwise it is deleted and *id is left untouched.
+RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
+                                const RE2::Options& options, int* id) {
+  RE2* candidate = new RE2(pattern, options);
+  const RE2::ErrorCode status = candidate->error_code();
+  if (candidate->ok()) {
+    *id = static_cast<int>(re2_vec_.size());
+    re2_vec_.push_back(candidate);
+    return status;
+  }
+  if (options.log_errors()) {
+    LOG(ERROR) << "Couldn't compile regular expression, skipping: "
+               << pattern << " due to error " << candidate->error();
+  }
+  delete candidate;
+  return status;
+}
+
+// Marks the set as compiled. The prefilter is stubbed out in this build, so
+// no atoms are extracted: *atoms is cleared and handed back empty. Calling
+// this twice, or before any Add, only logs an error and changes nothing.
+void FilteredRE2::Compile(std::vector<std::string>* atoms) {
+  if (compiled_) {
+    LOG(ERROR) << "Compile called already.";
+    return;
+  }
+
+  if (re2_vec_.empty()) {
+    LOG(ERROR) << "Compile called before Add.";
+    return;
+  }
+
+  // Never hand back whatever the caller's vector happened to contain: the
+  // header promises the strings to match, and here there are none.
+  atoms->clear();
+  compiled_ = true;
+}
+
+// Unfiltered scan: index of the first regexp that partially matches, else -1.
+int FilteredRE2::SlowFirstMatch(const StringPiece& text) const {
+  for (int idx = 0; idx < static_cast<int>(re2_vec_.size()); ++idx)
+    if (RE2::PartialMatch(text, *re2_vec_[idx])) return idx;
+  return -1;
+}
+
+// The prefilter is stubbed out in this build, so `atoms` cannot narrow the
+// search and is ignored: every regexp is tried in order. Returns the index of
+// the first regexp matching text, or -1 if none does or Compile was not run.
+int FilteredRE2::FirstMatch(const StringPiece& text,
+                            const std::vector<int>& atoms) const {
+  if (!compiled_) {
+    LOG(ERROR) << "FirstMatch called before Compile.";
+    return -1;
+  }
+  for (size_t i = 0; i < re2_vec_.size(); i++)
+    if (RE2::PartialMatch(text, *re2_vec_[i]))
+      return static_cast<int>(i);
+  return -1;
+}
+
+// Clears *matching_regexps, then appends the index of every regexp that
+// matches text and returns whether any did. The prefilter is stubbed out in
+// this build, so `atoms` is ignored and all regexps are tried.
+bool FilteredRE2::AllMatches(
+    const StringPiece& text,
+    const std::vector<int>& atoms,
+    std::vector<int>* matching_regexps) const {
+  matching_regexps->clear();
+  for (size_t i = 0; i < re2_vec_.size(); i++)
+    if (RE2::PartialMatch(text, *re2_vec_[i]))
+      matching_regexps->push_back(static_cast<int>(i));
+  return !matching_regexps->empty();
+}
+
+// No prefilter exists in this build, so nothing can be ruled out: after
+// clearing potential_regexps, report every added regexp. `atoms` is ignored.
+void FilteredRE2::AllPotentials(
+    const std::vector<int>& atoms,
+    std::vector<int>* potential_regexps) const {
+  potential_regexps->clear();
+  for (int i = 0; i < NumRegexps(); i++) potential_regexps->push_back(i);
+}
+
+// Testing/debugging hooks; no-ops because PrefilterTree is an empty stub.
+void FilteredRE2::RegexpsGivenStrings(const std::vector<int>& matched_atoms,
+                                      std::vector<int>* passed_regexps) {}
+
+void FilteredRE2::PrintPrefilter(int regexpid) {}
+
+}  // namespace re2
diff --git a/re2/filtered_re2.h b/re2/filtered_re2.h
new file mode 100644
index 0000000000000000000000000000000000000000..dd618c70e8bfee9cfc8e5118868f5f0a3cd298ee
--- /dev/null
+++ b/re2/filtered_re2.h
@@ -0,0 +1,114 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_FILTERED_RE2_H_
+#define RE2_FILTERED_RE2_H_
+
+// The class FilteredRE2 is used as a wrapper to multiple RE2 regexps.
+// It provides a prefilter mechanism that helps in cutting down the
+// number of regexps that need to be actually searched.
+//
+// By design, it does not include a string matching engine. This is to
+// allow the user of the class to use their favorite string matching
+// engine. The overall flow is: Add all the regexps using Add, then
+// Compile the FilteredRE2. Compile returns strings that need to be
+// matched. Note that the returned strings are lowercased and distinct.
+// For applying regexps to a search text, the caller does the string
+// matching using the returned strings. When doing the string match,
+// note that the caller has to do that in a case-insensitive way or
+// on a lowercased version of the search text. Then call FirstMatch
+// or AllMatches with a vector of indices of strings that were found
+// in the text to get the actual regexp matches.
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "re2/re2.h"
+
+namespace re2 {
+
+class PrefilterTree;
+
+class FilteredRE2 {
+ public:
+  FilteredRE2();
+  explicit FilteredRE2(int min_atom_len);
+  ~FilteredRE2();
+
+  // Not copyable.
+  FilteredRE2(const FilteredRE2&) = delete;
+  FilteredRE2& operator=(const FilteredRE2&) = delete;
+  // Movable.
+  FilteredRE2(FilteredRE2&& other);
+  FilteredRE2& operator=(FilteredRE2&& other);
+
+  // Uses RE2 constructor to create a RE2 object (re). Returns
+  // re->error_code(). If re->ok(), re is appended to re2_vec_ and its index
+  // is written to *id; otherwise re is deleted and *id is left untouched.
+  RE2::ErrorCode Add(const StringPiece& pattern,
+                     const RE2::Options& options,
+                     int* id);
+
+  // Prepares the regexps added by Add for filtering.  Returns a set
+  // of strings that the caller should check for in candidate texts.
+  // The returned strings are lowercased and distinct. When doing
+  // string matching, it should be performed in a case-insensitive
+  // way or the search text should be lowercased first.  Call after
+  // all Add calls are done.
+  void Compile(std::vector<std::string>* strings_to_match);
+
+  // Returns the index of the first matching regexp.
+  // Returns -1 on no match. Can be called prior to Compile.
+  // Does not do any filtering: simply tries to Match the
+  // regexps in a loop.
+  int SlowFirstMatch(const StringPiece& text) const;
+
+  // Returns the index of the first matching regexp.
+  // Returns -1 on no match. Compile has to be called before
+  // calling this.
+  int FirstMatch(const StringPiece& text,
+                 const std::vector<int>& atoms) const;
+
+  // Returns the indices of all matching regexps, after first clearing
+  // matching_regexps.
+  bool AllMatches(const StringPiece& text,
+                  const std::vector<int>& atoms,
+                  std::vector<int>* matching_regexps) const;
+
+  // Returns the indices of all potentially matching regexps after first
+  // clearing potential_regexps.
+  // A regexp is potentially matching if it passes the filter.
+  // If a regexp passes the filter it may still not match.
+  // A regexp that does not pass the filter is guaranteed to not match.
+  void AllPotentials(const std::vector<int>& atoms,
+                     std::vector<int>* potential_regexps) const;
+
+  // The number of regexps added.
+  int NumRegexps() const { return static_cast<int>(re2_vec_.size()); }
+
+  // Get the individual RE2 objects. regexpid is not range-checked.
+  const RE2& GetRE2(int regexpid) const { return *re2_vec_[regexpid]; }
+
+ private:
+  // Print prefilter.
+  void PrintPrefilter(int regexpid);
+
+  // Useful for testing and debugging.
+  void RegexpsGivenStrings(const std::vector<int>& matched_atoms,
+                           std::vector<int>* passed_regexps);
+
+  // All the regexps in the FilteredRE2.
+  std::vector<RE2*> re2_vec_;
+
+  // Has the FilteredRE2 been compiled using Compile()
+  bool compiled_;
+
+  // An AND-OR tree of string atoms used for filtering regexps.
+  std::unique_ptr<PrefilterTree> prefilter_tree_;
+};
+
+}  // namespace re2
+
+#endif  // RE2_FILTERED_RE2_H_
diff --git a/re2/fuzzing/compiler-rt/LICENSE b/re2/fuzzing/compiler-rt/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..f9dc50615d7ec2b9913dc434fb243fc30889d2a9
--- /dev/null
+++ b/re2/fuzzing/compiler-rt/LICENSE
@@ -0,0 +1,219 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+    1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+    2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+    3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+    4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+    5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+    6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+    7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+    8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+    9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+    END OF TERMS AND CONDITIONS
+
+    APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+    Copyright [yyyy] [name of copyright owner]
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+
+--- LLVM Exceptions to the Apache 2.0 License ----
+
+As an exception, if, as a result of your compiling your source code, portions
+of this Software are embedded into an Object form of such source code, you
+may redistribute such embedded portions in such Object form without complying
+with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
+
+In addition, if you combine or link compiled forms of this Software with
+software that is licensed under the GPLv2 ("Combined Software") and if a
+court of competent jurisdiction determines that the patent provision (Section
+3), the indemnity provision (Section 9) or other Section of the License
+conflicts with the conditions of the GPLv2, you may retroactively and
+prospectively choose to deem waived or otherwise exclude such Section(s) of
+the License, but only in their entirety and only with respect to the Combined
+Software.
+
diff --git a/re2/fuzzing/compiler-rt/include/fuzzer/FuzzedDataProvider.h b/re2/fuzzing/compiler-rt/include/fuzzer/FuzzedDataProvider.h
new file mode 100644
index 0000000000000000000000000000000000000000..3e069eba69b46229aa765d36db84197698a5b42a
--- /dev/null
+++ b/re2/fuzzing/compiler-rt/include/fuzzer/FuzzedDataProvider.h
@@ -0,0 +1,307 @@
+//===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// A single header library providing an utility class to break up an array of
+// bytes. Whenever run on the same input, provides the same output, as long as
+// its methods are called in the same order, with the same arguments.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
+#define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
+
+#include <algorithm>
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <initializer_list>
+#include <limits>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+// In addition to the comments below, the API is also briefly documented at
+// https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#fuzzed-data-provider
+class FuzzedDataProvider {
+ public:
+  // Wraps |data|, an array of |size| bytes, without copying it: only the
+  // pointer and the length are stored, so |data| must outlive this object.
+  FuzzedDataProvider(const uint8_t *data, size_t size)
+      : data_ptr_(data), remaining_bytes_(size) {}
+  ~FuzzedDataProvider() = default;
+
+  // Returns a std::vector holding the next |num_bytes| of input data, or all
+  // of the remaining data when fewer than |num_bytes| are left (so the
+  // result may be shorter than requested). Can be used with any byte sized
+  // type, such as char, unsigned char, uint8_t, etc.
+  template <typename T> std::vector<T> ConsumeBytes(size_t num_bytes) {
+    const size_t available = std::min(num_bytes, remaining_bytes_);
+    return ConsumeBytes<T>(available, available);
+  }
+
+  // Like |ConsumeBytes|, except that the returned vector has one extra
+  // trailing element holding |terminator|. Handy when a mutable
+  // null-terminated C-string is needed, but that is a rare case: prefer
+  // |ConsumeBytes| or |ConsumeBytesAsString| whenever possible.
+  template <typename T>
+  std::vector<T> ConsumeBytesWithTerminator(size_t num_bytes,
+                                            T terminator = 0) {
+    const size_t payload = std::min(num_bytes, remaining_bytes_);
+    std::vector<T> result = ConsumeBytes<T>(payload + 1, payload);
+    result.back() = terminator;
+    return result;
+  }
+
+  // Returns a std::string built from the next |num_bytes| of input data, or
+  // from everything that is left when fewer than |num_bytes| remain. Calling
+  // |.c_str()| on the result is the best way to get an immutable
+  // null-terminated C string.
+  std::string ConsumeBytesAsString(size_t num_bytes) {
+    static_assert(sizeof(std::string::value_type) == sizeof(uint8_t),
+                  "ConsumeBytesAsString cannot convert the data to a string.");
+
+    const size_t length = std::min(num_bytes, remaining_bytes_);
+    const auto *chars =
+        reinterpret_cast<const std::string::value_type *>(data_ptr_);
+    std::string result(chars, length);
+    Advance(length);
+    return result;
+  }
+
+  // Returns a number in the range [min, max] by consuming up to sizeof(T)
+  // bytes from the end of the input data. The value might not be uniformly
+  // distributed in the given range. If there's no input data left, always
+  // returns |min|. Aborts if |min| is greater than |max|.
+  template <typename T> T ConsumeIntegralInRange(T min, T max) {
+    static_assert(std::is_integral<T>::value, "An integral type is required.");
+    static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type.");
+
+    if (min > max)
+      abort();
+
+    // Use the biggest type possible to hold the range and the result.
+    uint64_t range = static_cast<uint64_t>(max) - min;
+    uint64_t result = 0;
+    size_t offset = 0;
+
+    while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 &&
+           remaining_bytes_ != 0) {
+      // Pull bytes off the end of the seed data. Experimentally, this seems to
+      // allow the fuzzer to more easily explore the input space. This makes
+      // sense, since it works by modifying inputs that caused new code to run,
+      // and this data is often used to encode length of data read by
+      // |ConsumeBytes|. Separating out read lengths makes it easier to modify
+      // the contents of the data that is actually read.
+      --remaining_bytes_;
+      result = (result << CHAR_BIT) | data_ptr_[remaining_bytes_];
+      offset += CHAR_BIT;
+    }
+
+    // Avoid division by 0, in case |range + 1| results in overflow.
+    if (range != std::numeric_limits<decltype(range)>::max())
+      result = result % (range + 1);
+
+    return static_cast<T>(min + result);
+  }
+
+  // Returns a std::string of length from 0 to |max_length|. When it runs out of
+  // input data, returns what remains of the input. Designed to be more stable
+  // with respect to a fuzzer inserting characters than just picking a random
+  // length and then consuming that many bytes with |ConsumeBytes|.
+  std::string ConsumeRandomLengthString(size_t max_length) {
+    // Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", and maps "\"
+    // followed by anything else to the end of the string (both bytes are
+    // consumed); a "\" that is the very last input byte is kept as is. As a
+    // result, a fuzzer can insert characters into the string, and the string
+    // will be lengthened to include them, resulting in a more stable fuzzer
+    // than picking the length of a string independently from its contents.
+    std::string result;
+
+    // Reserve the anticipated capacity to prevent several reallocations.
+    result.reserve(std::min(max_length, remaining_bytes_));
+    for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) {
+      char next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
+      Advance(1);
+      if (next == '\\' && remaining_bytes_ != 0) {
+        next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
+        Advance(1);
+        if (next != '\\')
+          break;
+      }
+      result += next;
+    }
+
+    result.shrink_to_fit();
+    return result;
+  }
+
+  // Returns a std::vector of all remaining input bytes, leaving none behind.
+  template <typename T> std::vector<T> ConsumeRemainingBytes() {
+    return ConsumeBytes<T>(remaining_bytes_);
+  }
+
+  // Returns a std::string containing all remaining bytes of the input data,
+  // after which no input is left. Prefer using |ConsumeRemainingBytes|
+  // unless you actually need a std::string object.
+  std::string ConsumeRemainingBytesAsString() {
+    return ConsumeBytesAsString(remaining_bytes_);
+  }
+
+  // Returns a number in the range [Type's min, Type's max]. The value might
+  // not be uniformly distributed in that range. If there's no input data
+  // left, always returns the type's minimum value.
+  template <typename T> T ConsumeIntegral() {
+    using Limits = std::numeric_limits<T>;
+    return ConsumeIntegralInRange(Limits::min(), Limits::max());
+  }
+
+  // Consumes one byte and returns its lowest bit; false when no data remains.
+  bool ConsumeBool() { return 1 & ConsumeIntegral<uint8_t>(); }
+
+  // Returns a copy of an input-selected element of the fixed-size |array|.
+  template <typename T, size_t size>
+  T PickValueInArray(const T (&array)[size]) {
+    static_assert(size > 0, "The array must be non empty.");
+    return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
+  }
+
+  template <typename T>
+  T PickValueInArray(std::initializer_list<const T> list) {
+    // TODO(Dor1s): switch to static_assert once C++14 is allowed.
+    if (!list.size())
+      abort();
+    // Consume an index in [0, list.size() - 1] and return a copy of that value.
+    return *(list.begin() + ConsumeIntegralInRange<size_t>(0, list.size() - 1));
+  }
+
+  // Returns a value of enum type T picked from the range [0, T::kMaxValue]. T
+  // must start at 0, be contiguous and alias |kMaxValue| to its largest value:
+  // enum class Foo { SomeValue, OtherValue, kMaxValue = OtherValue };
+  template <typename T> T ConsumeEnum() {
+    static_assert(std::is_enum<T>::value, "|T| must be an enum type.");
+    const uint32_t largest = static_cast<uint32_t>(T::kMaxValue);
+    return static_cast<T>(ConsumeIntegralInRange<uint32_t>(0, largest));
+  }
+
+  // Returns a floating point number in the range [0.0, 1.0], obtained by
+  // scaling a consumed integer. If no input data is left, always returns 0.
+  template <typename T> T ConsumeProbability() {
+    static_assert(std::is_floating_point<T>::value,
+                  "A floating point type is required.");
+
+    // A wider integer is drawn for wider floating point types so that the
+    // resulting values are spread more densely.
+    using IntegralType =
+        typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), uint32_t,
+                                  uint64_t>::type;
+
+    const T scale = static_cast<T>(std::numeric_limits<IntegralType>::max());
+    const T drawn = static_cast<T>(ConsumeIntegral<IntegralType>());
+    return drawn / scale;
+  }
+
+  // Returns a floating point value in the range [Type's lowest, Type's max] by
+  // consuming bytes from the input data. If there's no input data left, always
+  // returns Type's lowest value, i.e. the |min| of that range.
+  template <typename T> T ConsumeFloatingPoint() {
+    return ConsumeFloatingPointInRange<T>(std::numeric_limits<T>::lowest(),
+                                          std::numeric_limits<T>::max());
+  }
+
+  // Returns a floating point value in the given range by consuming bytes from
+  // the input data. If there's no input data left, returns |min|. Aborts if
+  // |min| is greater than |max|.
+  template <typename T> T ConsumeFloatingPointInRange(T min, T max) {
+    if (min > max)
+      abort();
+
+    T range = .0;
+    T result = min;
+    constexpr T zero(.0);
+    if (max > zero && min < zero && max > min + std::numeric_limits<T>::max()) {
+      // The diff |max - min| would overflow the given floating point type. Use
+      // the half of the diff as the range and consume a bool to decide whether
+      // the result is in the first or the second half of the diff.
+      range = (max / 2.0) - (min / 2.0);
+      if (ConsumeBool()) {
+        result += range;
+      }
+    } else {
+      range = max - min;
+    }
+
+    return result + range * ConsumeProbability<T>();
+  }
+
+  // Reports the number of input bytes that have not been consumed yet.
+  size_t remaining_bytes() { return remaining_bytes_; }
+
+ private:
+  FuzzedDataProvider(const FuzzedDataProvider &) = delete;
+  FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete;
+
+  // Drops |num_bytes| from the front of the input; aborts if fewer remain.
+  void Advance(size_t num_bytes) {
+    if (remaining_bytes_ < num_bytes)
+      abort();
+    data_ptr_ += num_bytes;
+    remaining_bytes_ -= num_bytes;
+  }
+
+  template <typename T>
+  std::vector<T> ConsumeBytes(size_t size, size_t num_bytes_to_consume) {
+    static_assert(sizeof(T) == sizeof(uint8_t), "Incompatible data type.");
+
+    // Builds a vector of |size| elements whose first |num_bytes_to_consume|
+    // elements are copied from the input; any others stay value-initialized.
+    // The size-based constructor below increases the odds of the capacity
+    // being equal to the length: that is implementation specific, but both
+    // libc++ and libstdc++ allocate exactly the requested number of bytes.
+    // To increase the odds even more, we also call |shrink_to_fit| below.
+    std::vector<T> result(size);
+    if (size == 0) {
+      if (num_bytes_to_consume != 0)
+        abort();
+      return result;
+    }
+
+    std::memcpy(result.data(), data_ptr_, num_bytes_to_consume);
+    Advance(num_bytes_to_consume);
+
+    // Even though |shrink_to_fit| is also implementation specific, we expect it
+    // to provide an additional assurance in case vector's constructor allocated
+    // a buffer which is larger than the actual amount of data we put inside it.
+    result.shrink_to_fit();
+    return result;
+  }
+
+  // Converts |value| to the same-sized type TS; values above TS's maximum
+  // wrap around to negative ones (e.g. 0xFF becomes -1 for 8-bit types).
+  template <typename TS, typename TU> TS ConvertUnsignedToSigned(TU value) {
+    static_assert(sizeof(TS) == sizeof(TU), "Incompatible data types.");
+    static_assert(!std::numeric_limits<TU>::is_signed,
+                  "Source type must be unsigned.");
+
+    // TODO(Dor1s): change to `if constexpr` once C++17 becomes mainstream.
+    if (std::numeric_limits<TS>::is_modulo)
+      return static_cast<TS>(value);
+
+    // Avoid using implementation-defined unsigned to signed conversions.
+    // To learn more, see https://stackoverflow.com/questions/13150449.
+    if (value <= std::numeric_limits<TS>::max())
+      return static_cast<TS>(value);
+    constexpr auto TS_min = std::numeric_limits<TS>::min();
+    return TS_min + static_cast<TS>(value - TS_min);  // TS, not char
+  }
+
+  const uint8_t *data_ptr_;
+  size_t remaining_bytes_;
+};
+
+#endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3082a769252153f4f48622f24cdda838f0ab17e7
--- /dev/null
+++ b/re2/fuzzing/re2_fuzzer.cc
@@ -0,0 +1,247 @@
+// Copyright 2016 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <fuzzer/FuzzedDataProvider.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "re2/re2.h"
+#include "re2/regexp.h"
+#include "re2/walker-inl.h"
+
+using re2::StringPiece;
+
+// NOT static, NOT signed.
+uint8_t dummy = 0;
+
+// Walks kRegexpConcat and kRegexpAlternate subexpressions to determine
+// their maximum length, i.e. the largest number of children found under
+// any one of them.
+class SubexpressionWalker : public re2::Regexp::Walker<int> {
+ public:
+  SubexpressionWalker() = default;
+  ~SubexpressionWalker() override = default;
+
+  // Returns -1 for every op other than concatenation and alternation.
+  // For those two, returns the larger of the node's own child count and
+  // the greatest result reported by any of its children.
+  int PostVisit(re2::Regexp* re, int parent_arg, int pre_arg,
+                int* child_args, int nchild_args) override {
+    const auto op = re->op();
+    if (op != re2::kRegexpConcat && op != re2::kRegexpAlternate)
+      return -1;
+
+    // Start from this node's width, then fold in the children's results.
+    int widest = nchild_args;
+    for (int i = 0; i < nchild_args; i++)
+      widest = std::max(widest, child_args[i]);
+    return widest;
+  }
+
+  // Should never be called: we use Walk(), not WalkExponential().
+  int ShortVisit(re2::Regexp* re, int parent_arg) override {
+    return parent_arg;
+  }
+
+ private:
+  SubexpressionWalker(const SubexpressionWalker&) = delete;
+  SubexpressionWalker& operator=(const SubexpressionWalker&) = delete;
+};
+
+// Walks the regexp to find its longest substring (i.e. kRegexpLiteralString
+// subexpression). Lengths are counted in runes rather than in bytes because
+// avoiding overheads due to UTF-8 encoding is worthwhile when fuzzing.
+class SubstringWalker : public re2::Regexp::Walker<int> {
+ public:
+  SubstringWalker() = default;
+  ~SubstringWalker() override = default;
+
+  // Literal strings yield their rune count; the operators listed below yield
+  // the maximum of their children's results; everything else yields -1.
+  int PostVisit(re2::Regexp* re, int parent_arg, int pre_arg,
+                int* child_args, int nchild_args) override {
+    int longest = -1;
+    switch (re->op()) {
+      case re2::kRegexpLiteralString:
+        longest = re->nrunes();
+        break;
+      case re2::kRegexpConcat:
+      case re2::kRegexpAlternate:
+      case re2::kRegexpStar:
+      case re2::kRegexpPlus:
+      case re2::kRegexpQuest:
+      case re2::kRegexpRepeat:
+      case re2::kRegexpCapture:
+        for (int i = 0; i < nchild_args; i++)
+          longest = std::max(longest, child_args[i]);
+        break;
+      default:
+        break;
+    }
+    return longest;
+  }
+
+  // Should never be called: we use Walk(), not WalkExponential().
+  int ShortVisit(re2::Regexp* re, int parent_arg) override {
+    return parent_arg;
+  }
+
+ private:
+  SubstringWalker(const SubstringWalker&) = delete;
+  SubstringWalker& operator=(const SubstringWalker&) = delete;
+};
+
+void TestOneInput(StringPiece pattern, const RE2::Options& options,
+                  StringPiece text) {
+  // Runs RE2's matching and replacement APIs on one pattern/text pair, but
+  // first bails out on inputs that are costly without being interesting.
+  // Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W: the fuzzer
+  // easily generates long runs of character classes that fall within the
+  // other limits, but time out nonetheless. The marginal cost is high - even
+  // more so with counted repetition - whereas the marginal benefit is zero.
+  // Count 'k', 'K', 's' and 'S' too because they become three-element
+  // character classes when case-insensitive and using UTF-8.
+  // TODO(junyer): Handle [:isalnum:] et al. when they start to cause pain.
+  int char_class = 0;
+  int backslash_p = 0;  // very expensive, so handle specially
+  for (size_t i = 0; i < pattern.size(); i++) {
+    if (pattern[i] == '.' ||
+        pattern[i] == 'k' || pattern[i] == 'K' ||
+        pattern[i] == 's' || pattern[i] == 'S')
+      char_class++;
+    if (pattern[i] != '\\')
+      continue;
+    i++;  // step onto the escaped character
+    if (i >= pattern.size())
+      break;
+    if (pattern[i] == 'p' || pattern[i] == 'P' ||
+        pattern[i] == 'd' || pattern[i] == 'D' ||
+        pattern[i] == 's' || pattern[i] == 'S' ||
+        pattern[i] == 'w' || pattern[i] == 'W')
+      char_class++;
+    if (pattern[i] == 'p' || pattern[i] == 'P')
+      backslash_p++;
+  }
+  if (char_class > 9)
+    return;
+  if (backslash_p > 1)
+    return;
+
+  // The default is 1000. Even 100 turned out to be too generous
+  // for fuzzing, empirically speaking, so let's try 10 instead.
+  re2::Regexp::FUZZING_ONLY_set_maximum_repeat_count(10);
+
+  RE2 re(pattern, options);
+  if (!re.ok())
+    return;
+
+  // Don't waste time fuzzing programs with large subexpressions.
+  // They can cause bug reports due to fuzzer timeouts. And they
+  // aren't interesting for fuzzing purposes.
+  if (SubexpressionWalker().Walk(re.Regexp(), -1) > 9)
+    return;
+
+  // Don't waste time fuzzing programs with large substrings.
+  // They can cause bug reports due to fuzzer timeouts when they
+  // are repetitions (e.g. hundreds of NUL bytes) and matching is
+  // unanchored. And they aren't interesting for fuzzing purposes.
+  if (SubstringWalker().Walk(re.Regexp(), -1) > 9)
+    return;
+
+  // Don't waste time fuzzing high-size programs.
+  // They can cause bug reports due to fuzzer timeouts.
+  int size = re.ProgramSize();
+  if (size > 9999)
+    return;
+  int rsize = re.ReverseProgramSize();
+  if (rsize > 9999)
+    return;
+
+  // Don't waste time fuzzing high-fanout programs.
+  // They can cause bug reports due to fuzzer timeouts.
+  std::vector<int> histogram;  // output parameter; its contents are unused
+  int fanout = re.ProgramFanout(&histogram);
+  if (fanout > 9)
+    return;
+  int rfanout = re.ReverseProgramFanout(&histogram);
+  if (rfanout > 9)
+    return;
+
+  if (re.NumberOfCapturingGroups() == 0) {
+    // Avoid early return due to too many arguments.
+    StringPiece sp = text;
+    RE2::FullMatch(sp, re);
+    RE2::PartialMatch(sp, re);
+    RE2::Consume(&sp, re);
+    sp = text;  // Reset.
+    RE2::FindAndConsume(&sp, re);
+  } else {
+    // Okay, we have at least one capturing group...
+    // Try conversion for variously typed arguments.
+    StringPiece sp = text;
+    short s;
+    RE2::FullMatch(sp, re, &s);
+    long l;
+    RE2::PartialMatch(sp, re, &l);
+    float f;
+    RE2::Consume(&sp, re, &f);
+    sp = text;  // Reset.
+    double d;
+    RE2::FindAndConsume(&sp, re, &d);
+  }
+
+  std::string s = std::string(text);
+  RE2::Replace(&s, re, "");
+  s = std::string(text);  // Reset.
+  RE2::GlobalReplace(&s, re, "");
+
+  std::string min, max;
+  re.PossibleMatchRange(&min, &max, /*maxlen=*/9);
+
+  // Exercise some other API functionality; results feed the global |dummy|.
+  dummy += re.NamedCapturingGroups().size();
+  dummy += re.CapturingGroupNames().size();
+  dummy += RE2::QuoteMeta(pattern).size();
+}
+
+// Entry point for libFuzzer: decodes |data| into options, a pattern and a text.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  // An input larger than 4 KiB probably isn't interesting. (This limit
+  // allows for fdp.ConsumeRandomLengthString()'s backslash behaviour.)
+  if (size == 0 || size > 4096)
+    return 0;
+
+  FuzzedDataProvider fdp(data, size);
+
+  // The convention here is that fdp.ConsumeBool() returning false sets
+  // the default value whereas returning true sets the alternate value:
+  // most options default to false and so can be set directly; encoding
+  // defaults to UTF-8; case_sensitive defaults to true. We do NOT want
+  // to log errors. max_mem is 64 MiB because we can afford to use more
+  // RAM in exchange for (hopefully) faster fuzzing.
+  RE2::Options options;
+  options.set_encoding(fdp.ConsumeBool() ? RE2::Options::EncodingLatin1
+                                         : RE2::Options::EncodingUTF8);
+  options.set_posix_syntax(fdp.ConsumeBool());
+  options.set_longest_match(fdp.ConsumeBool());
+  options.set_log_errors(false);
+  options.set_max_mem(64 << 20);  // 64 MiB
+  options.set_literal(fdp.ConsumeBool());
+  options.set_never_nl(fdp.ConsumeBool());
+  options.set_dot_nl(fdp.ConsumeBool());
+  options.set_never_capture(fdp.ConsumeBool());
+  options.set_case_sensitive(!fdp.ConsumeBool());  // inverted: default is true
+  options.set_perl_classes(fdp.ConsumeBool());
+  options.set_word_boundary(fdp.ConsumeBool());
+  options.set_one_line(fdp.ConsumeBool());
+
+  std::string pattern = fdp.ConsumeRandomLengthString(999);  // <= 999 chars
+  std::string text = fdp.ConsumeRandomLengthString(999);     // <= 999 chars
+
+  TestOneInput(pattern, options, text);
+  return 0;
+}
diff --git a/re2/re2.cc b/re2/re2.cc
new file mode 100644
index 0000000000000000000000000000000000000000..73231287aa2b8e34a4022bed274fee4111c7d66f
--- /dev/null
+++ b/re2/re2.cc
@@ -0,0 +1,1335 @@
+// Copyright 2003-2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Regular expression interface RE2.
+//
+// Originally the PCRE C++ wrapper, but adapted to use
+// the new automata-based regular expression engines.
+
+#include "re2/re2.h"
+#include <iostream>
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <algorithm>
+#include <atomic>
+#include <iterator>
+#include <mutex>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "util/util.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "util/utf.h"
+// #include "re2/sparse_array.h"
+// #include "re2/prog.h"
+// #include "re2/regexp.h"
+#include "regex_internal.h"
+
+using namespace std;
+
+extern "C"
+{
+#include <rure.h>
+}
+
+
+namespace re2 {
+// Number of submatch slots, besides the whole match, in the on-stack StringPiece arrays below.
+static const int kMaxArgs = 16;
+static const int kVecSize = 1+kMaxArgs;  // +1: index 0 of those arrays holds the whole match
+
+const int RE2::Options::kDefaultMaxMem;  // initialized in re2.h
+
+RE2::Options::Options(RE2::CannedOptions opt)  // builds the option set for one canned preset
+  : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),  // UTF-8 unless the Latin1 preset
+    posix_syntax_(opt == RE2::POSIX),   // the POSIX preset enables POSIX syntax...
+    longest_match_(opt == RE2::POSIX),  // ...and longest-match
+    log_errors_(opt != RE2::Quiet),     // only the Quiet preset disables error logging
+    max_mem_(kDefaultMaxMem),
+    literal_(false),  // every remaining flag starts false, except case_sensitive_
+    never_nl_(false),
+    dot_nl_(false),
+    never_capture_(false),
+    case_sensitive_(true),
+    perl_classes_(false),
+    word_boundary_(false),
+    one_line_(false) {
+}
+
+// Shared empty objects used as "nothing here" values (e.g. error_ points at empty_string when there is no error).
+// Kept as pointers to avoid global constructors; the call_once block in RE2::Init() allocates them.
+static const std::string* empty_string;
+static const std::map<std::string, int>* empty_named_groups;
+static const std::map<int, std::string>* empty_group_names;
+
+RE2::RE2(const char* pattern) {  // C-string pattern, DefaultOptions
+  Init(pattern, DefaultOptions);
+}
+
+RE2::RE2(const std::string& pattern) {  // std::string pattern, DefaultOptions
+  Init(pattern, DefaultOptions);
+}
+
+RE2::RE2(const StringPiece& pattern) {  // StringPiece pattern, DefaultOptions
+  Init(pattern, DefaultOptions);
+}
+
+RE2::RE2(const StringPiece& pattern, const Options& options) {  // caller-supplied options
+  Init(pattern, options);  // all member initialisation happens in Init()
+}
+
+// Translates this option set into a bitmask of Regexp parse flags.
+int RE2::Options::ParseFlags() const {
+  // ClassNL is always part of the mask.
+  int mask = Regexp::ClassNL;
+
+  // Latin-1 is the only encoding that contributes a flag. UTF-8 adds
+  // nothing, and any other value is reported when logging is enabled.
+  if (encoding() == RE2::Options::EncodingLatin1) {
+    mask |= Regexp::Latin1;
+  } else if (encoding() != RE2::Options::EncodingUTF8) {
+    if (log_errors())
+      LOG(ERROR) << "Unknown encoding " << encoding();
+  }
+
+  // Two options are expressed negatively: their flag is set when they are off.
+  if (!posix_syntax()) mask |= Regexp::LikePerl;
+  if (!case_sensitive()) mask |= Regexp::FoldCase;
+
+  // The remaining options map one-to-one onto a flag.
+  if (literal()) mask |= Regexp::Literal;
+  if (never_nl()) mask |= Regexp::NeverNL;
+  if (dot_nl()) mask |= Regexp::DotNL;
+  if (never_capture()) mask |= Regexp::NeverCapture;
+  if (perl_classes()) mask |= Regexp::PerlClasses;
+  if (word_boundary()) mask |= Regexp::PerlB;
+  if (one_line()) mask |= Regexp::OneLine;
+
+  return mask;
+}
+
+
+// Re-encodes a Latin-1 byte string as UTF-8: bytes below 0x80 are copied
+// unchanged and every other byte is expanded to a two-byte sequence.
+std::string encodingLatin1ToUTF8(std::string str)
+{
+    std::string utf8;
+    for (size_t i = 0; i < str.size(); ++i)
+    {
+        const uint8_t byte = static_cast<uint8_t>(str[i]);
+        if (byte >= 0x80) {
+            // Lead byte carries the top two bits, continuation byte the low six.
+            utf8.push_back(static_cast<char>(0xc0 | (byte >> 6)));
+            utf8.push_back(static_cast<char>(0x80 | (byte & 0x3f)));
+            continue;
+        }
+        utf8.push_back(static_cast<char>(byte));
+    }
+    return utf8;
+}
+
+// Initialises every member and compiles |pattern| with the rure C API.
+//
+// On success prog_ holds the compiled rure handle (cast to Prog*) and
+// num_captures_ the number of capturing groups. On failure error_ owns a
+// copy of the rure error message, error_code_ is ErrorInternal and prog_
+// stays NULL.
+//
+// NOTE(review): the RE2 options (case sensitivity, dot_nl, ...) are not
+// translated into rure flags here; every pattern is compiled with
+// RURE_DEFAULT_FLAGS.
+void RE2::Init(const StringPiece& pattern, const Options& options) {
+  // The shared empty sentinels are created exactly once, even when several
+  // threads construct RE2 objects concurrently.
+  static std::once_flag empty_once;
+  std::call_once(empty_once, []() {
+    empty_string = new std::string;
+    empty_named_groups = new std::map<std::string, int>;
+    empty_group_names = new std::map<int, std::string>;
+  });
+
+  pattern_.assign(pattern.data(), pattern.size());
+  options_.Copy(options);
+  entire_regexp_ = NULL;
+  error_ = empty_string;
+  error_code_ = NoError;
+  error_arg_.clear();
+  prefix_.clear();
+  prefix_foldcase_ = false;
+  suffix_regexp_ = NULL;
+  prog_ = NULL;
+  num_captures_ = -1;
+  is_one_pass_ = false;
+  rprog_ = NULL;
+  named_groups_ = NULL;
+  group_names_ = NULL;
+
+  // rure expects UTF-8, so anything that is not already UTF-8 is converted
+  // as Latin-1. The converted text lives in a local std::string so that the
+  // pointer handed to rure_compile() stays valid, and its explicit size is
+  // used so that StringPieces without a terminating NUL (or with embedded
+  // NULs) are compiled in full.
+  std::string utf8_pattern = pattern_;
+  if (options_.encoding() != RE2::Options::EncodingUTF8)
+    utf8_pattern = encodingLatin1ToUTF8(pattern_);
+
+  rure_error* err = rure_error_new();
+  rure* re = rure_compile(
+      reinterpret_cast<const uint8_t*>(utf8_pattern.data()),
+      utf8_pattern.size(), RURE_DEFAULT_FLAGS, NULL, err);
+
+  if (re == NULL) {
+    // The message is owned by |err|: copy it before releasing the error.
+    const char* raw_msg = rure_error_message(err);
+    const std::string msg = (raw_msg != NULL) ? raw_msg : "";
+    rure_error_free(err);
+
+    // rure rejects empty character classes. Such patterns are replaced by
+    // the empty pattern instead of being reported as an error.
+    // NOTE(review): the empty pattern matches the empty string rather than
+    // never matching -- confirm this substitution is intended.
+    if (msg.find("empty character classes are not allowed") !=
+        std::string::npos) {
+      rure_error* fallback_err = rure_error_new();
+      re = rure_compile(reinterpret_cast<const uint8_t*>(""), 0,
+                        RURE_DEFAULT_FLAGS, NULL, fallback_err);
+      rure_error_free(fallback_err);
+    }
+
+    if (re == NULL) {
+      if (options_.log_errors()) {
+        LOG(ERROR) << "Error Compile '" << pattern_ << "':"<< msg << "'";
+      }
+      error_ = new std::string(msg);
+      // TODO: map rure failures onto the more specific RE2 error codes.
+      error_code_ = ErrorInternal;
+      return;
+    }
+  } else {
+    rure_error_free(err);
+  }
+
+  prog_ = (Prog*)re;
+
+  // rure counts the implicit whole-match group, RE2 does not.
+  rure_captures* caps = rure_captures_new(re);
+  num_captures_ = static_cast<int>(rure_captures_len(caps) - 1);
+  rure_captures_free(caps);
+}
+
+// Returns rprog_ as is: the lazy reverse compilation below is disabled, and Init() sets rprog_ to NULL.
+re2::Prog* RE2::ReverseProg() const {
+  // std::call_once(rprog_once_, [](const RE2* re) {
+  //   re->rprog_ =
+  //       re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3);
+  //   if (re->rprog_ == NULL) {
+  //     if (re->options_.log_errors())
+  //       LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'";
+  //     // We no longer touch error_ and error_code_ because failing to compile
+  //     // the reverse Prog is not a showstopper: falling back to NFA execution
+  //     // is fine. More importantly, an RE2 object is supposed to be logically
+  //     // immutable: whatever ok() would have returned after Init() completed,
+  //     // it should continue to return that no matter what ReverseProg() does.
+  //   }
+  // }, this);
+  return rprog_;
+}
+
+// Releases everything the object owns: the compiled rure handle kept in
+// prog_, a heap-allocated error string, and the lazily built name maps.
+RE2::~RE2() {
+  // prog_ holds the rure* returned by rure_compile() in Init(), cast to
+  // Prog*, so it is released through the rure API rather than with delete.
+  if (prog_ != NULL)
+    rure_free((rure*)prog_);
+
+  // Each test below is a statement of its own. Previously two body-less
+  // `if`s (their bodies were commented out) swallowed the error_ check, so
+  // the error string was never deleted.
+  // The shared empty_* sentinels are never owned by an instance.
+  if (error_ != empty_string)
+    delete error_;
+  if (named_groups_ != NULL && named_groups_ != empty_named_groups)
+    delete named_groups_;
+  if (group_names_ != NULL && group_names_ != empty_group_names)
+    delete group_names_;
+}
+
+int RE2::ProgramSize() const {
+  // Stub: the original body (return -1 when prog_ is NULL, otherwise
+  // prog_->size()) is disabled, so the result is always 0 whether or
+  // not the pattern compiled.
+  return 0;
+}
+
+int RE2::ReverseProgramSize() const {
+  // Stub. The disabled original body was:
+  //   if (prog_ == NULL) return -1;
+  //   Prog* prog = ReverseProg();
+  //   if (prog == NULL) return -1;
+  //   return prog->size();
+  // As written, the result is always 0.
+  return 0;
+}
+
+// // Finds the most significant non-zero bit in n.
+// static int FindMSBSet(uint32_t n) {
+//   DCHECK_NE(n, 0);
+// #if defined(__GNUC__)
+//   return 31 ^ __builtin_clz(n);
+// #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
+//   unsigned long c;
+//   _BitScanReverse(&c, n);
+//   return static_cast<int>(c);
+// #else
+//   int c = 0;
+//   for (int shift = 1 << 4; shift != 0; shift >>= 1) {
+//     uint32_t word = n >> shift;
+//     if (word != 0) {
+//       n = word;
+//       c += shift;
+//     }
+//   }
+//   return c;
+// #endif
+// }
+
+// static int Fanout(Prog* prog, std::vector<int>* histogram) {
+//   SparseArray<int> fanout(prog->size());
+//   prog->Fanout(&fanout);
+//   int data[32] = {};
+//   int size = 0;
+//   for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
+//     if (i->value() == 0)
+//       continue;
+//     uint32_t value = i->value();
+//     int bucket = FindMSBSet(value);
+//     bucket += value & (value-1) ? 1 : 0;
+//     ++data[bucket];
+//     size = std::max(size, bucket+1);
+//   }
+//   if (histogram != NULL)
+//     histogram->assign(data, data+size);
+//   return size-1;
+// }
+
+int RE2::ProgramFanout(std::vector<int>* histogram) const {
+  // Stub: the original body (return -1 when prog_ is NULL, otherwise
+  // Fanout(prog_, histogram)) is disabled, so |histogram| is never
+  // written and the result is always 0.
+  return 0;
+}
+
+int RE2::ReverseProgramFanout(std::vector<int>* histogram) const {
+  // Stub. The disabled original body was:
+  //   if (prog_ == NULL) return -1;
+  //   Prog* prog = ReverseProg();
+  //   if (prog == NULL) return -1;
+  //   return Fanout(prog, histogram);
+  // As written, |histogram| is never written and the result is always 0.
+  return 0;
+}
+
+// Returns the map from capture-group name to group index. The map is built
+// on the first call and cached in named_groups_, so references returned
+// earlier stay valid and repeated calls no longer leak a fresh map each
+// time. When the pattern failed to compile the shared empty map is returned.
+//
+// NOTE(review): the lazy initialisation is not synchronised; confirm
+// whether concurrent first calls need to be supported.
+const std::map<std::string, int>& RE2::NamedCapturingGroups() const {
+  if (named_groups_ != NULL)
+    return *named_groups_;
+  if (prog_ == NULL)
+    return *empty_named_groups;
+
+  std::map<std::string, int>* groups = new std::map<std::string, int>;
+  rure_iter_capture_names* it = rure_iter_capture_names_new((rure*)prog_);
+  char* name = NULL;
+  // The iterator yields one entry per group, so the loop counter is the
+  // group index; entries with an empty name are skipped.
+  for (int index = 0; rure_iter_capture_names_next(it, &name); ++index) {
+    if (name != NULL && name[0] != '\0')
+      groups->insert(std::make_pair(std::string(name), index));
+  }
+  rure_iter_capture_names_free(it);
+
+  named_groups_ = groups;
+  return *named_groups_;
+}
+
+// Returns the map from group index to capture-group name. The map is built
+// on the first call and cached in group_names_, so references returned
+// earlier stay valid and repeated calls no longer leak a fresh map each
+// time. When the pattern failed to compile the shared empty map is returned.
+//
+// NOTE(review): the lazy initialisation is not synchronised; confirm
+// whether concurrent first calls need to be supported.
+const std::map<int, std::string>& RE2::CapturingGroupNames() const {
+  if (group_names_ != NULL)
+    return *group_names_;
+  if (prog_ == NULL)
+    return *empty_group_names;
+
+  std::map<int, std::string>* names = new std::map<int, std::string>;
+  rure_iter_capture_names* it = rure_iter_capture_names_new((rure*)prog_);
+  char* name = NULL;
+  // The iterator yields one entry per group, so the loop counter is the
+  // group index; entries with an empty name are skipped.
+  for (int index = 0; rure_iter_capture_names_next(it, &name); ++index) {
+    if (name != NULL && name[0] != '\0')
+      names->insert(std::make_pair(index, std::string(name)));
+  }
+  rure_iter_capture_names_free(it);
+
+  group_names_ = names;
+  return *group_names_;
+}
+
+/***** Convenience interfaces *****/
+
+// Matches |re| against |text| anchored at both ends, parsing the
+// submatches into |args|.
+bool RE2::FullMatchN(const StringPiece& text, const RE2& re,
+                     const Arg* const args[], int n) {
+  size_t* const no_consumed = NULL;  // the consumed length is not wanted
+  const bool matched = re.DoMatch(text, ANCHOR_BOTH, no_consumed, args, n);
+  return matched;
+}
+
+// Matches |re| against |text| without anchoring, parsing the submatches
+// into |args|.
+bool RE2::PartialMatchN(const StringPiece& text, const RE2& re,
+                        const Arg* const args[], int n) {
+  size_t* const no_consumed = NULL;  // the consumed length is not wanted
+  const bool matched = re.DoMatch(text, UNANCHORED, no_consumed, args, n);
+  return matched;
+}
+
+// Matches |re| at the start of |*input|; on success the matched prefix is
+// removed from |*input|.
+bool RE2::ConsumeN(StringPiece* input, const RE2& re,
+                   const Arg* const args[], int n) {
+  size_t consumed;  // written by DoMatch() when it succeeds
+  const bool matched = re.DoMatch(*input, ANCHOR_START, &consumed, args, n);
+  if (matched)
+    input->remove_prefix(consumed);
+  return matched;
+}
+
+// Searches |*input| for |re|; on success everything up to the end of the
+// match is removed from |*input|.
+bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re,
+                          const Arg* const args[], int n) {
+  size_t consumed;  // written by DoMatch() when it succeeds
+  const bool matched = re.DoMatch(*input, UNANCHORED, &consumed, args, n);
+  if (matched)
+    input->remove_prefix(consumed);
+  return matched;
+}
+
+// Replaces the first match of |re| in |*str| with |rewrite| (after
+// substituting its \0..\9 references). Returns false, leaving |*str|
+// untouched, when the rewrite needs more groups than are available, when
+// nothing matches, or when the rewrite string is invalid.
+bool RE2::Replace(std::string* str,
+                  const RE2& re,
+                  const StringPiece& rewrite) {
+  StringPiece groups[kVecSize];
+  const int ngroups = 1 + MaxSubmatch(rewrite);
+
+  // The rewrite may only reference groups the pattern has, and only as many
+  // as fit in the on-stack array.
+  const bool too_many = ngroups > 1 + re.NumberOfCapturingGroups() ||
+                        ngroups > static_cast<int>(arraysize(groups));
+  if (too_many)
+    return false;
+
+  const bool matched =
+      re.Match(*str, 0, str->size(), UNANCHORED, groups, ngroups);
+  if (!matched)
+    return false;
+
+  std::string replacement;
+  const bool rewritten = re.Rewrite(&replacement, rewrite, groups, ngroups);
+  if (!rewritten)
+    return false;
+
+  // groups[0] is the whole match and must lie inside |*str|.
+  const StringPiece& whole = groups[0];
+  assert(whole.data() >= str->data());
+  assert(whole.data() + whole.size() <= str->data() + str->size());
+  str->replace(whole.data() - str->data(), whole.size(), replacement);
+  return true;
+}
+
+int RE2::GlobalReplace(std::string* str,
+                       const RE2& re,
+                       const StringPiece& rewrite) {
+//   StringPiece vec[kVecSize];
+//   int nvec = 1 + MaxSubmatch(rewrite);
+//   if (nvec > 1 + re.NumberOfCapturingGroups())
+//     return false;
+//   if (nvec > static_cast<int>(arraysize(vec)))
+//     return false;
+
+//   const char* p = str->data();
+//   const char* ep = p + str->size();
+//   const char* lastend = NULL;
+//   std::string out;
+//   int count = 0;
+// #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+//   // Iterate just once when fuzzing. Otherwise, we easily get bogged down
+//   // and coverage is unlikely to improve despite significant expense.
+//   while (p == str->data()) {
+// #else
+//   while (p <= ep) {
+// #endif
+//     if (!re.Match(*str, static_cast<size_t>(p - str->data()),
+//                   str->size(), UNANCHORED, vec, nvec))
+//       break;
+//     if (p < vec[0].data())
+//       out.append(p, vec[0].data() - p);
+//     if (vec[0].data() == lastend && vec[0].empty()) {
+//       // Disallow empty match at end of last match: skip ahead.
+//       //
+//       // fullrune() takes int, not ptrdiff_t. However, it just looks
+//       // at the leading byte and treats any length >= 4 the same.
+//       if (re.options().encoding() == RE2::Options::EncodingUTF8 &&
+//           fullrune(p, static_cast<int>(std::min(ptrdiff_t{4}, ep - p)))) {
+//         // re is in UTF-8 mode and there is enough left of str
+//         // to allow us to advance by up to UTFmax bytes.
+//         Rune r;
+//         int n = chartorune(&r, p);
+//         // Some copies of chartorune have a bug that accepts
+//         // encodings of values in (10FFFF, 1FFFFF] as valid.
+//         if (r > Runemax) {
+//           n = 1;
+//           r = Runeerror;
+//         }
+//         if (!(n == 1 && r == Runeerror)) {  // no decoding error
+//           out.append(p, n);
+//           p += n;
+//           continue;
+//         }
+//       }
+//       // Most likely, re is in Latin-1 mode. If it is in UTF-8 mode,
+//       // we fell through from above and the GIGO principle applies.
+//       if (p < ep)
+//         out.append(p, 1);
+//       p++;
+//       continue;
+//     }
+//     re.Rewrite(&out, rewrite, vec, nvec);
+//     p = vec[0].data() + vec[0].size();
+//     lastend = p;
+//     count++;
+//   }
+
+//   if (count == 0)
+//     return 0;
+
+//   if (p < ep)
+//     out.append(p, ep - p);
+//   using std::swap;
+//   swap(out, *str);
+//   return count;
+  return 0;
+}
+
+// Searches |text| for |re| and, on a match, stores in |*out| the |rewrite|
+// string with its \0..\9 references substituted. Returns false when the
+// rewrite needs more groups than are available or when nothing matches;
+// |*out| is only cleared once a match has been found.
+bool RE2::Extract(const StringPiece& text,
+                  const RE2& re,
+                  const StringPiece& rewrite,
+                  std::string* out) {
+  StringPiece groups[kVecSize];
+  const int ngroups = 1 + MaxSubmatch(rewrite);
+
+  const bool too_many = ngroups > 1 + re.NumberOfCapturingGroups() ||
+                        ngroups > static_cast<int>(arraysize(groups));
+  if (too_many)
+    return false;
+
+  const bool matched =
+      re.Match(text, 0, text.size(), UNANCHORED, groups, ngroups);
+  if (!matched)
+    return false;
+
+  out->clear();
+  const bool rewritten = re.Rewrite(out, rewrite, groups, ngroups);
+  return rewritten;
+}
+
+// Returns a copy of |unquoted| in which every byte that might be a regexp
+// metacharacter is preceded by a backslash.
+std::string RE2::QuoteMeta(const StringPiece& unquoted) {
+  std::string quoted;
+  quoted.reserve(unquoted.size() << 1);
+
+  // It's legal to escape a character even if it has no special meaning in
+  // a regular expression, so everything outside the sets below is escaped.
+  // (This is close to the perl function of the same name; see
+  // `perldoc -f quotemeta`.)
+  for (size_t i = 0; i < unquoted.size(); ++i) {
+    const char c = unquoted[i];
+
+    // Explicit range checks: using 'isalnum' here raises the benchmark
+    // time from 32ns to 58ns.
+    const bool is_word = ('a' <= c && c <= 'z') ||
+                         ('A' <= c && c <= 'Z') ||
+                         ('0' <= c && c <= '9') ||
+                         c == '_';
+    // A few ASCII characters are also copied unescaped.
+    const bool is_exempt = c == '!' || c == ' ' || c == '\'' || c == '=';
+    // Bytes with the high bit set are part of a UTF8 or Latin1 character
+    // and must be copied without escaping. Experimentally this is what
+    // works correctly with the regexp library.
+    const bool is_high = (c & 128) != 0;
+
+    if (is_word || is_exempt || is_high) {
+      quoted += c;
+      continue;
+    }
+
+    if (c == '\0') {
+      // Null chars get special handling. It is not strictly required for
+      // RE2, but other regexp libraries such as PCRE need it. "\\0" can't
+      // be used since the next character might be a digit.
+      quoted += "\\x00";
+      continue;
+    }
+
+    quoted += '\\';
+    quoted += c;
+  }
+
+  return quoted;
+}
+
+// Not supported by this rure-based port: no range can be derived, so the
+// outputs are cleared and false ("nothing useful") is returned.
+//
+// The previous stub returned true without writing |*min| or |*max|, which
+// told callers that a range had been computed while leaving whatever the
+// strings happened to contain. The original implementation relied on
+// prefix_ and on Prog::PossibleMatchRange(), neither of which is populated
+// by the Init() in this file.
+bool RE2::PossibleMatchRange(std::string* min, std::string* max,
+                             int maxlen) const {
+  if (min != NULL)
+    min->clear();
+  if (max != NULL)
+    max->clear();
+  return false;
+}
+
+// // Avoid possible locale nonsense in standard strcasecmp.
+// // The string a is known to be all lowercase.
+// static int ascii_strcasecmp(const char* a, const char* b, size_t len) {
+//   const char* ae = a + len;
+
+//   for (; a < ae; a++, b++) {
+//     uint8_t x = *a;
+//     uint8_t y = *b;
+//     if ('A' <= y && y <= 'Z')
+//       y += 'a' - 'A';
+//     if (x != y)
+//       return x - y;
+//   }
+//   return 0;
+// }
+
+
+/***** Actual matching and rewriting code *****/
+
+// Searches text[startpos, endpos) for the regexp.
+//
+//   re_anchor  UNANCHORED: the match may be anywhere in the range;
+//              ANCHOR_START: it must begin at startpos;
+//              ANCHOR_BOTH: it must span exactly [startpos, endpos).
+//   submatch   when non-NULL and nsubmatch > 0, receives the first
+//              nsubmatch groups as views into |text| (index 0 is the whole
+//              match); groups that did not participate, and slots beyond
+//              the pattern's group count, are set to empty StringPieces.
+//
+// Returns false (logging if enabled) for an invalid regexp or an invalid
+// startpos/endpos pair, and false when there is no acceptable match.
+//
+// Compared with the previous version this honours startpos/endpos and the
+// explicit text size instead of strlen(), never writes more than nsubmatch
+// entries, frees the rure captures, and accepts an unanchored empty match
+// at offset 0 (as DoMatch() already did).
+//
+// NOTE(review): anchoring is checked on the leftmost match rure reports, so
+// an anchored match that exists only as a different alternative is not
+// found. Latin-1 text is not converted before searching.
+bool RE2::Match(const StringPiece& text,
+                size_t startpos,
+                size_t endpos,
+                Anchor re_anchor,
+                StringPiece* submatch,
+                int nsubmatch) const {
+  if (!ok()) {
+    if (options_.log_errors())
+      LOG(ERROR) << "Invalid RE2: " << *error_;
+    return false;
+  }
+
+  if (startpos > endpos || endpos > text.size()) {
+    if (options_.log_errors())
+      LOG(ERROR) << "RE2: invalid startpos, endpos pair. ["
+                 << "startpos: " << startpos << ", "
+                 << "endpos: " << endpos << ", "
+                 << "text size: " << text.size() << "]";
+    return false;
+  }
+
+  rure* re = (rure*)prog_;
+  // rure must not be handed a NULL haystack, even for an empty text.
+  const uint8_t* haystack = reinterpret_cast<const uint8_t*>(
+      text.data() != NULL ? text.data() : "");
+  const bool want_captures = (submatch != NULL && nsubmatch > 0);
+
+  // Passing endpos as the haystack length makes the search stop there;
+  // offsets reported by rure are relative to the start of |text|.
+  rure_match whole = {0, 0};
+  rure_captures* caps = NULL;
+  bool matched;
+  if (want_captures) {
+    caps = rure_captures_new(re);
+    matched = rure_find_captures(re, haystack, endpos, startpos, caps) &&
+              rure_captures_at(caps, 0, &whole);
+  } else {
+    matched = rure_find(re, haystack, endpos, startpos, &whole);
+  }
+
+  if (matched) {
+    switch (re_anchor) {
+      case UNANCHORED:
+        break;
+      case ANCHOR_START:
+        matched = (whole.start == startpos);
+        break;
+      case ANCHOR_BOTH:
+        matched = (whole.start == startpos && whole.end == endpos);
+        break;
+    }
+  }
+
+  if (matched && want_captures) {
+    const size_t ngroups = rure_captures_len(caps);
+    for (int i = 0; i < nsubmatch; i++) {
+      rure_match group = {0, 0};
+      if (static_cast<size_t>(i) < ngroups &&
+          rure_captures_at(caps, static_cast<size_t>(i), &group)) {
+        submatch[i] = StringPiece(text.data() + group.start,
+                                  group.end - group.start);
+      } else {
+        submatch[i] = StringPiece();
+      }
+    }
+  }
+
+  if (caps != NULL)
+    rure_captures_free(caps);
+  return matched;
+}
+
+// std::string_view in MSVC has iterators that aren't just pointers and
+// that don't allow comparisons between different objects - not even if
+// those objects are views into the same string! Thus, we provide these
+// helpers, which return plain pointers that can be compared and subtracted.
+static inline const char* BeginPtr(const StringPiece& s) {
+  return s.data();  // first byte of the view
+}
+static inline const char* EndPtr(const StringPiece& s) {
+  return s.data() + s.size();  // one past the last byte of the view
+}
+
+// Internal matcher - like Match() but takes Args not StringPieces.
+//
+//   text       the text to search
+//   re_anchor  how the match must be anchored within |text|
+//   consumed   when non-NULL, receives the offset in |text| of the end of
+//              the match
+//   args, n    the first n capturing groups are parsed into args[0..n)
+//
+// Returns false when the regexp is invalid, when it has fewer than n
+// capturing groups, when nothing matches, or when a group fails to parse.
+//
+// Compared with the previous version this no longer reads text[0] of an
+// empty text, no longer keeps a pointer into a destroyed temporary string
+// in the Latin-1 path, uses the explicit text size instead of strlen(),
+// and no longer runs a throw-away search before calling Match().
+//
+// NOTE(review): only the no-capture path converts Latin-1 text to UTF-8;
+// the capture path hands |text| to Match() unconverted, as before.
+bool RE2::DoMatch(const StringPiece& text,
+                  Anchor re_anchor,
+                  size_t* consumed,
+                  const Arg* const* args,
+                  int n) const {
+  if (!ok()) {
+    if (options_.log_errors())
+      LOG(ERROR) << "Invalid RE2: " << *error_;
+    return false;
+  }
+
+  if (NumberOfCapturingGroups() < n) {
+    // RE has fewer capturing groups than number of Arg pointers passed in.
+    return false;
+  }
+
+  // Count number of capture groups needed.
+  const int nvec = (n == 0 && consumed == NULL) ? 0 : n + 1;
+
+  if (nvec == 0) {
+    // Nothing to report back: a plain search is enough.
+    std::string converted;  // keeps the UTF-8 copy alive during the search
+    const char* haystack = text.data();
+    size_t length = text.size();
+    if (options_.encoding() == RE2::Options::EncodingLatin1) {
+      converted = encodingLatin1ToUTF8(text.as_string());
+      haystack = converted.data();
+      length = converted.size();
+    }
+    if (haystack == NULL) {
+      // rure must not be handed a NULL haystack, even for an empty text.
+      haystack = "";
+      length = 0;
+    }
+
+    rure_match match = {0, 0};
+    if (!rure_find((rure*)prog_, reinterpret_cast<const uint8_t*>(haystack),
+                   length, 0, &match))
+      return false;
+
+    switch (re_anchor) {
+      case UNANCHORED:
+        return true;
+      case ANCHOR_START:
+        return match.start == 0;
+      case ANCHOR_BOTH:
+        return match.start == 0 && match.end == length;
+    }
+    return true;
+  }
+
+  // Match() may fill in every group of the pattern, so reserve room for
+  // all of them even when fewer were requested. Small counts use the stack
+  // array; larger ones use a vector that frees itself on every return path.
+  const int needed = std::max(nvec, 1 + NumberOfCapturingGroups());
+  StringPiece stkvec[kVecSize];
+  std::vector<StringPiece> heapvec;
+  StringPiece* vec = stkvec;
+  if (needed > static_cast<int>(arraysize(stkvec))) {
+    heapvec.resize(needed);
+    vec = &heapvec[0];
+  }
+
+  if (!Match(text, 0, text.size(), re_anchor, vec, nvec))
+    return false;
+
+  // vec[0] is the whole match; report where it ends within |text|.
+  if (consumed != NULL)
+    *consumed = static_cast<size_t>(EndPtr(vec[0]) - BeginPtr(text));
+
+  if (n == 0 || args == NULL) {
+    // We are not interested in results
+    return true;
+  }
+
+  // If we got here, we must have matched the whole pattern.
+  for (int i = 0; i < n; i++) {
+    const StringPiece& s = vec[i + 1];
+    if (!args[i]->Parse(s.data(), s.size())) {
+      // TODO: Should we indicate what the error was?
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Checks that the rewrite string is well-formed with respect to this
+// regular expression: every backslash must be followed by a digit or by
+// another backslash, and no \N may exceed the number of capturing groups.
+// On failure returns false and, when |error| is non-NULL, stores a
+// description of the problem in |*error|.
+//
+// The validation had been commented out, so every rewrite string was
+// reported as valid.
+bool RE2::CheckRewriteString(const StringPiece& rewrite,
+                             std::string* error) const {
+  int max_token = -1;
+  for (const char *s = rewrite.data(), *end = s + rewrite.size();
+       s < end; s++) {
+    if (*s != '\\')
+      continue;
+    if (++s == end) {
+      if (error != NULL)
+        *error = "Rewrite schema error: '\\' not allowed at end.";
+      return false;
+    }
+    const char c = *s;
+    if (c == '\\')
+      continue;
+    // Explicit range check: isdigit() must not be given a negative char.
+    if (c < '0' || c > '9') {
+      if (error != NULL)
+        *error = "Rewrite schema error: "
+                 "'\\' must be followed by a digit or '\\'.";
+      return false;
+    }
+    max_token = std::max(max_token, c - '0');
+  }
+
+  const int ngroups = NumberOfCapturingGroups();
+  if (max_token > ngroups) {
+    if (error != NULL)
+      *error = "Rewrite schema requests " + std::to_string(max_token) +
+               " matches, but the regexp only has " +
+               std::to_string(ngroups) + " parenthesized subexpressions.";
+    return false;
+  }
+  return true;
+}
+
+// Returns the maximum submatch needed for the rewrite to be done by Replace().
+// E.g. if rewrite == "foo \\2,\\1", returns 2.
+// A backslash followed by anything other than a digit (or by nothing)
+// contributes nothing to the result.
+int RE2::MaxSubmatch(const StringPiece& rewrite) {
+  int max = 0;
+  for (const char *s = rewrite.data(), *end = s + rewrite.size();
+       s < end; s++) {
+    if (*s != '\\')
+      continue;
+    s++;  // look at the character after the backslash, if any
+    if (s >= end)
+      break;
+    // Explicit range check: isdigit() must not be given a negative char,
+    // which is what a byte >= 0x80 becomes where char is signed.
+    if ('0' <= *s && *s <= '9') {
+      const int n = *s - '0';
+      if (n > max)
+        max = n;
+    }
+  }
+  return max;
+}
+
+// Append the "rewrite" string, with backslash substitutions from "vec",
+// to string "out".
+//
+// \N (a single digit) is replaced by vec[N] and \\ by one backslash; every
+// other character is copied as is. Returns false (logging if enabled) when
+// N is not below veclen or when a backslash is followed by anything else;
+// whatever was appended to |out| before the error stays there.
+bool RE2::Rewrite(std::string* out,
+                  const StringPiece& rewrite,
+                  const StringPiece* vec,
+                  int veclen) const {
+  for (const char *s = rewrite.data(), *end = s + rewrite.size();
+       s < end; s++) {
+    if (*s != '\\') {
+      out->push_back(*s);
+      continue;
+    }
+    s++;
+    // -1 stands for "backslash at the very end of the rewrite string".
+    const int c = (s < end) ? *s : -1;
+    // Explicit range check: isdigit() must not be given a negative char.
+    if ('0' <= c && c <= '9') {
+      const int n = (c - '0');
+      if (n >= veclen) {
+        if (options_.log_errors()) {
+          LOG(ERROR) << "invalid substitution \\" << n
+                     << " from " << veclen << " groups";
+        }
+        return false;
+      }
+      const StringPiece& snip = vec[n];
+      if (!snip.empty())
+        out->append(snip.data(), snip.size());
+    } else if (c == '\\') {
+      out->push_back('\\');
+    } else {
+      // Build a std::string from data and size: a StringPiece need not be
+      // NUL-terminated, so its data() must not be streamed as a C string.
+      if (options_.log_errors())
+        LOG(ERROR) << "invalid rewrite pattern: "
+                   << std::string(rewrite.data(), rewrite.size());
+      return false;
+    }
+  }
+  return true;
+}
+
+/***** Parsers for various types *****/
+
+namespace re2_internal {
+
+// void* destination: nothing can be stored, so parsing succeeds only when
+// no destination was given.
+template <>
+bool Parse(const char* str, size_t n, void* dest) {
+  // We fail if somebody asked us to store into a non-NULL void* pointer
+  const bool nothing_to_store = (dest == NULL);
+  return nothing_to_store;
+}
+
+// std::string destination: any text parses; it is copied when a
+// destination is given.
+template <>
+bool Parse(const char* str, size_t n, std::string* dest) {
+  if (dest != NULL)
+    dest->assign(str, n);
+  return true;
+}
+
+// StringPiece destination: any text parses; the destination, when given,
+// becomes a view of the same n bytes (no copy is made).
+template <>
+bool Parse(const char* str, size_t n, StringPiece* dest) {
+  if (dest != NULL)
+    *dest = StringPiece(str, n);
+  return true;
+}
+
+// char destination: the text must be exactly one byte long.
+template <>
+bool Parse(const char* str, size_t n, char* dest) {
+  const bool single_byte = (n == 1);
+  if (!single_byte) return false;
+  if (dest != NULL)
+    *dest = *str;
+  return true;
+}
+
+// signed char destination: the text must be exactly one byte long.
+template <>
+bool Parse(const char* str, size_t n, signed char* dest) {
+  const bool single_byte = (n == 1);
+  if (!single_byte) return false;
+  if (dest != NULL)
+    *dest = *str;
+  return true;
+}
+
+// unsigned char destination: the text must be exactly one byte long.
+template <>
+bool Parse(const char* str, size_t n, unsigned char* dest) {
+  const bool single_byte = (n == 1);
+  if (!single_byte) return false;
+  if (dest != NULL)
+    *dest = *str;
+  return true;
+}
+
+// Largest number spec that we are willing to parse; the integer parsers below use buffers of kMaxNumberLength+1 bytes.
+static const int kMaxNumberLength = 32;
+
+// REQUIRES "buf" must have length at least nbuf.
+// Copies the number spelled by the first *np bytes of "str" into "buf",
+// NUL-terminates it and returns "buf", overwriting *np with the length of
+// the copy. Returns "" (leaving *np alone) when the input is empty, when it
+// starts with whitespace and accept_spaces is false, or when the number
+// does not fit in "buf".
+static const char* TerminateNumber(char* buf, size_t nbuf, const char* str,
+                                   size_t* np, bool accept_spaces) {
+  size_t len = *np;
+  if (len == 0) return "";
+
+  // We are less forgiving than the strtoxxx() routines and do not allow
+  // leading spaces, except for floats, where they are skipped.
+  if (isspace(*str)) {
+    if (!accept_spaces) return "";
+    do {
+      len--;
+      str++;
+    } while (len > 0 && isspace(*str));
+  }
+
+  // buf has a fixed maximum size, but arbitrarily large integers can still
+  // be handled correctly by dropping leading zeros first (s/000+/00/).
+  // Numbers that are still too long will be out of range anyway. Two zeros
+  // are kept so that 0000x123 (invalid) does not turn into 0x123 (valid).
+  // A leading '-' is stepped over before the zeros are examined.
+  const bool negative = (len >= 1 && str[0] == '-');
+  if (negative) {
+    len--;
+    str++;
+  }
+
+  if (len >= 3 && str[0] == '0' && str[1] == '0') {
+    for (; len >= 3 && str[2] == '0'; len--, str++) {
+    }
+  }
+
+  // Step back one byte so that the copy has room for the '-' in front.
+  if (negative) {
+    len++;
+    str--;
+  }
+
+  if (len > nbuf-1) return "";
+
+  memmove(buf, str, len);
+  if (negative)
+    buf[0] = '-';
+  buf[len] = '\0';
+  *np = len;
+  return buf;
+}
+
+// float destination: the whole text must be consumed by strtof() without
+// setting errno. Leading spaces are accepted.
+template <>
+bool Parse(const char* str, size_t n, float* dest) {
+  if (n == 0) return false;
+  static const int kMaxLength = 200;
+  char scratch[kMaxLength+1];
+  const char* number = TerminateNumber(scratch, sizeof scratch, str, &n, true);
+  char* stop;
+  errno = 0;
+  const float value = strtof(number, &stop);
+  const bool consumed_all = (stop == number + n);  // no leftover junk
+  if (!consumed_all) return false;
+  if (errno) return false;
+  if (dest != NULL)
+    *dest = value;
+  return true;
+}
+
+// double destination: the whole text must be consumed by strtod() without
+// setting errno. Leading spaces are accepted.
+template <>
+bool Parse(const char* str, size_t n, double* dest) {
+  if (n == 0) return false;
+  static const int kMaxLength = 200;
+  char scratch[kMaxLength+1];
+  const char* number = TerminateNumber(scratch, sizeof scratch, str, &n, true);
+  char* stop;
+  errno = 0;
+  const double value = strtod(number, &stop);
+  const bool consumed_all = (stop == number + n);  // no leftover junk
+  if (!consumed_all) return false;
+  if (errno) return false;
+  if (dest != NULL)
+    *dest = value;
+  return true;
+}
+
+// Parses str[0,n) as a long in the given radix (no leading spaces) and
+// stores it in *dest unless dest is NULL.
+template <>
+bool Parse(const char* str, size_t n, long* dest, int radix) {
+  if (n == 0) return false;
+  char scratch[kMaxNumberLength+1];
+  const char* text = TerminateNumber(scratch, sizeof scratch, str, &n, false);
+  char* stop = NULL;
+  errno = 0;
+  const long value = strtol(text, &stop, radix);
+  if (stop != text + n || errno != 0) return false;  // junk or strtol error
+  if (dest != NULL) *dest = value;
+  return true;
+}
+
+// Parses str[0,n) as an unsigned long in the given radix and stores it in
+// *dest unless dest is NULL. Leading spaces and a leading '-' are rejected.
+template <>
+bool Parse(const char* str, size_t n, unsigned long* dest, int radix) {
+  if (n == 0) return false;
+  char scratch[kMaxNumberLength+1];
+  const char* text = TerminateNumber(scratch, sizeof scratch, str, &n, false);
+  // strtoul() will silently accept negative numbers and parse
+  // them.  This module is more strict and treats them as errors.
+  if (text[0] == '-') return false;
+
+  char* stop = NULL;
+  errno = 0;
+  const unsigned long value = strtoul(text, &stop, radix);
+  // Fail on leftover junk or on any error reported by strtoul().
+  if (stop != text + n) return false;
+  if (errno != 0) return false;
+  if (dest != NULL) *dest = value;
+  return true;
+}
+
+// Parses as a long, then rejects values that do not fit in a short.
+template <>
+bool Parse(const char* str, size_t n, short* dest, int radix) {
+  long wide;
+  if (!Parse(str, n, &wide, radix)) return false;      // Could not parse
+  if (static_cast<short>(wide) != wide) return false;  // Out of range
+  if (dest != NULL) *dest = static_cast<short>(wide);
+  return true;
+}
+
+// Parses as an unsigned long, then rejects values too big for unsigned short.
+template <>
+bool Parse(const char* str, size_t n, unsigned short* dest, int radix) {
+  unsigned long wide;
+  if (!Parse(str, n, &wide, radix)) return false;  // Could not parse
+  if (static_cast<unsigned short>(wide) != wide) return false;  // Out of range
+  if (dest != NULL) *dest = static_cast<unsigned short>(wide);
+  return true;
+}
+
+// Parses as a long, then rejects values that do not fit in an int.
+template <>
+bool Parse(const char* str, size_t n, int* dest, int radix) {
+  long wide;
+  if (!Parse(str, n, &wide, radix)) return false;    // Could not parse
+  if (static_cast<int>(wide) != wide) return false;  // Out of range
+  if (dest != NULL) *dest = static_cast<int>(wide);
+  return true;
+}
+
+// Parses as an unsigned long, then rejects values too big for unsigned int.
+template <>
+bool Parse(const char* str, size_t n, unsigned int* dest, int radix) {
+  unsigned long wide;
+  if (!Parse(str, n, &wide, radix)) return false;  // Could not parse
+  if (static_cast<unsigned int>(wide) != wide) return false;  // Out of range
+  if (dest != NULL) *dest = static_cast<unsigned int>(wide);
+  return true;
+}
+
+// Parses str[0,n) as a long long in the given radix (no leading spaces)
+// and stores it in *dest unless dest is NULL.
+template <>
+bool Parse(const char* str, size_t n, long long* dest, int radix) {
+  if (n == 0) return false;
+  char scratch[kMaxNumberLength+1];
+  const char* text = TerminateNumber(scratch, sizeof scratch, str, &n, false);
+  char* stop = NULL;
+  errno = 0;
+  const long long value = strtoll(text, &stop, radix);
+  if (stop != text + n || errno != 0) return false;  // junk or strtoll error
+  if (dest != NULL) *dest = value;
+  return true;
+}
+
+// Parses str[0,n) as an unsigned long long in the given radix and stores it
+// in *dest unless dest is NULL. Leading spaces and a leading '-' are rejected.
+template <>
+bool Parse(const char* str, size_t n, unsigned long long* dest, int radix) {
+  if (n == 0) return false;
+  char scratch[kMaxNumberLength+1];
+  const char* text = TerminateNumber(scratch, sizeof scratch, str, &n, false);
+  // strtoull() will silently accept negative numbers and parse
+  // them.  This module is more strict and treats them as errors.
+  if (text[0] == '-') return false;
+  char* stop = NULL;
+  errno = 0;
+  const unsigned long long value = strtoull(text, &stop, radix);
+  // Fail on leftover junk or on any error reported by strtoull().
+  if (stop != text + n) return false;
+  if (errno != 0) return false;
+  if (dest != NULL) *dest = value;
+  return true;
+}
+
+}  // namespace re2_internal
+
+namespace hooks {
+// NOTE(review): "context" is not used in this chunk; confirm its purpose.
+#ifdef RE2_HAVE_THREAD_LOCAL
+thread_local const RE2* context = NULL;
+#endif
+// Holds one callback pointer; Store() uses release, Load() uses acquire.
+template <typename T>
+union Hook {
+  void Store(T* cb) { cb_.store(cb, std::memory_order_release); }
+  T* Load() const { return cb_.load(std::memory_order_acquire); }
+
+#if !defined(__clang__) && defined(_MSC_VER)
+  // Citing https://github.com/protocolbuffers/protobuf/pull/4777 as precedent,
+  // this is a gross hack to make std::atomic<T*> constant-initialized on MSVC.
+  static_assert(ATOMIC_POINTER_LOCK_FREE == 2,
+                "std::atomic<T*> must be always lock-free");
+  T* cb_for_constinit_;
+#endif
+
+  std::atomic<T*> cb_;
+};
+// Default callback installed in every hook: ignores its argument.
+template <typename T>
+static void DoNothing(const T&) {}
+// Defines a static hook plus its Set<type>Hook() and Get<type>Hook().
+#define DEFINE_HOOK(type, name)                                       \
+  static Hook<type##Callback> name##_hook = {{&DoNothing<type>}};     \
+  void Set##type##Hook(type##Callback* cb) { name##_hook.Store(cb); } \
+  type##Callback* Get##type##Hook() { return name##_hook.Load(); }
+
+DEFINE_HOOK(DFAStateCacheReset, dfa_state_cache_reset)
+DEFINE_HOOK(DFASearchFailure, dfa_search_failure)
+
+#undef DEFINE_HOOK
+
+}  // namespace hooks
+
+}  // namespace re2
diff --git a/re2/re2.h b/re2/re2.h
new file mode 100644
index 0000000000000000000000000000000000000000..7fd2245cb35c070b81fb50429bf2af2fa48f4ac0
--- /dev/null
+++ b/re2/re2.h
@@ -0,0 +1,1017 @@
+// Copyright 2003-2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_RE2_H_
+#define RE2_RE2_H_
+
+// C++ interface to the re2 regular-expression library.
+// RE2 supports Perl-style regular expressions (with extensions like
+// \d, \w, \s, ...).
+//
+// -----------------------------------------------------------------------
+// REGEXP SYNTAX:
+//
+// This module uses the re2 library and hence supports
+// its syntax for regular expressions, which is similar to Perl's with
+// some of the more complicated things thrown away.  In particular,
+// backreferences and generalized assertions are not available, nor is \Z.
+//
+// See https://github.com/google/re2/wiki/Syntax for the syntax
+// supported by RE2, and a comparison with PCRE and PERL regexps.
+//
+// For those not familiar with Perl's regular expressions,
+// here are some examples of the most commonly used extensions:
+//
+//   "hello (\\w+) world"  -- \w matches a "word" character
+//   "version (\\d+)"      -- \d matches a digit
+//   "hello\\s+world"      -- \s matches any whitespace character
+//   "\\b(\\w+)\\b"        -- \b matches the empty string at a word boundary
+//   "(?i)hello"           -- (?i) turns on case-insensitive matching
+//   "/\\*(.*?)\\*/"       -- .*? matches . minimum no. of times possible
+//
+// The double backslashes are needed when writing C++ string literals.
+// However, they should NOT be used when writing C++11 raw string literals:
+//
+//   R"(hello (\w+) world)"  -- \w matches a "word" character
+//   R"(version (\d+))"      -- \d matches a digit
+//   R"(hello\s+world)"      -- \s matches any whitespace character
+//   R"(\b(\w+)\b)"          -- \b matches the empty string at a word boundary
+//   R"((?i)hello)"          -- (?i) turns on case-insensitive matching
+//   R"(/\*(.*?)\*/)"        -- .*? matches . minimum no. of times possible
+//
+// When using UTF-8 encoding, case-insensitive matching will perform
+// simple case folding, not full case folding.
+//
+// -----------------------------------------------------------------------
+// MATCHING INTERFACE:
+//
+// The "FullMatch" operation checks that supplied text matches a
+// supplied pattern exactly.
+//
+// Example: successful match
+//    CHECK(RE2::FullMatch("hello", "h.*o"));
+//
+// Example: unsuccessful match (requires full match):
+//    CHECK(!RE2::FullMatch("hello", "e"));
+//
+// -----------------------------------------------------------------------
+// UTF-8 AND THE MATCHING INTERFACE:
+//
+// By default, the pattern and input text are interpreted as UTF-8.
+// The RE2::Latin1 option causes them to be interpreted as Latin-1.
+//
+// Example:
+//    CHECK(RE2::FullMatch(utf8_string, RE2(utf8_pattern)));
+//    CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern, RE2::Latin1)));
+//
+// -----------------------------------------------------------------------
+// MATCHING WITH SUBSTRING EXTRACTION:
+//
+// You can supply extra pointer arguments to extract matched substrings.
+// On match failure, none of the pointees will have been modified.
+// On match success, the substrings will be converted (as necessary) and
+// their values will be assigned to their pointees until all conversions
+// have succeeded or one conversion has failed.
+// On conversion failure, the pointees will be in an indeterminate state
+// because the caller has no way of knowing which conversion failed.
+// However, conversion cannot fail for types like string and StringPiece
+// that do not inspect the substring contents. Hence, in the common case
+// where all of the pointees are of such types, failure is always due to
+// match failure and thus none of the pointees will have been modified.
+//
+// Example: extracts "ruby" into "s" and 1234 into "i"
+//    int i;
+//    std::string s;
+//    CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
+//
+// Example: fails because string cannot be stored in integer
+//    CHECK(!RE2::FullMatch("ruby", "(.*)", &i));
+//
+// Example: fails because there aren't enough sub-patterns
+//    CHECK(!RE2::FullMatch("ruby:1234", "\\w+:\\d+", &s));
+//
+// Example: does not try to extract any extra sub-patterns
+//    CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
+//
+// Example: does not try to extract into NULL
+//    CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
+//
+// Example: integer overflow causes failure
+//    CHECK(!RE2::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
+//
+// NOTE(rsc): Asking for substrings slows successful matches quite a bit.
+// This may get a little faster in the future, but right now is slower
+// than PCRE.  On the other hand, failed matches run *very* fast (faster
+// than PCRE), as do matches without substring extraction.
+//
+// -----------------------------------------------------------------------
+// PARTIAL MATCHES
+//
+// You can use the "PartialMatch" operation when you want the pattern
+// to match any substring of the text.
+//
+// Example: simple search for a string:
+//      CHECK(RE2::PartialMatch("hello", "ell"));
+//
+// Example: find first number in a string
+//      int number;
+//      CHECK(RE2::PartialMatch("x*100 + 20", "(\\d+)", &number));
+//      CHECK_EQ(number, 100);
+//
+// -----------------------------------------------------------------------
+// PRE-COMPILED REGULAR EXPRESSIONS
+//
+// RE2 makes it easy to use any string as a regular expression, without
+// requiring a separate compilation step.
+//
+// If speed is of the essence, you can create a pre-compiled "RE2"
+// object from the pattern and use it multiple times.  If you do so,
+// you can typically parse text faster than with sscanf.
+//
+// Example: precompile pattern for faster matching:
+//    RE2 pattern("h.*o");
+//    while (ReadLine(&str)) {
+//      if (RE2::FullMatch(str, pattern)) ...;
+//    }
+//
+// -----------------------------------------------------------------------
+// SCANNING TEXT INCREMENTALLY
+//
+// The "Consume" operation may be useful if you want to repeatedly
+// match regular expressions at the front of a string and skip over
+// them as they match.  This requires use of the "StringPiece" type,
+// which represents a sub-range of a real string.
+//
+// Example: read lines of the form "var = value" from a string.
+//      std::string contents = ...;     // Fill string somehow
+//      StringPiece input(contents);    // Wrap a StringPiece around it
+//
+//      std::string var;
+//      int value;
+//      while (RE2::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) {
+//        ...;
+//      }
+//
+// Each successful call to "Consume" will set "var/value", and also
+// advance "input" so it points past the matched text.  Note that if the
+// regular expression matches an empty string, input will advance
+// by 0 bytes.  If the regular expression being used might match
+// an empty string, the loop body must check for this case and either
+// advance the string or break out of the loop.
+//
+// The "FindAndConsume" operation is similar to "Consume" but does not
+// anchor your match at the beginning of the string.  For example, you
+// could extract all words from a string by repeatedly calling
+//     RE2::FindAndConsume(&input, "(\\w+)", &word)
+//
+// -----------------------------------------------------------------------
+// USING VARIABLE NUMBER OF ARGUMENTS
+//
+// The above operations require you to know the number of arguments
+// when you write the code.  This is not always possible or easy (for
+// example, the regular expression may be calculated at run time).
+// You can use the "N" version of the operations when the number of
+// match arguments is determined at run time.
+//
+// Example:
+//   const RE2::Arg* args[10];
+//   int n;
+//   // ... populate args with pointers to RE2::Arg values ...
+//   // ... set n to the number of RE2::Arg objects ...
+//   bool match = RE2::FullMatchN(input, pattern, args, n);
+//
+// The last statement is equivalent to
+//
+//   bool match = RE2::FullMatch(input, pattern,
+//                               *args[0], *args[1], ..., *args[n - 1]);
+//
+// -----------------------------------------------------------------------
+// PARSING HEX/OCTAL/C-RADIX NUMBERS
+//
+// By default, if you pass a pointer to a numeric value, the
+// corresponding text is interpreted as a base-10 number.  You can
+// instead wrap the pointer with a call to one of the operators Hex(),
+// Octal(), or CRadix() to interpret the text in another base.  The
+// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
+// prefixes, but defaults to base-10.
+//
+// Example:
+//   int a, b, c, d;
+//   CHECK(RE2::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
+//         RE2::Octal(&a), RE2::Hex(&b), RE2::CRadix(&c), RE2::CRadix(&d)));
+// will leave 64 in a, b, c, and d.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <algorithm>
+#include <map>
+#include <mutex>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+#if defined(__APPLE__)
+#include <TargetConditionals.h>
+#endif
+
+#include "re2/stringpiece.h"
+
+namespace re2 {
+class Prog;
+class Regexp;
+}  // namespace re2
+
+namespace re2 {
+
+// Interface for regular expression matching.  Also corresponds to a
+// pre-compiled regular expression.  An "RE2" object is safe for
+// concurrent use by multiple threads.
+class RE2 {
+ public:
+  // We convert user-passed pointers into special Arg objects
+  class Arg;
+  class Options;
+
+  // Defined in set.h.
+  class Set;
+
+  enum ErrorCode {
+    NoError = 0,
+
+    // Unexpected error
+    ErrorInternal,
+
+    // Parse errors
+    ErrorBadEscape,          // bad escape sequence
+    ErrorBadCharClass,       // bad character class
+    ErrorBadCharRange,       // bad character class range
+    ErrorMissingBracket,     // missing closing ]
+    ErrorMissingParen,       // missing closing )
+    ErrorUnexpectedParen,    // unexpected closing )
+    ErrorTrailingBackslash,  // trailing \ at end of regexp
+    ErrorRepeatArgument,     // repeat argument missing, e.g. "*"
+    ErrorRepeatSize,         // bad repetition argument
+    ErrorRepeatOp,           // bad repetition operator
+    ErrorBadPerlOp,          // bad perl operator
+    ErrorBadUTF8,            // invalid UTF-8 in regexp
+    ErrorBadNamedCapture,    // bad named capture group
+    ErrorPatternTooLarge     // pattern too large (compile failed)
+  };
+
+  // Predefined common options.
+  // If you need more complicated things, instantiate
+  // an Options object, possibly passing one of these to
+  // the Options constructor, change the settings, and pass that
+  // Options object to the RE2 constructor.
+  enum CannedOptions {
+    DefaultOptions = 0,
+    Latin1, // treat input as Latin-1 (default UTF-8)
+    POSIX, // POSIX syntax, leftmost-longest match
+    Quiet // do not log about regexp parse errors
+  };
+
+  // Need to have the const char* and const std::string& forms for implicit
+  // conversions when passing string literals to FullMatch and PartialMatch.
+  // Otherwise the StringPiece form would be sufficient.
+#ifndef SWIG
+  RE2(const char* pattern);
+  RE2(const std::string& pattern);
+#endif
+  RE2(const StringPiece& pattern);
+  RE2(const StringPiece& pattern, const Options& options);
+  ~RE2();
+
+  // Returns whether RE2 was created properly.
+  bool ok() const { return error_code() == NoError; }
+
+  // The string specification for this RE2.  E.g.
+  //   RE2 re("ab*c?d+");
+  //   re.pattern();    // "ab*c?d+"
+  const std::string& pattern() const { return pattern_; }
+
+  // If RE2 could not be created properly, returns an error string.
+  // Else returns the empty string.
+  const std::string& error() const { return *error_; }
+
+  // If RE2 could not be created properly, returns an error code.
+  // Else returns RE2::NoError (== 0).
+  ErrorCode error_code() const { return error_code_; }
+
+  // If RE2 could not be created properly, returns the offending
+  // portion of the regexp.
+  const std::string& error_arg() const { return error_arg_; }
+
+  // Returns the program size, a very approximate measure of a regexp's "cost".
+  // Larger numbers are more expensive than smaller numbers.
+  int ProgramSize() const;
+  int ReverseProgramSize() const;
+
+  // If histogram is not null, outputs the program fanout
+  // as a histogram bucketed by powers of 2.
+  // Returns the number of the largest non-empty bucket.
+  int ProgramFanout(std::vector<int>* histogram) const;
+  int ReverseProgramFanout(std::vector<int>* histogram) const;
+
+  // Returns the underlying Regexp; not for general use.
+  // Returns entire_regexp_ so that callers don't need
+  // to know about prefix_ and prefix_foldcase_.
+  re2::Regexp* Regexp() const { return entire_regexp_; }
+
+  /***** The array-based matching interface ******/
+
+  // The functions here have names ending in 'N' and are used to implement
+  // the functions whose names are the prefix before the 'N'. It is sometimes
+  // useful to invoke them directly, but the syntax is awkward, so the 'N'-less
+  // versions should be preferred.
+  static bool FullMatchN(const StringPiece& text, const RE2& re,
+                         const Arg* const args[], int n);
+  static bool PartialMatchN(const StringPiece& text, const RE2& re,
+                            const Arg* const args[], int n);
+  static bool ConsumeN(StringPiece* input, const RE2& re,
+                       const Arg* const args[], int n);
+  static bool FindAndConsumeN(StringPiece* input, const RE2& re,
+                              const Arg* const args[], int n);
+
+#ifndef SWIG
+ private:
+  template <typename F, typename SP>
+  static inline bool Apply(F f, SP sp, const RE2& re) {
+    return f(sp, re, NULL, 0);
+  }
+
+  template <typename F, typename SP, typename... A>
+  static inline bool Apply(F f, SP sp, const RE2& re, const A&... a) {
+    const Arg* const args[] = {&a...};
+    const int n = sizeof...(a);
+    return f(sp, re, args, n);
+  }
+
+ public:
+  // In order to allow FullMatch() et al. to be called with a varying number
+  // of arguments of varying types, we use two layers of variadic templates.
+  // The first layer constructs the temporary Arg objects. The second layer
+  // (above) constructs the array of pointers to the temporary Arg objects.
+
+  /***** The useful part: the matching interface *****/
+
+  // Matches "text" against "re".  If pointer arguments are
+  // supplied, copies matched sub-patterns into them.
+  //
+  // You can pass in a "const char*" or a "std::string" for "text".
+  // You can pass in a "const char*" or a "std::string" or a "RE2" for "re".
+  //
+  // The provided pointer arguments can be pointers to any scalar numeric
+  // type, or one of:
+  //    std::string     (matched piece is copied to string)
+  //    StringPiece     (StringPiece is mutated to point to matched piece)
+  //    T               (where "bool T::ParseFrom(const char*, size_t)" exists)
+  //    (void*)NULL     (the corresponding matched sub-pattern is not copied)
+  //
+  // Returns true iff all of the following conditions are satisfied:
+  //   a. "text" matches "re" fully - from the beginning to the end of "text".
+  //   b. The number of matched sub-patterns is >= number of supplied pointers.
+  //   c. The "i"th argument has a suitable type for holding the
+  //      string captured as the "i"th sub-pattern.  If you pass in
+  //      NULL for the "i"th argument, or pass fewer arguments than
+  //      number of sub-patterns, the "i"th captured sub-pattern is
+  //      ignored.
+  //
+  // CAVEAT: An optional sub-pattern that does not exist in the
+  // matched string is assigned the empty string.  Therefore, the
+  // following will return false (because the empty string is not a
+  // valid number):
+  //    int number;
+  //    RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number);
+  template <typename... A>
+  static bool FullMatch(const StringPiece& text, const RE2& re, A&&... a) {
+    return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...);
+  }
+
+  // Like FullMatch(), except that "re" is allowed to match a substring
+  // of "text".
+  //
+  // Returns true iff all of the following conditions are satisfied:
+  //   a. "text" matches "re" partially - for some substring of "text".
+  //   b. The number of matched sub-patterns is >= number of supplied pointers.
+  //   c. The "i"th argument has a suitable type for holding the
+  //      string captured as the "i"th sub-pattern.  If you pass in
+  //      NULL for the "i"th argument, or pass fewer arguments than
+  //      number of sub-patterns, the "i"th captured sub-pattern is
+  //      ignored.
+  template <typename... A>
+  static bool PartialMatch(const StringPiece& text, const RE2& re, A&&... a) {
+    return Apply(PartialMatchN, text, re, Arg(std::forward<A>(a))...);
+  }
+
+  // Like FullMatch() and PartialMatch(), except that "re" has to match
+  // a prefix of the text, and "input" is advanced past the matched
+  // text.  Note: "input" is modified iff this routine returns true
+  // and "re" matched a non-empty substring of "input".
+  //
+  // Returns true iff all of the following conditions are satisfied:
+  //   a. "input" matches "re" partially - for some prefix of "input".
+  //   b. The number of matched sub-patterns is >= number of supplied pointers.
+  //   c. The "i"th argument has a suitable type for holding the
+  //      string captured as the "i"th sub-pattern.  If you pass in
+  //      NULL for the "i"th argument, or pass fewer arguments than
+  //      number of sub-patterns, the "i"th captured sub-pattern is
+  //      ignored.
+  template <typename... A>
+  static bool Consume(StringPiece* input, const RE2& re, A&&... a) {
+    return Apply(ConsumeN, input, re, Arg(std::forward<A>(a))...);
+  }
+
+  // Like Consume(), but does not anchor the match at the beginning of
+  // the text.  That is, "re" need not start its match at the beginning
+  // of "input".  For example, "FindAndConsume(s, "(\\w+)", &word)" finds
+  // the next word in "s" and stores it in "word".
+  //
+  // Returns true iff all of the following conditions are satisfied:
+  //   a. "input" matches "re" partially - for some substring of "input".
+  //   b. The number of matched sub-patterns is >= number of supplied pointers.
+  //   c. The "i"th argument has a suitable type for holding the
+  //      string captured as the "i"th sub-pattern.  If you pass in
+  //      NULL for the "i"th argument, or pass fewer arguments than
+  //      number of sub-patterns, the "i"th captured sub-pattern is
+  //      ignored.
+  template <typename... A>
+  static bool FindAndConsume(StringPiece* input, const RE2& re, A&&... a) {
+    return Apply(FindAndConsumeN, input, re, Arg(std::forward<A>(a))...);
+  }
+#endif
+
+  // Replace the first match of "re" in "str" with "rewrite".
+  // Within "rewrite", backslash-escaped digits (\1 to \9) can be
+  // used to insert text matching corresponding parenthesized group
+  // from the pattern.  \0 in "rewrite" refers to the entire matching
+  // text.  E.g.,
+  //
+  //   std::string s = "yabba dabba doo";
+  //   CHECK(RE2::Replace(&s, "b+", "d"));
+  //
+  // will leave "s" containing "yada dabba doo"
+  //
+  // Returns true if the pattern matches and a replacement occurs,
+  // false otherwise.
+  static bool Replace(std::string* str,
+                      const RE2& re,
+                      const StringPiece& rewrite);
+
+  // Like Replace(), except replaces successive non-overlapping occurrences
+  // of the pattern in the string with the rewrite. E.g.
+  //
+  //   std::string s = "yabba dabba doo";
+  //   CHECK(RE2::GlobalReplace(&s, "b+", "d"));
+  //
+  // will leave "s" containing "yada dada doo"
+  // Replacements are not subject to re-matching.
+  //
+  // Because GlobalReplace only replaces non-overlapping matches,
+  // replacing "ana" within "banana" makes only one replacement, not two.
+  //
+  // Returns the number of replacements made.
+  static int GlobalReplace(std::string* str,
+                           const RE2& re,
+                           const StringPiece& rewrite);
+
+  // Like Replace, except that if the pattern matches, "rewrite"
+  // is copied into "out" with substitutions.  The non-matching
+  // portions of "text" are ignored.
+  //
+  // Returns true iff a match occurred and the extraction happened
+  // successfully;  if no match occurs, the string is left unaffected.
+  //
+  // REQUIRES: "text" must not alias any part of "*out".
+  static bool Extract(const StringPiece& text,
+                      const RE2& re,
+                      const StringPiece& rewrite,
+                      std::string* out);
+
+  // Escapes all potentially meaningful regexp characters in
+  // 'unquoted'.  The returned string, used as a regular expression,
+  // will match exactly the original string.  For example,
+  //           1.5-2.0?
+  // may become:
+  //           1\.5\-2\.0\?
+  static std::string QuoteMeta(const StringPiece& unquoted);
+
+  // Computes range for any strings matching regexp. The min and max can in
+  // some cases be arbitrarily precise, so the caller gets to specify the
+  // maximum desired length of string returned.
+  //
+  // Assuming PossibleMatchRange(&min, &max, N) returns successfully, any
+  // string s that is an anchored match for this regexp satisfies
+  //   min <= s && s <= max.
+  //
+  // Note that PossibleMatchRange() will only consider the first copy of an
+  // infinitely repeated element (i.e., any regexp element followed by a '*' or
+  // '+' operator). Regexps with "{N}" constructions are not affected, as those
+  // do not compile down to infinite repetitions.
+  //
+  // Returns true on success, false on error.
+  bool PossibleMatchRange(std::string* min, std::string* max,
+                          int maxlen) const;
+
+  // Generic matching interface
+
+  // Type of match.
+  enum Anchor {
+    UNANCHORED,         // No anchoring
+    ANCHOR_START,       // Anchor at start only
+    ANCHOR_BOTH         // Anchor at start and end
+  };
+
+  // Return the number of capturing subpatterns, or -1 if the
+  // regexp wasn't valid on construction.  The overall match ($0)
+  // does not count: if the regexp is "(a)(b)", returns 2.
+  int NumberOfCapturingGroups() const { return num_captures_; }
+
+  // Return a map from names to capturing indices.
+  // The map records the index of the leftmost group
+  // with the given name.
+  // Only valid until the re is deleted.
+  const std::map<std::string, int>& NamedCapturingGroups() const;
+
+  // Return a map from capturing indices to names.
+  // The map has no entries for unnamed groups.
+  // Only valid until the re is deleted.
+  const std::map<int, std::string>& CapturingGroupNames() const;
+
+  // General matching routine.
+  // Match against text starting at offset startpos
+  // and stopping the search at offset endpos.
+  // Returns true if match found, false if not.
+  // On a successful match, fills in submatch[] (up to nsubmatch entries)
+  // with information about submatches.
+  // E.g. matching RE2("(foo)|(bar)baz") on "barbazbla" will return true, with
+  // submatch[0] = "barbaz", submatch[1].data() = NULL, submatch[2] = "bar",
+  // submatch[3].data() = NULL, ..., up to submatch[nsubmatch-1].data() = NULL.
+  // Caveat: submatch[] may be clobbered even on match failure.
+  //
+  // Don't ask for more match information than you will use:
+  // runs much faster with nsubmatch == 1 than nsubmatch > 1, and
+  // runs even faster if nsubmatch == 0.
+  // Doesn't make sense to use nsubmatch > 1 + NumberOfCapturingGroups(),
+  // but will be handled correctly.
+  //
+  // Passing text == StringPiece(NULL, 0) will be handled like any other
+  // empty string, but note that on return, it will not be possible to tell
+  // whether submatch i matched the empty string or did not match:
+  // either way, submatch[i].data() == NULL.
+  bool Match(const StringPiece& text,
+             size_t startpos,
+             size_t endpos,
+             Anchor re_anchor,
+             StringPiece* submatch,
+             int nsubmatch) const;
+
+  // Check that the given rewrite string is suitable for use with this
+  // regular expression.  It checks that:
+  //   * The regular expression has enough parenthesized subexpressions
+  //     to satisfy all of the \N tokens in rewrite
+  //   * The rewrite string doesn't have any syntax errors.  E.g.,
+  //     '\' followed by anything other than a digit or '\'.
+  // A true return value guarantees that Replace() and Extract() won't
+  // fail because of a bad rewrite string.
+  bool CheckRewriteString(const StringPiece& rewrite,
+                          std::string* error) const;
+
+  // Returns the maximum submatch needed for the rewrite to be done by
+  // Replace(). E.g. if rewrite == "foo \\2,\\1", returns 2.
+  static int MaxSubmatch(const StringPiece& rewrite);
+
+  // Append the "rewrite" string, with backslash substitutions from "vec",
+  // to string "out".
+  // Returns true on success.  This method can fail because of a malformed
+  // rewrite string.  CheckRewriteString guarantees that the rewrite will
+  // be successful.
+  bool Rewrite(std::string* out,
+               const StringPiece& rewrite,
+               const StringPiece* vec,
+               int veclen) const;
+
+  // Constructor options
+  class Options {
+   public:
+    // The options are (defaults in parentheses):
+    //
+    //   encoding         (EncodingUTF8)  text and pattern: UTF-8 or Latin-1
+    //   posix_syntax     (false) restrict regexps to POSIX egrep syntax
+    //   longest_match    (false) search for longest match, not first match
+    //   log_errors       (true)  log syntax and execution errors to ERROR
+    //   max_mem          (see below)  approx. max memory footprint of RE2
+    //   literal          (false) interpret string as literal, not regexp
+    //   never_nl         (false) never match \n, even if it is in regexp
+    //   dot_nl           (false) dot matches everything including new line
+    //   never_capture    (false) parse all parens as non-capturing
+    //   case_sensitive   (true)  match is case-sensitive (regexp can override
+    //                              with (?i) unless in posix_syntax mode)
+    //
+    // The following options are only consulted when posix_syntax == true.
+    // When posix_syntax == false, these features are always enabled and
+    // cannot be turned off; to perform multi-line matching in that case,
+    // begin the regexp with (?m).
+    //   perl_classes     (false) allow Perl's \d \s \w \D \S \W
+    //   word_boundary    (false) allow Perl's \b \B (word boundary and not)
+    //   one_line         (false) ^ and $ only match beginning and end of text
+    //
+    // The max_mem option controls how much memory can be used
+    // to hold the compiled form of the regexp (the Prog) and
+    // its cached DFA graphs.  Code Search placed limits on the number
+    // of Prog instructions and DFA states: 10,000 for both.
+    // In RE2, those limits would translate to about 240 KB per Prog
+    // and perhaps 2.5 MB per DFA (DFA state sizes vary by regexp; RE2 does a
+    // better job of keeping them small than Code Search did).
+    // Each RE2 has two Progs (one forward, one reverse), and each Prog
+    // can have two DFAs (one first match, one longest match).
+    // That makes 4 DFAs:
+    //
+    //   forward, first-match    - used for UNANCHORED or ANCHOR_START searches
+    //                               if opt.longest_match() == false
+    //   forward, longest-match  - used for all ANCHOR_BOTH searches,
+    //                               and the other two kinds if
+    //                               opt.longest_match() == true
+    //   reverse, first-match    - never used
+    //   reverse, longest-match  - used as second phase for unanchored searches
+    //
+    // The RE2 memory budget is statically divided between the two
+    // Progs and then the DFAs: two thirds to the forward Prog
+    // and one third to the reverse Prog.  The forward Prog gives half
+    // of what it has left over to each of its DFAs.  The reverse Prog
+    // gives it all to its longest-match DFA.
+    //
+    // Once a DFA fills its budget, it flushes its cache and starts over.
+    // If this happens too often, RE2 falls back on the NFA implementation.
+
+    // For now, make the default budget something close to Code Search.
+    static const int kDefaultMaxMem = 8<<20;  // 8 MiB
+
+    enum Encoding {
+      EncodingUTF8 = 1,
+      EncodingLatin1
+    };
+
+    Options() :  // every option starts at the default listed above
+      encoding_(EncodingUTF8),
+      posix_syntax_(false),
+      longest_match_(false),
+      log_errors_(true),
+      max_mem_(kDefaultMaxMem),
+      literal_(false),
+      never_nl_(false),
+      dot_nl_(false),
+      never_capture_(false),
+      case_sensitive_(true),
+      perl_classes_(false),
+      word_boundary_(false),
+      one_line_(false) {
+    }
+
+    /*implicit*/ Options(CannedOptions);
+
+    Encoding encoding() const { return encoding_; }
+    void set_encoding(Encoding encoding) { encoding_ = encoding; }
+
+    bool posix_syntax() const { return posix_syntax_; }
+    void set_posix_syntax(bool b) { posix_syntax_ = b; }
+
+    bool longest_match() const { return longest_match_; }
+    void set_longest_match(bool b) { longest_match_ = b; }
+
+    bool log_errors() const { return log_errors_; }
+    void set_log_errors(bool b) { log_errors_ = b; }
+
+    int64_t max_mem() const { return max_mem_; }
+    void set_max_mem(int64_t m) { max_mem_ = m; }
+
+    bool literal() const { return literal_; }
+    void set_literal(bool b) { literal_ = b; }
+
+    bool never_nl() const { return never_nl_; }
+    void set_never_nl(bool b) { never_nl_ = b; }
+
+    bool dot_nl() const { return dot_nl_; }
+    void set_dot_nl(bool b) { dot_nl_ = b; }
+
+    bool never_capture() const { return never_capture_; }
+    void set_never_capture(bool b) { never_capture_ = b; }
+
+    bool case_sensitive() const { return case_sensitive_; }
+    void set_case_sensitive(bool b) { case_sensitive_ = b; }
+
+    bool perl_classes() const { return perl_classes_; }
+    void set_perl_classes(bool b) { perl_classes_ = b; }
+
+    bool word_boundary() const { return word_boundary_; }
+    void set_word_boundary(bool b) { word_boundary_ = b; }
+
+    bool one_line() const { return one_line_; }
+    void set_one_line(bool b) { one_line_ = b; }
+
+    void Copy(const Options& src) {  // overwrites every option with src's
+      *this = src;
+    }
+
+    int ParseFlags() const;  // declared here; the body is not in this header
+
+   private:
+    Encoding encoding_;
+    bool posix_syntax_;
+    bool longest_match_;
+    bool log_errors_;
+    int64_t max_mem_;
+    bool literal_;
+    bool never_nl_;
+    bool dot_nl_;
+    bool never_capture_;
+    bool case_sensitive_;
+    bool perl_classes_;
+    bool word_boundary_;
+    bool one_line_;
+  };
+
+  // Returns, by const reference, the options set in the constructor.
+  const Options& options() const { return options_; }
+
+  // Argument converters; see below.
+  template <typename T>
+  static Arg CRadix(T* ptr);
+  template <typename T>
+  static Arg Hex(T* ptr);
+  template <typename T>
+  static Arg Octal(T* ptr);
+
+ private:
+  void Init(const StringPiece& pattern, const Options& options);
+
+  bool DoMatch(const StringPiece& text,
+               Anchor re_anchor,
+               size_t* consumed,
+               const Arg* const args[],
+               int n) const;
+
+  re2::Prog* ReverseProg() const;
+
+  std::string pattern_;         // string regular expression
+  Options options_;             // option flags
+  re2::Regexp* entire_regexp_;  // parsed regular expression
+  const std::string* error_;    // error indicator (or points to empty string)
+  ErrorCode error_code_;        // error code
+  std::string error_arg_;       // fragment of regexp showing error
+  std::string prefix_;          // required prefix (before suffix_regexp_)
+  bool prefix_foldcase_;        // prefix_ is ASCII case-insensitive
+  re2::Regexp* suffix_regexp_;  // parsed regular expression, prefix_ removed
+  re2::Prog* prog_;             // compiled program for regexp
+  int num_captures_;            // number of capturing groups
+  bool is_one_pass_;            // can use prog_->SearchOnePass?
+
+  // Reverse Prog for DFA execution only
+  mutable re2::Prog* rprog_;
+  // Map from capture names to indices
+  mutable const std::map<std::string, int>* named_groups_;
+  // Map from capture indices to names
+  mutable const std::map<int, std::string>* group_names_;
+
+  mutable std::once_flag rprog_once_;
+  mutable std::once_flag named_groups_once_;
+  mutable std::once_flag group_names_once_;
+
+  RE2(const RE2&) = delete;
+  RE2& operator=(const RE2&) = delete;
+};
+
+/***** Implementation details *****/
+
+namespace re2_internal {
+
+// Trait: the types for which 3-ary Parse(str, n, dest) has specializations.
+template <typename T> struct Parse3ary : public std::false_type {};
+template <> struct Parse3ary<void> : public std::true_type {};
+template <> struct Parse3ary<std::string> : public std::true_type {};
+template <> struct Parse3ary<StringPiece> : public std::true_type {};
+template <> struct Parse3ary<char> : public std::true_type {};
+template <> struct Parse3ary<signed char> : public std::true_type {};
+template <> struct Parse3ary<unsigned char> : public std::true_type {};
+template <> struct Parse3ary<float> : public std::true_type {};
+template <> struct Parse3ary<double> : public std::true_type {};
+
+template <typename T>
+bool Parse(const char* str, size_t n, T* dest);  // primary template, no body
+
+// Trait: the types for which 4-ary Parse(str, n, dest, radix) is specialized.
+template <typename T> struct Parse4ary : public std::false_type {};
+template <> struct Parse4ary<long> : public std::true_type {};
+template <> struct Parse4ary<unsigned long> : public std::true_type {};
+template <> struct Parse4ary<short> : public std::true_type {};
+template <> struct Parse4ary<unsigned short> : public std::true_type {};
+template <> struct Parse4ary<int> : public std::true_type {};
+template <> struct Parse4ary<unsigned int> : public std::true_type {};
+template <> struct Parse4ary<long long> : public std::true_type {};
+template <> struct Parse4ary<unsigned long long> : public std::true_type {};
+
+template <typename T>
+bool Parse(const char* str, size_t n, T* dest, int radix);  // likewise
+
+}  // namespace re2_internal
+
+class RE2::Arg {
+ private:
+  template <typename T>
+  using CanParse3ary = typename std::enable_if<
+      re2_internal::Parse3ary<T>::value,
+      int>::type;
+
+  template <typename T>
+  using CanParse4ary = typename std::enable_if<
+      re2_internal::Parse4ary<T>::value,
+      int>::type;
+
+#if !defined(_MSC_VER)
+  template <typename T>
+  using CanParseFrom = typename std::enable_if<
+      std::is_member_function_pointer<
+          decltype(static_cast<bool (T::*)(const char*, size_t)>(
+              &T::ParseFrom))>::value,
+      int>::type;
+#endif
+
+ public:
+  Arg() : Arg(nullptr) {}
+  Arg(std::nullptr_t ptr) : arg_(ptr), parser_(DoNothing) {}  // Parse() no-op
+
+  template <typename T, CanParse3ary<T> = 0>
+  Arg(T* ptr) : arg_(ptr), parser_(DoParse3ary<T>) {}
+
+  template <typename T, CanParse4ary<T> = 0>
+  Arg(T* ptr) : arg_(ptr), parser_(DoParse4ary<T>) {}
+
+#if !defined(_MSC_VER)
+  template <typename T, CanParseFrom<T> = 0>
+  Arg(T* ptr) : arg_(ptr), parser_(DoParseFrom<T>) {}
+#endif
+
+  typedef bool (*Parser)(const char* str, size_t n, void* dest);
+
+  template <typename T>
+  Arg(T* ptr, Parser parser) : arg_(ptr), parser_(parser) {}  // custom parser
+
+  bool Parse(const char* str, size_t n) const {
+    return (*parser_)(str, n, arg_);
+  }
+
+ private:
+  static bool DoNothing(const char* /*str*/, size_t /*n*/, void* /*dest*/) {
+    return true;
+  }
+
+  template <typename T>
+  static bool DoParse3ary(const char* str, size_t n, void* dest) {
+    return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest));
+  }
+
+  template <typename T>
+  static bool DoParse4ary(const char* str, size_t n, void* dest) {  // radix 10
+    return re2_internal::Parse(str, n, reinterpret_cast<T*>(dest), 10);
+  }
+
+#if !defined(_MSC_VER)
+  template <typename T>
+  static bool DoParseFrom(const char* str, size_t n, void* dest) {
+    if (dest == NULL) return true;  // null target: nothing to store
+    return reinterpret_cast<T*>(dest)->ParseFrom(str, n);
+  }
+#endif
+
+  void*         arg_;     // type-erased destination handed to parser_
+  Parser        parser_;  // conversion routine invoked by Parse()
+};
+
+template <typename T>
+inline RE2::Arg RE2::CRadix(T* ptr) {
+  return Arg(ptr, [](const char* text, size_t len, void* out) -> bool {
+    return re2_internal::Parse(text, len, static_cast<T*>(out), /*radix=*/0);
+  });
+}
+
+template <typename T>
+inline RE2::Arg RE2::Hex(T* ptr) {
+  return Arg(ptr, [](const char* text, size_t len, void* out) -> bool {
+    return re2_internal::Parse(text, len, static_cast<T*>(out), /*radix=*/16);
+  });
+}
+
+template <typename T>
+inline RE2::Arg RE2::Octal(T* ptr) {
+  return Arg(ptr, [](const char* text, size_t len, void* out) -> bool {
+    return re2_internal::Parse(text, len, static_cast<T*>(out), /*radix=*/8);
+  });
+}
+
+#ifndef SWIG
+// Silence warnings about missing initializers for members of LazyRE2.
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#endif
+
+// Helper for writing global or static RE2s safely.
+// Write
+//     static LazyRE2 re = {".*"};
+// and then use *re instead of writing
+//     static RE2 re(".*");
+// The former is more careful about multithreaded
+// situations than the latter.
+//
+// N.B. This class never deletes the RE2 object that
+// it constructs: that's a feature, so that it can be used
+// for global and function static variables.
+class LazyRE2 {
+ private:
+  struct NoArg {};
+
+ public:
+  typedef RE2 element_type;  // support std::pointer_traits
+
+  // Constructor omitted to preserve braced initialization in C++98.
+
+  // Pretend to be a pointer to RE2 (never NULL due to on-demand creation):
+  RE2& operator*() const { return *get(); }
+  RE2* operator->() const { return get(); }
+
+  // Named accessor/initializer:
+  RE2* get() const {
+    std::call_once(once_, &LazyRE2::Init, this);  // Init() runs at most once
+    return ptr_;
+  }
+
+  // All data fields must be public to support {"foo"} initialization.
+  const char* pattern_;
+  RE2::CannedOptions options_;
+  NoArg barrier_against_excess_initializers_;
+
+  mutable RE2* ptr_;  // assigned by Init(); this class never deletes it
+  mutable std::once_flag once_;
+
+ private:
+  static void Init(const LazyRE2* lazy_re2) {
+    lazy_re2->ptr_ = new RE2(lazy_re2->pattern_, lazy_re2->options_);
+  }
+
+  void operator=(const LazyRE2&);  // disallowed
+};
+#endif
+
+namespace hooks {
+
+// Most platforms support thread_local. Older versions of iOS don't support
+// thread_local, but for the sake of brevity, we lump together all versions
+// of Apple platforms that aren't macOS. If an iOS application really needs
+// the context pointee someday, we can get more specific then...
+//
+// As per https://github.com/google/re2/issues/325, thread_local support in
+// MinGW seems to be buggy. (FWIW, Abseil folks also avoid it.)
+#define RE2_HAVE_THREAD_LOCAL
+#if (defined(__APPLE__) && !TARGET_OS_OSX) || defined(__MINGW32__)
+#undef RE2_HAVE_THREAD_LOCAL
+#endif
+
+// A hook must not make any assumptions regarding the lifetime of the context
+// pointee beyond the current invocation of the hook. Pointers and references
+// obtained via the context pointee should be considered invalidated when the
+// hook returns. Hence, any data about the context pointee (e.g. its pattern)
+// would have to be copied in order for it to be kept for an indefinite time.
+//
+// A hook must not use RE2 for matching. Control flow reentering RE2::Match()
+// could result in infinite mutual recursion. To discourage that possibility,
+// RE2 will not maintain the context pointer correctly when used in that way.
+#ifdef RE2_HAVE_THREAD_LOCAL
+extern thread_local const RE2* context;
+#endif
+
+struct DFAStateCacheReset {
+  int64_t state_budget;
+  size_t state_cache_size;
+};
+
+struct DFASearchFailure {
+  // No payload fields yet.
+};
+
+#define DECLARE_HOOK(type)                  \
+  using type##Callback = void(const type&); \
+  void Set##type##Hook(type##Callback* cb); \
+  type##Callback* Get##type##Hook();
+
+DECLARE_HOOK(DFAStateCacheReset)  // Set/GetDFAStateCacheResetHook()
+DECLARE_HOOK(DFASearchFailure)    // Set/GetDFASearchFailureHook()
+
+#undef DECLARE_HOOK  // the helper macro does not leak out of this header
+
+}  // namespace hooks
+
+}  // namespace re2
+
+using re2::RE2;
+using re2::LazyRE2;
+
+#endif  // RE2_RE2_H_
diff --git a/re2/regex_internal.h b/re2/regex_internal.h
new file mode 100644
index 0000000000000000000000000000000000000000..5824c8073bb55e07548a7d5f206f0f217cc13a51
--- /dev/null
+++ b/re2/regex_internal.h
@@ -0,0 +1,57 @@
+#ifndef RE2_REGEX_INTERNAL_H_
+#define RE2_REGEX_INTERNAL_H_
+// Minimal stand-ins for the Prog and Regexp types of upstream re2/prog.h and
+// re2/regexp.h.  Guarded so that including this header twice is harmless.
+#include <rure.h>
+namespace re2 {
+// Compiled form of regexp program.  Empty placeholder here; the original
+// note on this class reads "rure renamed to Prog".
+class Prog {};
+
+class Regexp {
+ public:
+  // Flags for parsing.  Can be ORed together.
+  enum ParseFlags {
+    NoParseFlags  = 0,
+    FoldCase      = 1<<0,   // Fold case during matching (case-insensitive).
+    Literal       = 1<<1,   // Treat s as literal string instead of a regexp.
+    ClassNL       = 1<<2,   // Allow char classes like [^a-z] and \D and \s
+                            // and [[:space:]] to match newline.
+    DotNL         = 1<<3,   // Allow . to match newline.
+    MatchNL       = ClassNL | DotNL,
+    OneLine       = 1<<4,   // Treat ^ and $ as only matching at beginning and
+                            // end of text, not around embedded newlines.
+                            // (Perl's default)
+    Latin1        = 1<<5,   // Regexp and text are in Latin1, not UTF-8.
+    NonGreedy     = 1<<6,   // Repetition operators are non-greedy by default.
+    PerlClasses   = 1<<7,   // Allow Perl character classes like \d.
+    PerlB         = 1<<8,   // Allow Perl's \b and \B.
+    PerlX         = 1<<9,   // Perl extensions:
+                            //   non-capturing parens - (?: )
+                            //   non-greedy operators - *? +? ?? {}?
+                            //   flag edits - (?i) (?-i) (?i: )
+                            //     i - FoldCase
+                            //     m - !OneLine
+                            //     s - DotNL
+                            //     U - NonGreedy
+                            //   line ends: \A \z
+                            //   \Q and \E to disable/enable metacharacters
+                            //   (?P<name>expr) for named captures
+                            //   \C to match any single byte
+    UnicodeGroups = 1<<10,  // Allow \p{Han} for Unicode Han group
+                            //   and \P{Han} for its negation.
+    NeverNL       = 1<<11,  // Never match NL, even if the regexp mentions
+                            //   it explicitly.
+    NeverCapture  = 1<<12,  // Parse all parens as non-capturing.
+
+    // As close to Perl as we can get.
+    LikePerl      = ClassNL | OneLine | PerlClasses | PerlB | PerlX |
+                    UnicodeGroups,
+
+    // Internal use only.
+    WasDollar     = 1<<13,  // on kRegexpEndText: was $ in regexp text
+    AllParseFlags = (1<<14)-1,
+  };
+};
+}  // namespace re2
+#endif  // RE2_REGEX_INTERNAL_H_
\ No newline at end of file
diff --git a/re2/set.cc b/re2/set.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f62fcd7088a00f8eefac0b57e0fe14c5878e95a8
--- /dev/null
+++ b/re2/set.cc
@@ -0,0 +1,178 @@
+// Copyright 2010 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "re2/set.h"
+
+#include <stddef.h>
+#include <algorithm>
+#include <memory>
+#include <utility>
+
+#include "util/util.h"
+#include "util/logging.h"
+// #include "re2/pod_array.h"
+// #include "re2/prog.h"
+#include "re2/re2.h"
+// #include "re2/regexp.h"
+#include "regex_internal.h"
+#include "re2/stringpiece.h"
+
+namespace re2 {
+
+// Builds an empty, not-yet-compiled set that remembers "options" and "anchor".
+RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor)
+    : options_(options), anchor_(anchor), compiled_(false), size_(0) {
+  // Force never_capture on this set's own copy of the options; as the
+  // original note puts it, that might unblock some optimisations.
+  options_.set_never_capture(true);
+}
+
+RE2::Set::~Set() {
+  // Nothing to release by hand: the upstream loop that called Decref() on
+  // each elem_[i].second is disabled in this port; members clean up themselves.
+}
+
+RE2::Set::Set(Set&& other)
+    : options_(other.options_),  // copied, not moved
+      anchor_(other.anchor_),
+      elem_(std::move(other.elem_)),
+      compiled_(other.compiled_),
+      size_(other.size_),
+      prog_(std::move(other.prog_)) {
+  other.elem_.clear();  // from here on: reset other to empty and uncompiled
+  other.elem_.shrink_to_fit();
+  other.compiled_ = false;
+  other.size_ = 0;
+  other.prog_.reset();
+}
+
+RE2::Set& RE2::Set::operator=(Set&& other) {
+  if (this == &other) return *this;  // self-move: keep the contents intact
+  this->~Set();  // tear down the current state, then rebuild it in place
+  return *new (this) Set(std::move(other));
+}
+
+int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
+  // if (compiled_) {
+  //   LOG(DFATAL) << "RE2::Set::Add() called after compiling";
+  //   return -1;
+  // }
+
+  // Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
+  //   options_.ParseFlags());
+  // RegexpStatus status;
+  // re2::Regexp* re = Regexp::Parse(pattern, pf, &status);
+  // if (re == NULL) {
+  //   if (error != NULL)
+  //     *error = status.Text();
+  //   if (options_.log_errors())
+  //     LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
+  //   return -1;
+  // }
+
+  // // Concatenate with match index and push on vector.
+  // int n = static_cast<int>(elem_.size());
+  // re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
+  // if (re->op() == kRegexpConcat) {
+  //   int nsub = re->nsub();
+  //   PODArray<re2::Regexp*> sub(nsub + 1);
+  //   for (int i = 0; i < nsub; i++)
+  //     sub[i] = re->sub()[i]->Incref();
+  //   sub[nsub] = m;
+  //   re->Decref();
+  //   re = re2::Regexp::Concat(sub.data(), nsub + 1, pf);
+  // } else {
+  //   re2::Regexp* sub[2];
+  //   sub[0] = re;
+  //   sub[1] = m;
+  //   re = re2::Regexp::Concat(sub, 2, pf);
+  // }
+  // elem_.emplace_back(std::string(pattern), re);
+  // return n;
+  return 0;  // stub: pattern is neither parsed nor stored; index is always 0
+}
+
+bool RE2::Set::Compile() {
+  // if (compiled_) {
+  //   LOG(DFATAL) << "RE2::Set::Compile() called more than once";
+  //   return false;
+  // }
+  // compiled_ = true;
+  // size_ = static_cast<int>(elem_.size());
+
+  // // Sort the elements by their patterns. This is good enough for now
+  // // until we have a Regexp comparison function. (Maybe someday...)
+  // std::sort(elem_.begin(), elem_.end(),
+  //           [](const Elem& a, const Elem& b) -> bool {
+  //             return a.first < b.first;
+  //           });
+
+  // PODArray<re2::Regexp*> sub(size_);
+  // for (int i = 0; i < size_; i++)
+  //   sub[i] = elem_[i].second;
+  // elem_.clear();
+  // elem_.shrink_to_fit();
+
+  // Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
+  //   options_.ParseFlags());
+  // re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf);
+
+  // prog_.reset(Prog::CompileSet(re, anchor_, options_.max_mem()));
+  // re->Decref();
+  return prog_ != nullptr;  // stub: nothing above assigns prog_ any more
+}
+
+bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
+  return this->Match(text, v, /*error_info=*/NULL);  // no error details wanted
+}
+
+bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
+                     ErrorInfo* error_info) const {
+//   if (!compiled_) {
+//     LOG(DFATAL) << "RE2::Set::Match() called before compiling";
+//     if (error_info != NULL)
+//       error_info->kind = kNotCompiled;
+//     return false;
+//   }
+// #ifdef RE2_HAVE_THREAD_LOCAL
+//   hooks::context = NULL;
+// #endif
+//   bool dfa_failed = false;
+//   std::unique_ptr<SparseSet> matches;
+//   if (v != NULL) {
+//     matches.reset(new SparseSet(size_));
+//     v->clear();
+//   }
+//   bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch,
+//                               NULL, &dfa_failed, matches.get());
+//   if (dfa_failed) {
+//     if (options_.log_errors())
+//       LOG(ERROR) << "DFA out of memory: "
+//                  << "program size " << prog_->size() << ", "
+//                  << "list count " << prog_->list_count() << ", "
+//                  << "bytemap range " << prog_->bytemap_range();
+//     if (error_info != NULL)
+//       error_info->kind = kOutOfMemory;
+//     return false;
+//   }
+//   if (ret == false) {
+//     if (error_info != NULL)
+//       error_info->kind = kNoError;
+//     return false;
+//   }
+//   if (v != NULL) {
+//     if (matches->empty()) {
+//       LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!";
+//       if (error_info != NULL)
+//         error_info->kind = kInconsistent;
+//       return false;
+//     }
+//     v->assign(matches->begin(), matches->end());
+//   }
+//   if (error_info != NULL)
+//     error_info->kind = kNoError;
+  return true;  // stub: always reports a match; v and error_info are untouched
+}
+
+}  // namespace re2
diff --git a/re2/set.h b/re2/set.h
new file mode 100644
index 0000000000000000000000000000000000000000..8d64f30ccd94073058de740e22fb110d013de506
--- /dev/null
+++ b/re2/set.h
@@ -0,0 +1,85 @@
+// Copyright 2010 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_SET_H_
+#define RE2_SET_H_
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "re2/re2.h"
+
+namespace re2 {
+class Prog;
+class Regexp;
+}  // namespace re2
+
+namespace re2 {
+
+// An RE2::Set represents a collection of regexps that can
+// be searched for simultaneously.
+class RE2::Set {
+ public:
+  enum ErrorKind {
+    kNoError = 0,
+    kNotCompiled,   // The set is not compiled.
+    kOutOfMemory,   // The DFA ran out of memory.
+    kInconsistent,  // The result is inconsistent. This should never happen.
+  };
+
+  struct ErrorInfo {
+    ErrorKind kind;  // one of the ErrorKind values above
+  };
+
+  Set(const RE2::Options& options, RE2::Anchor anchor);
+  ~Set();
+
+  // Not copyable.
+  Set(const Set&) = delete;
+  Set& operator=(const Set&) = delete;
+  // Movable.
+  Set(Set&& other);
+  Set& operator=(Set&& other);
+
+  // Adds pattern to the set using the options passed to the constructor.
+  // Returns the index that will identify the regexp in the output of Match(),
+  // or -1 if the regexp cannot be parsed.
+  // Indices are assigned in sequential order starting from 0.
+  // Errors do not increment the index; if error is not NULL, *error will hold
+  // the error message from the parser.
+  int Add(const StringPiece& pattern, std::string* error);
+
+  // Compiles the set in preparation for matching.
+  // Returns false if the compiler runs out of memory.
+  // Add() must not be called again after Compile().
+  // Compile() must be called before Match().
+  bool Compile();
+
+  // Returns true if text matches at least one of the regexps in the set.
+  // Fills v (if not NULL) with the indices of the matching regexps.
+  // Callers must not expect v to be sorted.
+  bool Match(const StringPiece& text, std::vector<int>* v) const;
+
+  // As above, but populates error_info (if not NULL) when none of the regexps
+  // in the set matched. This can inform callers when DFA execution fails, for
+  // example, because they might wish to handle that case differently.
+  bool Match(const StringPiece& text, std::vector<int>* v,
+             ErrorInfo* error_info) const;
+
+ private:
+  typedef std::pair<std::string, re2::Regexp*> Elem;  // (pattern, regexp)
+
+  RE2::Options options_;
+  RE2::Anchor anchor_;
+  std::vector<Elem> elem_;
+  bool compiled_;
+  int size_;
+  std::unique_ptr<re2::Prog> prog_;  // owned by the set
+};
+
+}  // namespace re2
+
+#endif  // RE2_SET_H_
diff --git a/re2/stringpiece.cc b/re2/stringpiece.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ef2e2874ead91d1324d08bf6a0e7c2c528e6e2d8
--- /dev/null
+++ b/re2/stringpiece.cc
@@ -0,0 +1,65 @@
+// Copyright 2004 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "re2/stringpiece.h"
+
+#include <ostream>
+
+#include "util/util.h"
+
+namespace re2 {
+
+const StringPiece::size_type StringPiece::npos;  // initialized in stringpiece.h
+
+StringPiece::size_type StringPiece::copy(char* buf, size_type n,
+                                         size_type pos) const {
+  size_type ret = std::min(size_ - std::min(pos, size_), n);  // pos is clamped
+  if (ret != 0) memcpy(buf, data_ + pos, ret);  // never touch NULL data_
+  return ret;
+}
+
+StringPiece StringPiece::substr(size_type pos, size_type n) const {
+  const size_type start = std::min(pos, size_);        // clamp to the end
+  const size_type count = std::min(n, size_ - start);  // clamp to what is left
+  return StringPiece(data_ + start, count);
+}
+
+StringPiece::size_type StringPiece::find(const StringPiece& s,
+                                         size_type pos) const {
+  if (size_ < pos) return npos;
+  const_pointer const stop = data_ + size_;
+  const_pointer hit = std::search(data_ + pos, stop, s.begin(), s.end());
+  // "hit == stop" means no occurrence, unless s is empty and pos == size_.
+  return (hit == stop && !s.empty()) ? npos : size_type(hit - data_);
+}
+
+StringPiece::size_type StringPiece::find(char c, size_type pos) const {
+  if (pos >= size_) return npos;  // also true for every pos when size_ == 0
+  const_pointer hit = std::find(data_ + pos, end(), c);
+  return hit == end() ? npos : size_type(hit - data_);
+}
+
+StringPiece::size_type StringPiece::rfind(const StringPiece& s,
+                                          size_type pos) const {
+  if (s.size_ > size_) return npos;
+  if (s.empty()) return std::min(pos, size_);  // pos, clamped to size_
+  const_pointer limit = data_ + std::min(pos, size_ - s.size_) + s.size_;
+  const_pointer hit = std::find_end(data_, limit, s.begin(), s.end());
+  return hit == limit ? npos : size_type(hit - data_);
+}
+
+StringPiece::size_type StringPiece::rfind(char c, size_type pos) const {
+  if (size_ == 0) return npos;
+  // Clamp first: "pos + 1" would wrap to 0 when pos == npos (the default).
+  for (size_type i = std::min(pos, size_ - 1) + 1; i != 0;)
+    if (data_[--i] == c) return i;
+  return npos;
+}
+
+std::ostream& operator<<(std::ostream& o, const StringPiece& p) {
+  // Emit the raw bytes of p with a single unformatted write.
+  return o.write(p.data(), p.size());
+}
+
+}  // namespace re2
diff --git a/re2/stringpiece.h b/re2/stringpiece.h
new file mode 100644
index 0000000000000000000000000000000000000000..1d9c2d3d2c34d245d4dd78978c363e6b694041c5
--- /dev/null
+++ b/re2/stringpiece.h
@@ -0,0 +1,210 @@
+// Copyright 2001-2010 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef RE2_STRINGPIECE_H_
+#define RE2_STRINGPIECE_H_
+
+// A string-like object that points to a sized piece of memory.
+//
+// Functions or methods may use const StringPiece& parameters to accept either
+// a "const char*" or a "string" value that will be implicitly converted to
+// a StringPiece.  The implicit conversion means that it is often appropriate
+// to include this .h file in other files rather than forward-declaring
+// StringPiece as would be appropriate for most other Google classes.
+//
+// Systematic usage of StringPiece is encouraged as it will reduce unnecessary
+// conversions from "const char*" to "string" and back again.
+//
+//
+// Arghh!  I wish C++ literals were "string".
+
+// Doing this simplifies the logic below.
+#ifndef __has_include
+#define __has_include(x) 0
+#endif
+
+#include <stddef.h>
+#include <string.h>
+#include <algorithm>
+#include <iosfwd>
+#include <iterator>
+#include <string>
+#if __has_include(<string_view>) && __cplusplus >= 201703L
+#include <string_view>
+#endif
+
+namespace re2 {
+
+class StringPiece {
+ public:
+  typedef std::char_traits<char> traits_type;
+  typedef char value_type;
+  typedef char* pointer;
+  typedef const char* const_pointer;
+  typedef char& reference;
+  typedef const char& const_reference;
+  typedef const char* const_iterator;
+  typedef const_iterator iterator;
+  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+  typedef const_reverse_iterator reverse_iterator;
+  typedef size_t size_type;
+  typedef ptrdiff_t difference_type;
+  static const size_type npos = static_cast<size_type>(-1);
+
+  // We provide non-explicit singleton constructors so users can pass
+  // in a "const char*" or a "string" wherever a "StringPiece" is
+  // expected.
+  StringPiece()
+      : data_(NULL), size_(0) {}
+#if __has_include(<string_view>) && __cplusplus >= 201703L
+  StringPiece(const std::string_view& str)
+      : data_(str.data()), size_(str.size()) {}
+#endif
+  StringPiece(const std::string& str)
+      : data_(str.data()), size_(str.size()) {}
+  StringPiece(const char* str)
+      : data_(str), size_(str == NULL ? 0 : strlen(str)) {}  // NULL -> empty
+  StringPiece(const char* str, size_type len)
+      : data_(str), size_(len) {}
+
+  const_iterator begin() const { return data_; }
+  const_iterator end() const { return data_ + size_; }
+  const_reverse_iterator rbegin() const {
+    return const_reverse_iterator(data_ + size_);
+  }
+  const_reverse_iterator rend() const {
+    return const_reverse_iterator(data_);
+  }
+
+  size_type size() const { return size_; }
+  size_type length() const { return size_; }
+  bool empty() const { return size_ == 0; }
+
+  const_reference operator[](size_type i) const { return data_[i]; }
+  const_pointer data() const { return data_; }
+
+  void remove_prefix(size_type n) {  // unchecked: n must not exceed size()
+    data_ += n;
+    size_ -= n;
+  }
+
+  void remove_suffix(size_type n) {  // unchecked: n must not exceed size()
+    size_ -= n;
+  }
+
+  void set(const char* str) {
+    data_ = str;
+    size_ = str == NULL ? 0 : strlen(str);
+  }
+
+  void set(const char* str, size_type len) {
+    data_ = str;
+    size_ = len;
+  }
+
+  // Converts to `std::basic_string`.
+  template <typename A>
+  explicit operator std::basic_string<char, traits_type, A>() const {
+    if (!data_) return {};  // NULL data_: produce an empty string
+    return std::basic_string<char, traits_type, A>(data_, size_);
+  }
+
+  std::string as_string() const {
+    return std::string(data_, size_);
+  }
+
+  // We also define ToString() here, since many other string-like
+  // interfaces name the routine that converts to a C++ string
+  // "ToString", and it's confusing to have the method that does that
+  // for a StringPiece be called "as_string()".  We also leave the
+  // "as_string()" method defined here for existing code.
+  std::string ToString() const {
+    return std::string(data_, size_);
+  }
+
+  void CopyToString(std::string* target) const {
+    target->assign(data_, size_);
+  }
+
+  void AppendToString(std::string* target) const {
+    target->append(data_, size_);
+  }
+
+  size_type copy(char* buf, size_type n, size_type pos = 0) const;
+  StringPiece substr(size_type pos = 0, size_type n = npos) const;
+
+  int compare(const StringPiece& x) const {
+    size_type min_size = std::min(size(), x.size());
+    if (min_size > 0) {  // skip memcmp when either side is empty
+      int r = memcmp(data(), x.data(), min_size);
+      if (r < 0) return -1;
+      if (r > 0) return 1;
+    }
+    if (size() < x.size()) return -1;
+    if (size() > x.size()) return 1;
+    return 0;
+  }
+
+  // Does "this" start with "x"?
+  bool starts_with(const StringPiece& x) const {
+    return x.empty() ||
+           (size() >= x.size() && memcmp(data(), x.data(), x.size()) == 0);
+  }
+
+  // Does "this" end with "x"?
+  bool ends_with(const StringPiece& x) const {
+    return x.empty() ||
+           (size() >= x.size() &&
+            memcmp(data() + (size() - x.size()), x.data(), x.size()) == 0);
+  }
+
+  bool contains(const StringPiece& s) const {
+    return find(s) != npos;
+  }
+
+  size_type find(const StringPiece& s, size_type pos = 0) const;
+  size_type find(char c, size_type pos = 0) const;
+  size_type rfind(const StringPiece& s, size_type pos = npos) const;
+  size_type rfind(char c, size_type pos = npos) const;
+
+ private:
+  const_pointer data_;  // first byte of the piece; may be NULL when empty
+  size_type size_;      // number of bytes in the piece
+};
+
+inline bool operator==(const StringPiece& x, const StringPiece& y) {
+  if (x.size() != y.size()) return false;
+  // Shared buffer or two empty pieces: equal, and memcmp never sees NULL.
+  if (x.data() == y.data() || x.empty()) return true;
+  return memcmp(x.data(), y.data(), x.size()) == 0;
+}
+
+inline bool operator!=(const StringPiece& x, const StringPiece& y) {
+  return !operator==(x, y);  // exact negation of operator==
+}
+
+inline bool operator<(const StringPiece& x, const StringPiece& y) {
+  // compare() orders by the bytes of the common prefix first and, on a tie,
+  // by length, so a negative result is exactly "x sorts before y".
+  return x.compare(y) < 0;
+}
+
+inline bool operator>(const StringPiece& x, const StringPiece& y) {
+  return operator<(y, x);  // the same test with the operands swapped
+}
+
+inline bool operator<=(const StringPiece& x, const StringPiece& y) {
+  return !(y < x);  // i.e. "not (x > y)"
+}
+
+inline bool operator>=(const StringPiece& x, const StringPiece& y) {
+  return !operator<(x, y);  // i.e. "not (x < y)"
+}
+
+// Allow StringPiece to be logged.
+std::ostream& operator<<(std::ostream& o, const StringPiece& p);
+
+}  // namespace re2
+
+#endif  // RE2_STRINGPIECE_H_
diff --git a/re2/testing/filtered_re2_test.cc b/re2/testing/filtered_re2_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c788fdadc49b2f7ae280fef9289f79a5ee172fde
--- /dev/null
+++ b/re2/testing/filtered_re2_test.cc
@@ -0,0 +1,340 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+#include <utility>
+
+#include "util/test.h"
+#include "util/logging.h"
+#include "re2/filtered_re2.h"
+#include "re2/re2.h"
+
+namespace re2 {
+
+struct FilterTestVars {  // per-test bundle: a FilteredRE2 plus its inputs/outputs
+  FilterTestVars() {}
+  explicit FilterTestVars(int min_atom_len) : f(min_atom_len) {}  // forwarded to f
+
+  std::vector<std::string> atoms;  // filled in by f.Compile()
+  std::vector<int> atom_indices;   // passed to f.AllMatches() as the matched atoms
+  std::vector<int> matches;        // regexp ids written by f.AllMatches()
+  RE2::Options opts;               // options handed to f.Add()
+  FilteredRE2 f;                   // the object under test
+};
+
+TEST(FilteredRE2Test, EmptyTest) {
+  FilterTestVars v;
+
+  v.f.Compile(&v.atoms);  // deliberately called before any Add()
+  EXPECT_EQ(0, v.atoms.size());
+
+  // Compile has no effect at all when called before Add: it will not
+  // record that it has been called and it will not clear the vector.
+  // The second point does not matter here, but the first point means
+  // that an error will be logged during the call to AllMatches.
+  v.f.AllMatches("foo", v.atom_indices, &v.matches);
+  EXPECT_EQ(0, v.matches.size());
+}
+
+TEST(FilteredRE2Test, SmallOrTest) {
+  FilterTestVars v(4);  // override the minimum atom length
+  int id;  // receives the id that Add() assigns to the regexp
+  v.f.Add("(foo|bar)", v.opts, &id);
+
+  v.f.Compile(&v.atoms);
+  EXPECT_EQ(0, v.atoms.size());  // presumably: "foo"/"bar" are shorter than 4
+
+  v.f.AllMatches("lemurs bar", v.atom_indices, &v.matches);  // no atoms passed
+  EXPECT_EQ(1, v.matches.size());
+  EXPECT_EQ(id, v.matches[0]);
+}
+
+TEST(FilteredRE2Test, SmallLatinTest) {
+  FilterTestVars v;
+  int id;  // receives the id that Add() assigns to the regexp
+
+  v.opts.set_encoding(RE2::Options::EncodingLatin1);
+  v.f.Add("\xde\xadQ\xbe\xef", v.opts, &id);
+  v.f.Compile(&v.atoms);
+  EXPECT_EQ(1, v.atoms.size());
+  EXPECT_EQ(v.atoms[0], "\xde\xadq\xbe\xef");  // 'Q' comes back as 'q'
+
+  v.atom_indices.push_back(0);  // report atom 0 as present in the text
+  v.f.AllMatches("foo\xde\xadQ\xbe\xeflemur", v.atom_indices, &v.matches);
+  EXPECT_EQ(1, v.matches.size());
+  EXPECT_EQ(id, v.matches[0]);
+}
+
+struct AtomTest {  // one table-driven case: input regexps and the atoms expected
+  const char* testname;  // used in failure logs and as a sanity check by tests
+  // If any test needs more than this many regexps or atoms, increase
+  // the size of the corresponding array.
+  const char* regexps[20];  // slots left out of the initializer are NULL
+  const char* atoms[20];    // slots left out of the initializer are NULL
+};
+
+AtomTest atom_tests[] = {  // NOTE: entries [0] and [2] are indexed directly below
+  {
+    // This test checks to make sure empty patterns are allowed.
+    "CheckEmptyPattern",
+    {""},
+    {}
+  }, {
+    // This test checks that all atoms of length greater than min length
+    // are found, and no atoms that are of smaller length are found.
+    "AllAtomsGtMinLengthFound", {
+      "(abc123|def456|ghi789).*mnop[x-z]+",
+      "abc..yyy..zz",
+      "mnmnpp[a-z]+PPP"
+    }, {
+      "abc123",
+      "def456",
+      "ghi789",
+      "mnop",
+      "abc",
+      "yyy",
+      "mnmnpp",
+      "ppp"
+    }
+  }, {
+    // Test to make sure that any atoms that have another atom as a
+    // substring in an OR are removed; that is, only the shortest
+    // substring is kept.
+    "SubstrAtomRemovesSuperStrInOr", {
+      "(abc123|abc|ghi789|abc1234).*[x-z]+",
+      "abcd..yyy..yyyzzz",
+      "mnmnpp[a-z]+PPP"
+    }, {
+      "abc",
+      "ghi789",
+      "abcd",
+      "yyy",
+      "yyyzzz",
+      "mnmnpp",
+      "ppp"
+    }
+  }, {
+    // Test character class expansion.
+    "CharClassExpansion", {
+      "m[a-c][d-f]n.*[x-z]+",
+      "[x-y]bcde[ab]"
+    }, {
+      "madn", "maen", "mafn",
+      "mbdn", "mben", "mbfn",
+      "mcdn", "mcen", "mcfn",
+      "xbcdea", "xbcdeb",
+      "ybcdea", "ybcdeb"
+    }
+  }, {
+    // Test upper/lower of non-ASCII.
+    "UnicodeLower", {
+      "(?i)ΔδΠϖπΣςσ",
+      "ΛΜΝΟΠ",
+      "ψρστυ",
+    }, {
+      "δδπππσσσ",
+      "λμνοπ",
+      "ψρστυ",
+    },
+  },
+};
+
+void AddRegexpsAndCompile(const char* regexps[],
+                          size_t n,
+                          struct FilterTestVars* v) {
+  // Register the first n patterns with v->opts, then compile into v->atoms.
+  int ignored_id;  // Add() reports an id that this helper does not need
+  for (const char** re = regexps; re != regexps + n; ++re)
+    v->f.Add(*re, v->opts, &ignored_id);
+  v->f.Compile(&v->atoms);
+}
+
+bool CheckExpectedAtoms(const char* atoms[],
+                        size_t n,
+                        const char* testname,
+                        struct FilterTestVars* v) {
+  // Compares the two atom lists irrespective of order; note that v->atoms is
+  // left sorted as a side effect. Both lists are logged when they differ.
+  std::vector<std::string> want(atoms, atoms + n);
+  std::vector<std::string>& got = v->atoms;
+
+  std::sort(got.begin(), got.end());
+  std::sort(want.begin(), want.end());
+  const bool pass = (want == got);
+
+  if (pass)
+    return true;
+
+  LOG(ERROR) << "Failed " << testname;
+  LOG(ERROR) << "Expected #atoms = " << want.size();
+  for (const std::string& atom : want)
+    LOG(ERROR) << atom;
+  LOG(ERROR) << "Found #atoms = " << got.size();
+  for (const std::string& atom : got)
+    LOG(ERROR) << atom;
+
+  // Reached only on mismatch.
+  return false;
+}
+
+TEST(FilteredRE2Test, AtomTests) {
+  int nfail = 0;  // number of atom_tests entries whose atoms mismatched
+  for (size_t i = 0; i < arraysize(atom_tests); i++) {
+    FilterTestVars v;  // fresh FilteredRE2 for every entry
+    AtomTest* t = &atom_tests[i];
+    size_t nregexp, natom;  // counts of leading non-NULL entries in each array
+    for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
+      if (t->regexps[nregexp] == NULL)
+        break;
+    for (natom = 0; natom < arraysize(t->atoms); natom++)
+      if (t->atoms[natom] == NULL)
+        break;
+    AddRegexpsAndCompile(t->regexps, nregexp, &v);
+    if (!CheckExpectedAtoms(t->atoms, natom, t->testname, &v))
+      nfail++;
+  }
+  EXPECT_EQ(0, nfail);
+}
+
+void FindAtomIndices(const std::vector<std::string>& atoms,
+                     const std::vector<std::string>& matched_atoms,
+                     std::vector<int>* atom_indices) {
+  // For each matched atom, in order, record the index of its first
+  // occurrence in atoms; matched atoms that are absent are skipped.
+  atom_indices->clear();
+  for (const std::string& wanted : matched_atoms) {
+    std::vector<std::string>::const_iterator hit =
+        std::find(atoms.begin(), atoms.end(), wanted);
+    if (hit != atoms.end())
+      atom_indices->push_back(static_cast<int>(hit - atoms.begin()));
+  }
+}
+
+TEST(FilteredRE2Test, MatchEmptyPattern) {
+  FilterTestVars v;
+  AtomTest* t = &atom_tests[0];
+  // We are using the regexps used in one of the atom tests
+  // for this test. Adding the EXPECT here to make sure
+  // the index we use for the test is for the correct test.
+  EXPECT_EQ("CheckEmptyPattern", std::string(t->testname));
+  size_t nregexp;  // number of leading non-NULL entries in t->regexps
+  for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
+    if (t->regexps[nregexp] == NULL)
+      break;
+  AddRegexpsAndCompile(t->regexps, nregexp, &v);
+  std::string text = "0123";
+  // atom_ids stays empty: the CheckEmptyPattern entry expects no atoms.
+  std::vector<int> atom_ids;
+  EXPECT_EQ(0, v.f.FirstMatch(text, atom_ids));
+}
+
+TEST(FilteredRE2Test, MatchTests) {
+  FilterTestVars v;
+  AtomTest* t = &atom_tests[2];
+  // We are using the regexps used in one of the atom tests
+  // for this test.
+  EXPECT_EQ("SubstrAtomRemovesSuperStrInOr", std::string(t->testname));
+  size_t nregexp;  // number of leading non-NULL entries in t->regexps
+  for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
+    if (t->regexps[nregexp] == NULL)
+      break;
+  AddRegexpsAndCompile(t->regexps, nregexp, &v);
+
+  std::string text = "abc121212xyz";
+  // atoms = abc
+  std::vector<int> atom_ids;
+  std::vector<std::string> atoms;
+  atoms.push_back("abc");
+  FindAtomIndices(v.atoms, atoms, &atom_ids);  // atom strings -> v.atoms indices
+  std::vector<int> matching_regexps;
+  v.f.AllMatches(text, atom_ids, &matching_regexps);
+  EXPECT_EQ(1, matching_regexps.size());
+
+  text = "abc12312yyyzzz";
+  atoms.clear();
+  atoms.push_back("abc");
+  atoms.push_back("yyy");
+  atoms.push_back("yyyzzz");
+  FindAtomIndices(v.atoms, atoms, &atom_ids);
+  v.f.AllMatches(text, atom_ids, &matching_regexps);
+  EXPECT_EQ(1, matching_regexps.size());
+
+  text = "abcd12yyy32yyyzzz";
+  atoms.clear();
+  atoms.push_back("abc");
+  atoms.push_back("abcd");
+  atoms.push_back("yyy");
+  atoms.push_back("yyyzzz");
+  FindAtomIndices(v.atoms, atoms, &atom_ids);
+  LOG(INFO) << "S: " << atom_ids.size();  // debug dump of the ids passed below
+  for (size_t i = 0; i < atom_ids.size(); i++)
+    LOG(INFO) << "i: " << i << " : " << atom_ids[i];
+  v.f.AllMatches(text, atom_ids, &matching_regexps);
+  EXPECT_EQ(2, matching_regexps.size());
+}
+
+TEST(FilteredRE2Test, EmptyStringInStringSetBug) {
+  // Bug due to find() finding "" at the start of everything in a string
+  // set and thus SimplifyStringSet() would end up erasing everything.
+  // In order to test this, we have to keep PrefilterTree from discarding
+  // the OR entirely, so we have to make the minimum atom length zero.
+
+  FilterTestVars v(0);  // override the minimum atom length
+  const char* regexps[] = {"-R.+(|ADD=;AA){12}}"};
+  const char* atoms[] = {"", "-r", "add=;aa", "}"};  // lower-cased, any order
+  AddRegexpsAndCompile(regexps, arraysize(regexps), &v);
+  EXPECT_TRUE(CheckExpectedAtoms(atoms, arraysize(atoms),
+                                 "EmptyStringInStringSetBug", &v));
+}
+
+TEST(FilteredRE2Test, MoveSemantics) {
+  FilterTestVars v1;
+  int id;  // receives the id that Add() assigns to the regexp
+  v1.f.Add("foo\\d+", v1.opts, &id);
+  EXPECT_EQ(0, id);
+  v1.f.Compile(&v1.atoms);
+  EXPECT_EQ(1, v1.atoms.size());
+  EXPECT_EQ("foo", v1.atoms[0]);
+  v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);  // {0}: atom 0 is present
+  EXPECT_EQ(1, v1.matches.size());
+  EXPECT_EQ(0, v1.matches[0]);
+  v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
+  EXPECT_EQ(0, v1.matches.size());
+
+  // The moved-to object should do what the moved-from object did.
+  FilterTestVars v2;
+  v2.f = std::move(v1.f);  // only the FilteredRE2 moves; v1.atoms stays in v1
+  v2.f.AllMatches("abc foo1 xyz", {0}, &v2.matches);
+  EXPECT_EQ(1, v2.matches.size());
+  EXPECT_EQ(0, v2.matches[0]);
+  v2.f.AllMatches("abc bar2 xyz", {0}, &v2.matches);
+  EXPECT_EQ(0, v2.matches.size());
+
+  // The moved-from object should have been reset and be reusable.
+  v1.f.Add("bar\\d+", v1.opts, &id);
+  EXPECT_EQ(0, id);
+  v1.f.Compile(&v1.atoms);
+  EXPECT_EQ(1, v1.atoms.size());
+  EXPECT_EQ("bar", v1.atoms[0]);
+  v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
+  EXPECT_EQ(0, v1.matches.size());
+  v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
+  EXPECT_EQ(1, v1.matches.size());
+  EXPECT_EQ(0, v1.matches[0]);
+
+  // Verify that "overwriting" works and also doesn't leak memory.
+  // (The latter will need a leak detector such as LeakSanitizer.)
+  v1.f = std::move(v2.f);  // v1.f currently holds the "bar" filter
+  v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches);
+  EXPECT_EQ(1, v1.matches.size());
+  EXPECT_EQ(0, v1.matches[0]);
+  v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches);
+  EXPECT_EQ(0, v1.matches.size());
+}
+
+}  //  namespace re2
diff --git a/re2/testing/re2_arg_test.cc b/re2/testing/re2_arg_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f62e17cf4772c688a661cc262007c462ae373dd2
--- /dev/null
+++ b/re2/testing/re2_arg_test.cc
@@ -0,0 +1,160 @@
+// Copyright 2005 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This tests to make sure numbers are parsed from strings
+// correctly.
+// Todo: Expand the test to validate strings parsed to the other types
+// supported by RE2::Arg class
+
+#include <stdint.h>
+#include <string.h>
+
+#include "util/test.h"
+#include "util/logging.h"
+#include "re2/re2.h"
+
+namespace re2 {
+
+struct SuccessTable {  // one row of kSuccessTable
+  const char * value_string;  // text handed to RE2::Arg::Parse
+  int64_t value;  // expected result; compared only where success[] is true
+  bool success[6];  // per type, in order: i16, u16, i32, u32, i64, u64
+};
+
+// Test boundary cases for different integral sizes.
+// Specifically I want to make sure that values outside the boundaries
+// of an integral type will fail and that negative numbers will fail
+// for unsigned types. The following table contains the boundaries for
+// the various integral types and has entries for whether or not each
+// type can contain the given value.
+const SuccessTable kSuccessTable[] = {
+// string       integer value     i16    u16    i32    u32    i64    u64
+// 0 to 2^7-1
+{ "0",          0,              { true,  true,  true,  true,  true,  true  }},
+{ "127",        127,            { true,  true,  true,  true,  true,  true  }},
+
+// -1 to -2^7
+{ "-1",         -1,             { true,  false, true,  false, true,  false }},
+{ "-128",       -128,           { true,  false, true,  false, true,  false }},
+
+// 2^7 to 2^8-1
+{ "128",        128,            { true,  true,  true,  true,  true,  true  }},
+{ "255",        255,            { true,  true,  true,  true,  true,  true  }},
+
+// 2^8 to 2^15-1
+{ "256",        256,            { true,  true,  true,  true,  true,  true  }},
+{ "32767",      32767,          { true,  true,  true,  true,  true,  true  }},
+
+// -2^7-1 to -2^15
+{ "-129",       -129,           { true,  false, true,  false, true,  false }},
+{ "-32768",     -32768,         { true,  false, true,  false, true,  false }},
+
+// 2^15 to 2^16-1
+{ "32768",      32768,          { false, true,  true,  true,  true,  true  }},
+{ "65535",      65535,          { false, true,  true,  true,  true,  true  }},
+
+// 2^16 to 2^31-1
+{ "65536",      65536,          { false, false, true,  true,  true,  true  }},
+{ "2147483647", 2147483647,     { false, false, true,  true,  true,  true  }},
+
+// -2^15-1 to -2^31
+{ "-32769",     -32769,         { false, false, true,  false, true,  false }},
+{ "-2147483648", static_cast<int64_t>(0xFFFFFFFF80000000LL),
+  { false, false, true,  false, true,  false }},
+
+// 2^31 to 2^32-1
+{ "2147483648", 2147483648U,    { false, false, false, true,  true,  true  }},
+{ "4294967295", 4294967295U,    { false, false, false, true,  true,  true  }},
+
+// 2^32 to 2^63-1
+{ "4294967296", 4294967296LL,   { false, false, false, false, true,  true  }},
+{ "9223372036854775807",
+  9223372036854775807LL,        { false, false, false, false, true,  true  }},
+
+// -2^31-1 to -2^63
+{ "-2147483649", -2147483649LL, { false, false, false, false, true,  false }},
+{ "-9223372036854775808", static_cast<int64_t>(0x8000000000000000LL),
+  { false, false, false, false, true,  false }},
+
+// 2^63 to 2^64-1
+{ "9223372036854775808", static_cast<int64_t>(9223372036854775808ULL),
+  { false, false, false, false, false, true  }},
+{ "18446744073709551615", static_cast<int64_t>(18446744073709551615ULL),
+  { false, false, false, false, false, true  }},
+
+// >= 2^64: fails for every type, so the value column (0) is never compared
+{ "18446744073709551616", 0,    { false, false, false, false, false, false }},
+};
+
+const int kNumStrings = arraysize(kSuccessTable);  // rows in kSuccessTable
+
+// PARSE_FOR_TYPE(type, column) parses each kSuccessTable string into a `type`
+// through RE2::Arg and checks the result against success[column].  It's ugly
+// to use a macro, but EXPECT_EQ apparently can't be used outside a TEST block;
+// this avoids code duplication and lets #type name the type in the message.
+#define PARSE_FOR_TYPE(type, column) {                                   \
+  type r;                                                                \
+  for (int i = 0; i < kNumStrings; ++i) {                                \
+    RE2::Arg arg(&r);                                                    \
+    const char* const p = kSuccessTable[i].value_string;                 \
+    bool retval = arg.Parse(p, strlen(p));                               \
+    bool success = kSuccessTable[i].success[column];                     \
+    EXPECT_EQ(retval, success)                                           \
+        << "Parsing '" << p << "' for type " #type " should return "     \
+        << success;                                                      \
+    if (success) {                                                       \
+      EXPECT_EQ(r, (type)kSuccessTable[i].value);                        \
+    }                                                                    \
+  }                                                                      \
+}
+
+TEST(RE2ArgTest, Int16Test) {
+  PARSE_FOR_TYPE(int16_t, 0);  // success[0] is the i16 column
+}
+
+TEST(RE2ArgTest, Uint16Test) {
+  PARSE_FOR_TYPE(uint16_t, 1);  // success[1] is the u16 column
+}
+
+TEST(RE2ArgTest, Int32Test) {
+  PARSE_FOR_TYPE(int32_t, 2);  // success[2] is the i32 column
+}
+
+TEST(RE2ArgTest, Uint32Test) {
+  PARSE_FOR_TYPE(uint32_t, 3);  // success[3] is the u32 column
+}
+
+TEST(RE2ArgTest, Int64Test) {
+  PARSE_FOR_TYPE(int64_t, 4);  // success[4] is the i64 column
+}
+
+TEST(RE2ArgTest, Uint64Test) {
+  PARSE_FOR_TYPE(uint64_t, 5);  // success[5] is the u64 column
+}
+
+TEST(RE2ArgTest, ParseFromTest) {
+#if !defined(_MSC_VER)  // NOTE(review): reason for skipping MSVC is not stated
+  struct {
+    bool ParseFrom(const char* str, size_t n) {
+      LOG(INFO) << "str = " << str << ", n = " << n;
+      return true;
+    }
+  } obj1;
+  RE2::Arg arg1(&obj1);  // Parse() is expected to report obj1.ParseFrom()'s result
+  EXPECT_TRUE(arg1.Parse("one", 3));
+
+  struct {
+    bool ParseFrom(const char* str, size_t n) {
+      LOG(INFO) << "str = " << str << ", n = " << n;
+      return false;
+    }
+    // Ensure that RE2::Arg works even with overloaded ParseFrom().
+    void ParseFrom(const char* str) {}
+  } obj2;
+  RE2::Arg arg2(&obj2);
+  EXPECT_FALSE(arg2.Parse("two", 3));
+#endif
+}
+
+}  // namespace re2
diff --git a/re2/testing/re2_test.cc b/re2/testing/re2_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3ee20531e4f0b2f5d3900702d216e33c4a03b691
--- /dev/null
+++ b/re2/testing/re2_test.cc
@@ -0,0 +1,1660 @@
+// -*- coding: utf-8 -*-
+// Copyright 2002-2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// TODO: Test extractions for PartialMatch/Consume
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
+#include <sys/mman.h>
+#include <unistd.h>  /* for sysconf */
+#endif
+
+#include "util/test.h"
+#include "util/logging.h"
+#include "util/strutil.h"
+#include "re2/re2.h"
+
+
+namespace re2 {
+
+TEST(RE2, HexTests) {  // Hex() and CRadix() must both yield the C++ literal value
+#define ASSERT_HEX(type, value)                                         \
+  do {                                                                  \
+    type v;                                                             \
+    ASSERT_TRUE(                                                        \
+        RE2::FullMatch(#value, "([0-9a-fA-F]+)[uUlL]*", RE2::Hex(&v))); \
+    ASSERT_EQ(v, 0x##value);                                            \
+    ASSERT_TRUE(RE2::FullMatch("0x" #value, "([0-9a-fA-FxX]+)[uUlL]*",  \
+                               RE2::CRadix(&v)));                       \
+    ASSERT_EQ(v, 0x##value);                                            \
+  } while (0)
+
+  ASSERT_HEX(short,              2bad);
+  ASSERT_HEX(unsigned short,     2badU);
+  ASSERT_HEX(int,                dead);
+  ASSERT_HEX(unsigned int,       deadU);
+  ASSERT_HEX(long,               7eadbeefL);
+  ASSERT_HEX(unsigned long,      deadbeefUL);
+  ASSERT_HEX(long long,          12345678deadbeefLL);
+  ASSERT_HEX(unsigned long long, cafebabedeadbeefULL);
+
+#undef ASSERT_HEX
+}
+
+TEST(RE2, OctalTests) {  // Octal() and CRadix() must both yield the literal value
+#define ASSERT_OCTAL(type, value)                                           \
+  do {                                                                      \
+    type v;                                                                 \
+    ASSERT_TRUE(RE2::FullMatch(#value, "([0-7]+)[uUlL]*", RE2::Octal(&v))); \
+    ASSERT_EQ(v, 0##value);                                                 \
+    ASSERT_TRUE(RE2::FullMatch("0" #value, "([0-9a-fA-FxX]+)[uUlL]*",       \
+                               RE2::CRadix(&v)));                           \
+    ASSERT_EQ(v, 0##value);                                                 \
+  } while (0)
+
+  ASSERT_OCTAL(short,              77777);
+  ASSERT_OCTAL(unsigned short,     177777U);
+  ASSERT_OCTAL(int,                17777777777);
+  ASSERT_OCTAL(unsigned int,       37777777777U);
+  ASSERT_OCTAL(long,               17777777777L);
+  ASSERT_OCTAL(unsigned long,      37777777777UL);
+  ASSERT_OCTAL(long long,          777777777777777777777LL);
+  ASSERT_OCTAL(unsigned long long, 1777777777777777777777ULL);
+
+#undef ASSERT_OCTAL
+}
+
+TEST(RE2, DecimalTests) {  // plain &v and CRadix(&v) must both yield the literal
+#define ASSERT_DECIMAL(type, value)                                            \
+  do {                                                                         \
+    type v;                                                                    \
+    ASSERT_TRUE(RE2::FullMatch(#value, "(-?[0-9]+)[uUlL]*", &v));              \
+    ASSERT_EQ(v, value);                                                       \
+    ASSERT_TRUE(                                                               \
+        RE2::FullMatch(#value, "(-?[0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
+    ASSERT_EQ(v, value);                                                       \
+  } while (0)
+
+  ASSERT_DECIMAL(short,              -1);
+  ASSERT_DECIMAL(unsigned short,     9999);
+  ASSERT_DECIMAL(int,                -1000);
+  ASSERT_DECIMAL(unsigned int,       12345U);
+  ASSERT_DECIMAL(long,               -10000000L);
+  ASSERT_DECIMAL(unsigned long,      3083324652U);
+  ASSERT_DECIMAL(long long,          -100000000000000LL);
+  ASSERT_DECIMAL(unsigned long long, 1234567890987654321ULL);
+
+#undef ASSERT_DECIMAL
+}
+
+// TEST(RE2, Replace) {
+//   struct ReplaceTest {
+//     const char *regexp;
+//     const char *rewrite;
+//     const char *original;
+//     const char *single;
+//     const char *global;
+//     int        greplace_count;
+//   };
+//   static const ReplaceTest tests[] = {
+//     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
+//       "\\2\\1ay",
+//       "the quick brown fox jumps over the lazy dogs.",
+//       "ethay quick brown fox jumps over the lazy dogs.",
+//       "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
+//       9 },
+//     { "\\w+",
+//       "\\0-NOSPAM",
+//       "abcd.efghi@google.com",
+//       "abcd-NOSPAM.efghi@google.com",
+//       "abcd-NOSPAM.efghi-NOSPAM@google-NOSPAM.com-NOSPAM",
+//       4 },
+//     { "^",
+//       "(START)",
+//       "foo",
+//       "(START)foo",
+//       "(START)foo",
+//       1 },
+//     { "^",
+//       "(START)",
+//       "",
+//       "(START)",
+//       "(START)",
+//       1 },
+//     { "$",
+//       "(END)",
+//       "",
+//       "(END)",
+//       "(END)",
+//       1 },
+//     { "b",
+//       "bb",
+//       "ababababab",
+//       "abbabababab",
+//       "abbabbabbabbabb",
+//       5 },
+//     { "b",
+//       "bb",
+//       "bbbbbb",
+//       "bbbbbbb",
+//       "bbbbbbbbbbbb",
+//       6 },
+//     { "b+",
+//       "bb",
+//       "bbbbbb",
+//       "bb",
+//       "bb",
+//       1 },
+//     { "b*",
+//       "bb",
+//       "bbbbbb",
+//       "bb",
+//       "bb",
+//       1 },
+//     { "b*",
+//       "bb",
+//       "aaaaa",
+//       "bbaaaaa",
+//       "bbabbabbabbabbabb",
+//       6 },
+//     // Check newline handling
+//     { "a.*a",
+//       "(\\0)",
+//       "aba\naba",
+//       "(aba)\naba",
+//       "(aba)\n(aba)",
+//       2 },
+//     { "", NULL, NULL, NULL, NULL, 0 }
+//   };
+
+//   for (const ReplaceTest* t = tests; t->original != NULL; t++) {
+//     std::string one(t->original);
+//     ASSERT_TRUE(RE2::Replace(&one, t->regexp, t->rewrite));
+//     ASSERT_EQ(one, t->single);
+//     std::string all(t->original);
+//     ASSERT_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count)
+//       << "Got: " << all;
+//     ASSERT_EQ(all, t->global);
+//   }
+// }
+
+// static void TestCheckRewriteString(const char* regexp, const char* rewrite,
+//                               bool expect_ok) {
+//   std::string error;
+//   RE2 exp(regexp);
+//   bool actual_ok = exp.CheckRewriteString(rewrite, &error);
+//   EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error;
+// }
+
+// TEST(CheckRewriteString, all) {
+//   TestCheckRewriteString("abc", "foo", true);
+//   TestCheckRewriteString("abc", "foo\\", false);
+//   TestCheckRewriteString("abc", "foo\\0bar", true);
+
+//   TestCheckRewriteString("a(b)c", "foo", true);
+//   TestCheckRewriteString("a(b)c", "foo\\0bar", true);
+//   TestCheckRewriteString("a(b)c", "foo\\1bar", true);
+//   TestCheckRewriteString("a(b)c", "foo\\2bar", false);
+//   TestCheckRewriteString("a(b)c", "f\\\\2o\\1o", true);
+
+//   TestCheckRewriteString("a(b)(c)", "foo\\12", true);
+//   TestCheckRewriteString("a(b)(c)", "f\\2o\\1o", true);
+//   TestCheckRewriteString("a(b)(c)", "f\\oo\\1", false);
+// }
+
+// TEST(RE2, Extract) {
+//   std::string s;
+
+//   ASSERT_TRUE(RE2::Extract("boris@kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &s));
+//   ASSERT_EQ(s, "kremvax!boris");
+
+//   ASSERT_TRUE(RE2::Extract("foo", ".*", "'\\0'", &s));
+//   ASSERT_EQ(s, "'foo'");
+//   // check that false match doesn't overwrite
+//   ASSERT_FALSE(RE2::Extract("baz", "bar", "'\\0'", &s));
+//   ASSERT_EQ(s, "'foo'");
+// }
+
+// TEST(RE2, MaxSubmatchTooLarge) {
+//   std::string s;
+//   ASSERT_FALSE(RE2::Extract("foo", "f(o+)", "\\1\\2", &s));
+//   s = "foo";
+//   ASSERT_FALSE(RE2::Replace(&s, "f(o+)", "\\1\\2"));
+//   s = "foo";
+//   ASSERT_FALSE(RE2::GlobalReplace(&s, "f(o+)", "\\1\\2"));
+// }
+
+TEST(RE2, Consume) {
+  RE2 r("\\s*(\\w+)");    // matches a word, possibly preceded by whitespace
+  std::string word;
+
+  std::string s("   aaa b!@#$@#$cccc");
+  StringPiece input(s);
+
+  ASSERT_TRUE(RE2::Consume(&input, r, &word));
+  ASSERT_EQ(word, "aaa") << " input: " << input;
+  ASSERT_TRUE(RE2::Consume(&input, r, &word));
+  ASSERT_EQ(word, "b") << " input: " << input;
+  ASSERT_FALSE(RE2::Consume(&input, r, &word)) << " input: " << input;
+}
+
+TEST(RE2, ConsumeN) {
+  const std::string s(" one two three 4");
+  StringPiece input(s);
+
+  RE2::Arg argv[2];  // assigned below, just before the case that needs each one
+  const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
+
+  // 0 arg
+  EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 0));  // Skips "one".
+
+  // 1 arg
+  std::string word;
+  argv[0] = &word;
+  EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 1));
+  EXPECT_EQ("two", word);
+
+  // Multi-args
+  int n;
+  argv[1] = &n;
+  EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)\\s*(\\d+)", args, 2));
+  EXPECT_EQ("three", word);
+  EXPECT_EQ(4, n);
+}
+
+TEST(RE2, FindAndConsume) {
+  RE2 r("(\\w+)");      // matches a word
+  std::string word;
+
+  std::string s("   aaa b!@#$@#$cccc");
+  StringPiece input(s);
+
+  ASSERT_TRUE(RE2::FindAndConsume(&input, r, &word));
+  ASSERT_EQ(word, "aaa");
+  ASSERT_TRUE(RE2::FindAndConsume(&input, r, &word));
+  ASSERT_EQ(word, "b");
+  ASSERT_TRUE(RE2::FindAndConsume(&input, r, &word));  // skips over "!@#$@#$"
+  ASSERT_EQ(word, "cccc");
+  ASSERT_FALSE(RE2::FindAndConsume(&input, r, &word));
+
+  // Check that FindAndConsume works without any submatches.
+  // Earlier version used uninitialized data for
+  // length to consume.
+  input = "aaa";
+  ASSERT_TRUE(RE2::FindAndConsume(&input, "aaa"));
+  ASSERT_EQ(input, "");
+}
+
+TEST(RE2, FindAndConsumeN) {
+  const std::string s(" one two three 4");
+  StringPiece input(s);
+
+  RE2::Arg argv[2];  // assigned below, just before the case that needs each one
+  const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
+
+  // 0 arg
+  EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 0));  // Skips "one".
+
+  // 1 arg
+  std::string word;
+  argv[0] = &word;
+  EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 1));
+  EXPECT_EQ("two", word);
+
+  // Multi-args
+  int n;
+  argv[1] = &n;
+  EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)\\s*(\\d+)", args, 2));
+  EXPECT_EQ("three", word);
+  EXPECT_EQ(4, n);
+}
+
+TEST(RE2, MatchNumberPeculiarity) {
+  RE2 r("(foo)|(bar)|(baz)");  // only one of the three groups can participate
+  std::string word1;
+  std::string word2;
+  std::string word3;
+
+  ASSERT_TRUE(RE2::PartialMatch("foo", r, &word1, &word2, &word3));
+  ASSERT_EQ(word1, "foo");
+  ASSERT_EQ(word2, "");  // groups that did not participate come back empty
+  ASSERT_EQ(word3, "");
+  ASSERT_TRUE(RE2::PartialMatch("bar", r, &word1, &word2, &word3));
+  ASSERT_EQ(word1, "");
+  ASSERT_EQ(word2, "bar");
+  ASSERT_EQ(word3, "");
+  ASSERT_TRUE(RE2::PartialMatch("baz", r, &word1, &word2, &word3));
+  ASSERT_EQ(word1, "");
+  ASSERT_EQ(word2, "");
+  ASSERT_EQ(word3, "baz");
+  ASSERT_FALSE(RE2::PartialMatch("f", r, &word1, &word2, &word3));
+
+  std::string a;
+  ASSERT_TRUE(RE2::FullMatch("hello", "(foo)|hello", &a));
+  ASSERT_EQ(a, "");
+}
+
+TEST(RE2, Match) {
+  RE2 re("((\\w+):([0-9]+))");   // extracts host and port
+  StringPiece group[4];  // group[0] is the whole match, then the 3 captures
+
+  // No match.
+  StringPiece s = "zyzzyva";
+  ASSERT_FALSE(
+      re.Match(s, 0, s.size(), RE2::UNANCHORED, group, arraysize(group)));
+
+  // Matches and extracts.
+  s = "a chrisr:9000 here";
+  ASSERT_TRUE(
+      re.Match(s, 0, s.size(), RE2::UNANCHORED, group, arraysize(group)));
+  ASSERT_EQ(group[0], "chrisr:9000");
+  ASSERT_EQ(group[1], "chrisr:9000");
+  ASSERT_EQ(group[2], "chrisr");
+  ASSERT_EQ(group[3], "9000");
+
+  std::string all, host;
+  int port;  // third capture, parsed as an integer by PartialMatch
+  ASSERT_TRUE(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port));
+  ASSERT_EQ(all, "chrisr:9000");
+  ASSERT_EQ(host, "chrisr");
+  ASSERT_EQ(port, 9000);
+}
+
+static void TestRecursion(int size, const char* pattern) {
+  // Fill up a string repeating the pattern given.  An empty pattern has
+  // nothing to repeat (and `i % patlen` would divide by zero), so skip it.
+  const size_t patlen = strlen(pattern);
+  if (patlen == 0) return;
+  std::string domain(static_cast<size_t>(size), '\0');
+  for (size_t i = 0; i < domain.size(); i++)
+    domain[i] = pattern[i % patlen];
+  // Just make sure it doesn't crash due to too much recursion.
+  RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet);
+  RE2::FullMatch(domain, re);
+}
+
+// A meta-quoted string, interpreted as a pattern, should always match
+// the original unquoted string.
+static void TestQuoteMeta(const std::string& unquoted,
+                          const RE2::Options& options = RE2::DefaultOptions) {
+  std::string quoted = RE2::QuoteMeta(unquoted);
+  RE2 re(quoted, options);  // the quoted text is compiled as a pattern
+  EXPECT_TRUE(RE2::FullMatch(unquoted, re))
+      << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
+}
+
+// A meta-quoted string, interpreted as a pattern, should never match
+// a different string such as should_not_match.
+static void NegativeTestQuoteMeta(
+    const std::string& unquoted, const std::string& should_not_match,
+    const RE2::Options& options = RE2::DefaultOptions) {
+  std::string quoted = RE2::QuoteMeta(unquoted);
+  RE2 re(quoted, options);  // the quoted text is compiled as a pattern
+  EXPECT_FALSE(RE2::FullMatch(should_not_match, re))
+      << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
+}
+
+// Tests that quoted meta characters match their original strings,
+// and that a few things that shouldn't match indeed do not.
+TEST(QuoteMeta, Simple) {  // each string must match its own quoted form
+  TestQuoteMeta("foo");
+  TestQuoteMeta("foo.bar");
+  TestQuoteMeta("foo\\.bar");
+  TestQuoteMeta("[1-9]");
+  TestQuoteMeta("1.5-2.0?");
+  TestQuoteMeta("\\d");
+  TestQuoteMeta("Who doesn't like ice cream?");
+  TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
+  TestQuoteMeta("((?!)xxx).*yyy");
+  TestQuoteMeta("([");
+}
+TEST(QuoteMeta, SimpleNegative) {  // quoted 1st arg must not match the 2nd arg
+  NegativeTestQuoteMeta("foo", "bar");
+  NegativeTestQuoteMeta("...", "bar");
+  NegativeTestQuoteMeta("\\.", ".");
+  NegativeTestQuoteMeta("\\.", "..");
+  NegativeTestQuoteMeta("(a)", "a");
+  NegativeTestQuoteMeta("(a|b)", "a");
+  NegativeTestQuoteMeta("(a|b)", "(a)");
+  NegativeTestQuoteMeta("(a|b)", "a|b");
+  NegativeTestQuoteMeta("[0-9]", "0");
+  NegativeTestQuoteMeta("[0-9]", "0-9");
+  NegativeTestQuoteMeta("[0-9]", "[9]");
+  NegativeTestQuoteMeta("((?!)xxx)", "xxx");
+}
+
+TEST(QuoteMeta, Latin1) {
+  TestQuoteMeta("3\xb2 = 9", RE2::Latin1);  // 0xb2 is superscript two in Latin-1
+}
+
+TEST(QuoteMeta, UTF8) {
+  TestQuoteMeta("Plácido Domingo");
+  TestQuoteMeta("xyz");  // No fancy utf8.
+  TestQuoteMeta("\xc2\xb0");  // 2-byte utf8 -- a degree symbol.
+  TestQuoteMeta("27\xc2\xb0 degrees");  // As a middle character.
+  TestQuoteMeta("\xe2\x80\xb3");  // 3-byte utf8 -- a double prime.
+  TestQuoteMeta("\xf0\x9d\x85\x9f");  // 4-byte utf8 -- a music note.
+  TestQuoteMeta("27\xc2\xb0");  // Interpreted as Latin-1, this should
+                                // still work. NOTE(review): Latin1 not passed?
+  NegativeTestQuoteMeta("27\xc2\xb0",
+                        "27\\\xc2\\\xb0");  // 2-byte utf8 -- a degree symbol.
+}
+
+// TEST(QuoteMeta, HasNull) {
+//   std::string has_null;
+
+//   // string with one null character
+//   has_null += '\0';
+//   TestQuoteMeta(has_null);
+//   NegativeTestQuoteMeta(has_null, "");
+
+//   // Don't want null-followed-by-'1' to be interpreted as '\01'.
+//   has_null += '1';
+//   TestQuoteMeta(has_null);
+//   NegativeTestQuoteMeta(has_null, "\1");
+// }
+
+// TEST(ProgramSize, BigProgram) {
+//   RE2 re_simple("simple regexp");
+//   RE2 re_medium("medium.*regexp");
+//   RE2 re_complex("complex.{1,128}regexp");
+
+//   ASSERT_GT(re_simple.ProgramSize(), 0);
+//   ASSERT_GT(re_medium.ProgramSize(), re_simple.ProgramSize());
+//   ASSERT_GT(re_complex.ProgramSize(), re_medium.ProgramSize());
+
+//   ASSERT_GT(re_simple.ReverseProgramSize(), 0);
+//   ASSERT_GT(re_medium.ReverseProgramSize(), re_simple.ReverseProgramSize());
+//   ASSERT_GT(re_complex.ReverseProgramSize(), re_medium.ReverseProgramSize());
+// }
+
+// TEST(ProgramFanout, BigProgram) {
+//   RE2 re1("(?:(?:(?:(?:(?:.)?){1})*)+)");
+//   RE2 re10("(?:(?:(?:(?:(?:.)?){10})*)+)");
+//   RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)");
+//   RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)");
+
+//   std::vector<int> histogram;
+
+//   // 3 is the largest non-empty bucket and has 2 elements.
+//   ASSERT_EQ(3, re1.ProgramFanout(&histogram));
+//   ASSERT_EQ(2, histogram[3]);
+
+//   // 6 is the largest non-empty bucket and has 11 elements.
+//   ASSERT_EQ(6, re10.ProgramFanout(&histogram));
+//   ASSERT_EQ(11, histogram[6]);
+
+//   // 9 is the largest non-empty bucket and has 101 elements.
+//   ASSERT_EQ(9, re100.ProgramFanout(&histogram));
+//   ASSERT_EQ(101, histogram[9]);
+
+//   // 13 is the largest non-empty bucket and has 1001 elements.
+//   ASSERT_EQ(13, re1000.ProgramFanout(&histogram));
+//   ASSERT_EQ(1001, histogram[13]);
+
+//   // 2 is the largest non-empty bucket and has 2 elements.
+//   ASSERT_EQ(2, re1.ReverseProgramFanout(&histogram));
+//   ASSERT_EQ(2, histogram[2]);
+
+//   // 5 is the largest non-empty bucket and has 11 elements.
+//   ASSERT_EQ(5, re10.ReverseProgramFanout(&histogram));
+//   ASSERT_EQ(11, histogram[5]);
+
+//   // 9 is the largest non-empty bucket and has 101 elements.
+//   ASSERT_EQ(9, re100.ReverseProgramFanout(&histogram));
+//   ASSERT_EQ(101, histogram[9]);
+
+//   // 12 is the largest non-empty bucket and has 1001 elements.
+//   ASSERT_EQ(12, re1000.ReverseProgramFanout(&histogram));
+//   ASSERT_EQ(1001, histogram[12]);
+// }
+
+// Regression test for issue 956519, where an empty character set led to
+// a NULL dereference.  Each pattern below negates a class that is already
+// full, which is how an empty character set is produced.
+TEST(EmptyCharset, Fuzz) {
+  static const char *kEmpty[] = {
+    "[^\\S\\s]",
+    "[^\\S[:space:]]",
+    "[^\\D\\d]",
+    "[^\\D[:digit:]]"
+  };
+  for (size_t n = 0; n < arraysize(kEmpty); n++)
+    ASSERT_FALSE(RE2(kEmpty[n]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0));
+}
+
+// Bitstate relies on kInstFail instructions inside alternations or
+// capture groups having been "compiled away" before it runs.
+TEST(EmptyCharset, BitstateAssumptions) {
+  // The capture groups are what route these patterns to Bitstate.
+  static const char *kNops[] = {
+    "((((()))))" "[^\\S\\s]?",
+    "((((()))))" "([^\\S\\s])?",
+    "((((()))))" "([^\\S\\s]|[^\\S\\s])?",
+    "((((()))))" "(([^\\S\\s]|[^\\S\\s])|)"
+  };
+  StringPiece caps[6];
+  for (size_t n = 0; n < arraysize(kNops); n++)
+    ASSERT_TRUE(RE2(kNops[n]).Match("", 0, 0, RE2::UNANCHORED, caps, 6));
+}
+
+// Checks the name-to-index map reported for capturing groups.
+TEST(Capture, NamedGroups) {
+  {
+    RE2 unnamed("(hello world)");
+    ASSERT_EQ(unnamed.NumberOfCapturingGroups(), 1);
+    const std::map<std::string, int>& names = unnamed.NamedCapturingGroups();
+    ASSERT_EQ(names.size(), 0);
+  }
+
+  {
+    RE2 mixed("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
+    ASSERT_EQ(mixed.NumberOfCapturingGroups(), 6);
+    const std::map<std::string, int>& names = mixed.NamedCapturingGroups();
+    ASSERT_EQ(names.size(), 4);
+    ASSERT_EQ(names.find("A")->second, 1);
+    ASSERT_EQ(names.find("B")->second, 2);
+    ASSERT_EQ(names.find("C")->second, 3);
+    ASSERT_EQ(names.find("D")->second, 6);  // groups 4 and 5 carry no name
+  }
+}
+
+TEST(RE2, CapturedGroupTest) {  // Named groups line up with FullMatchN args.
+  RE2 re("directions from (?P<S>.*) to (?P<D>.*)");
+  int num_groups = re.NumberOfCapturingGroups();
+  ASSERT_EQ(2, num_groups);  // fatal: num_groups is the FullMatchN arg count
+  std::string args[4];
+  RE2::Arg arg0(&args[0]);
+  RE2::Arg arg1(&args[1]);
+  RE2::Arg arg2(&args[2]);
+  RE2::Arg arg3(&args[3]);
+
+  const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3};
+  EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose",
+                              re, matches, num_groups));
+  const std::map<std::string, int>& named_groups = re.NamedCapturingGroups();
+  ASSERT_TRUE(named_groups.find("S") != named_groups.end());  // deref'd below
+  ASSERT_TRUE(named_groups.find("D") != named_groups.end());  // deref'd below
+
+  // The named group index is 1-based.
+  int source_group_index = named_groups.find("S")->second;
+  int destination_group_index = named_groups.find("D")->second;
+  EXPECT_EQ(1, source_group_index);
+  EXPECT_EQ(2, destination_group_index);
+
+  // The args array is zero-based, hence the "- 1" on each index.
+  EXPECT_EQ("mountain view", args[source_group_index - 1]);
+  EXPECT_EQ("san jose", args[destination_group_index - 1]);
+}
+
+TEST(RE2, FullMatchWithNoArgs) {  // FullMatch called without capture args.
+  ASSERT_TRUE(RE2::FullMatch("h", "h"));
+  ASSERT_TRUE(RE2::FullMatch("hello", "hello"));
+  ASSERT_TRUE(RE2::FullMatch("hello", "h.*o"));
+  ASSERT_FALSE(RE2::FullMatch("othello", "h.*o"));  // Must be anchored at front
+  ASSERT_FALSE(RE2::FullMatch("hello!", "h.*o"));   // Must be anchored at end
+}
+
+TEST(RE2, PartialMatch) {  // PartialMatch called without capture args.
+  ASSERT_TRUE(RE2::PartialMatch("x", "x"));
+  ASSERT_TRUE(RE2::PartialMatch("hello", "h.*o"));
+  ASSERT_TRUE(RE2::PartialMatch("othello", "h.*o"));  // text has a prefix
+  ASSERT_TRUE(RE2::PartialMatch("hello!", "h.*o"));  // text has a suffix
+  ASSERT_TRUE(RE2::PartialMatch("x", "((((((((((((((((((((x))))))))))))))))))))"));
+}
+
+TEST(RE2, PartialMatchN) {
+  RE2::Arg slot[2];
+  const RE2::Arg* const ptrs[2] = { &slot[0], &slot[1] };
+
+  // No captures requested.
+  EXPECT_TRUE(RE2::PartialMatchN("hello", "e.*o", ptrs, 0));
+  EXPECT_FALSE(RE2::PartialMatchN("othello", "a.*o", ptrs, 0));
+
+  // One capture, parsed into an int.
+  int n;
+  slot[0] = &n;
+  EXPECT_TRUE(RE2::PartialMatchN("1001 nights", "(\\d+)", ptrs, 1));
+  EXPECT_EQ(1001, n);
+  EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", ptrs, 1));
+
+  // Two captures: an int followed by a string.
+  std::string w;
+  slot[1] = &w;
+  EXPECT_TRUE(RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", ptrs, 2));
+  EXPECT_EQ(42, n);
+  EXPECT_EQ("life", w);
+  EXPECT_FALSE(RE2::PartialMatchN("hi1", "(\\w+)(1)", ptrs, 2));
+}
+
+TEST(RE2, FullMatchZeroArg) {
+  // No capture arguments are passed.
+  ASSERT_TRUE(RE2::FullMatch("1001", "\\d+"));
+}
+
+TEST(RE2, FullMatchOneArg) {
+  int n;
+
+  // One capture parsed into an int.
+  ASSERT_TRUE(RE2::FullMatch("1001", "(\\d+)",   &n));
+  ASSERT_EQ(n, 1001);
+  ASSERT_TRUE(RE2::FullMatch("-123", "(-?\\d+)", &n));
+  ASSERT_EQ(n, -123);
+  ASSERT_FALSE(RE2::FullMatch("10", "()\\d+", &n));
+  ASSERT_FALSE(
+      RE2::FullMatch("1234567890123456789012345678901234567890", "(\\d+)", &n));
+}
+
+TEST(RE2, FullMatchIntegerArg) {
+  int n;
+
+  // The captured digits sit next to other digits in the text.
+  ASSERT_TRUE(RE2::FullMatch("1234", "1(\\d*)4", &n));
+  ASSERT_EQ(n, 23);
+  ASSERT_TRUE(RE2::FullMatch("1234", "(\\d)\\d+", &n));
+  ASSERT_EQ(n, 1);
+  ASSERT_TRUE(RE2::FullMatch("-1234", "(-\\d)\\d+", &n));
+  ASSERT_EQ(n, -1);
+  ASSERT_TRUE(RE2::PartialMatch("1234", "(\\d)", &n));
+  ASSERT_EQ(n, 1);
+  ASSERT_TRUE(RE2::PartialMatch("-1234", "(-\\d)", &n));
+  ASSERT_EQ(n, -1);
+}
+
+TEST(RE2, FullMatchStringArg) {
+  std::string inner;
+  // One capture copied into a std::string.
+  ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", &inner));
+  ASSERT_EQ(inner, std::string("ell"));
+}
+
+TEST(RE2, FullMatchStringPieceArg) {
+  int number;
+  // First capture goes to a StringPiece, second to an int.
+  StringPiece word;
+  ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &word, &number));
+  ASSERT_EQ(word.size(), 4);
+  ASSERT_TRUE(memcmp(word.data(), "ruby", 4) == 0);
+  ASSERT_EQ(number, 1234);
+}
+
+TEST(RE2, FullMatchMultiArg) {
+  int number;
+  std::string word;
+  // Two captures: a std::string followed by an int.
+  ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &word, &number));
+  ASSERT_EQ(word, std::string("ruby"));
+  ASSERT_EQ(number, 1234);
+}
+
+TEST(RE2, FullMatchN) {
+  RE2::Arg slot[2];
+  const RE2::Arg* const ptrs[2] = { &slot[0], &slot[1] };
+
+  // No captures requested.
+  EXPECT_TRUE(RE2::FullMatchN("hello", "h.*o", ptrs, 0));
+  EXPECT_FALSE(RE2::FullMatchN("othello", "h.*o", ptrs, 0));
+
+  // One capture, parsed into an int.
+  int n;
+  slot[0] = &n;
+  EXPECT_TRUE(RE2::FullMatchN("1001", "(\\d+)", ptrs, 1));
+  EXPECT_EQ(1001, n);
+  EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", ptrs, 1));
+
+  // Two captures: an int followed by a string.
+  std::string w;
+  slot[1] = &w;
+  EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", ptrs, 2));
+  EXPECT_EQ(42, n);
+  EXPECT_EQ("life", w);
+  EXPECT_FALSE(RE2::FullMatchN("hi1", "(\\w+)(1)", ptrs, 2));
+}
+
+TEST(RE2, FullMatchIgnoredArg) {
+  int n;
+  std::string w;
+
+  // A (void*)NULL argument: the middle capture is not stored anywhere.
+  ASSERT_TRUE(
+      RE2::FullMatch("ruby:1234", "(\\w+)(:)(\\d+)", &w, (void*)NULL, &n));
+  ASSERT_EQ(w, std::string("ruby"));
+  ASSERT_EQ(n, 1234);
+
+  // The same holds for a C++11 nullptr argument.
+  ASSERT_TRUE(RE2::FullMatch("rubz:1235", "(\\w+)(:)(\\d+)", &w, nullptr, &n));
+  ASSERT_EQ(w, std::string("rubz"));
+  ASSERT_EQ(n, 1235);
+}
+
+TEST(RE2, FullMatchTypedNullArg) {  // NULL pointers cast to a concrete type.
+  std::string s;
+
+  // Ignore non-void* NULL arg
+  ASSERT_TRUE(RE2::FullMatch("hello", "he(.*)lo", (char*)NULL));
+  ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (std::string*)NULL));
+  ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (StringPiece*)NULL));
+  ASSERT_TRUE(RE2::FullMatch("1234", "(.*)", (int*)NULL));
+  ASSERT_TRUE(RE2::FullMatch("1234567890123456", "(.*)", (long long*)NULL));
+  ASSERT_TRUE(RE2::FullMatch("123.4567890123456", "(.*)", (double*)NULL));
+  ASSERT_TRUE(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL));
+
+  // Fail on non-void* NULL arg if the match doesn't parse for the given type.
+  ASSERT_FALSE(RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL));
+  ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", (int*)NULL));
+  ASSERT_FALSE(RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL));
+  ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", (double*)NULL));
+  ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", (float*)NULL));
+}
+
+// // Check that numeric parsing code does not read past the end of
+// // the number being parsed.
+// // This implementation requires mmap(2) et al. and thus cannot
+// // be used unless they are available.
+// TEST(RE2, NULTerminated) {
+// #if defined(_POSIX_MAPPED_FILES) && _POSIX_MAPPED_FILES > 0
+//   char *v;
+//   int x;
+//   long pagesize = sysconf(_SC_PAGE_SIZE);
+
+// #ifndef MAP_ANONYMOUS
+// #define MAP_ANONYMOUS MAP_ANON
+// #endif
+//   v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE,
+//                               MAP_ANONYMOUS|MAP_PRIVATE, -1, 0));
+//   ASSERT_TRUE(v != reinterpret_cast<char*>(-1));
+//   LOG(INFO) << "Memory at " << (void*)v;
+//   ASSERT_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno;
+//   v[pagesize - 1] = '1';
+
+//   x = 0;
+//   ASSERT_TRUE(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x));
+//   ASSERT_EQ(x, 1);
+// #endif
+// }
+
+TEST(RE2, FullMatchTypeTests) {
+  // One scoped section per destination type; each checks in-range values
+  std::string zeros(1000, '0');  // 1000 '0' characters, used as padding
+  {  // char
+    char c;
+    ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
+    ASSERT_EQ(c, 'H');
+  }
+  {  // unsigned char
+    unsigned char c;
+    ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
+    ASSERT_EQ(c, static_cast<unsigned char>('H'));
+  }
+  {  // int16_t: limits and one past each limit
+    int16_t v;
+    ASSERT_TRUE(RE2::FullMatch("100",     "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
+    ASSERT_TRUE(RE2::FullMatch("-100",    "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
+    ASSERT_TRUE(RE2::FullMatch("32767",   "(-?\\d+)", &v)); ASSERT_EQ(v, 32767);
+    ASSERT_TRUE(RE2::FullMatch("-32768",  "(-?\\d+)", &v)); ASSERT_EQ(v, -32768);
+    ASSERT_FALSE(RE2::FullMatch("-32769", "(-?\\d+)", &v));
+    ASSERT_FALSE(RE2::FullMatch("32768",  "(-?\\d+)", &v));
+  }
+  {  // uint16_t
+    uint16_t v;
+    ASSERT_TRUE(RE2::FullMatch("100",    "(\\d+)", &v)); ASSERT_EQ(v, 100);
+    ASSERT_TRUE(RE2::FullMatch("32767",  "(\\d+)", &v)); ASSERT_EQ(v, 32767);
+    ASSERT_TRUE(RE2::FullMatch("65535",  "(\\d+)", &v)); ASSERT_EQ(v, 65535);
+    ASSERT_FALSE(RE2::FullMatch("65536", "(\\d+)", &v));
+  }
+  {  // int32_t, including zero-padded input and RE2::CRadix
+    int32_t v;
+    static const int32_t max = INT32_C(0x7fffffff);
+    static const int32_t min = -max - 1;
+    ASSERT_TRUE(RE2::FullMatch("100",          "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
+    ASSERT_TRUE(RE2::FullMatch("-100",         "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
+    ASSERT_TRUE(RE2::FullMatch("2147483647",   "(-?\\d+)", &v)); ASSERT_EQ(v, max);
+    ASSERT_TRUE(RE2::FullMatch("-2147483648",  "(-?\\d+)", &v)); ASSERT_EQ(v, min);
+    ASSERT_FALSE(RE2::FullMatch("-2147483649", "(-?\\d+)", &v));
+    ASSERT_FALSE(RE2::FullMatch("2147483648",  "(-?\\d+)", &v));
+
+    ASSERT_TRUE(RE2::FullMatch(zeros + "2147483647", "(-?\\d+)", &v));
+    ASSERT_EQ(v, max);
+    ASSERT_TRUE(RE2::FullMatch("-" + zeros + "2147483648", "(-?\\d+)", &v));
+    ASSERT_EQ(v, min);
+
+    ASSERT_FALSE(RE2::FullMatch("-" + zeros + "2147483649", "(-?\\d+)", &v));
+    ASSERT_TRUE(RE2::FullMatch("0x7fffffff", "(.*)", RE2::CRadix(&v)));
+    ASSERT_EQ(v, max);
+    ASSERT_FALSE(RE2::FullMatch("000x7fffffff", "(.*)", RE2::CRadix(&v)));
+  }
+  {  // uint32_t
+    uint32_t v;
+    static const uint32_t max = UINT32_C(0xffffffff);
+    ASSERT_TRUE(RE2::FullMatch("100",         "(\\d+)", &v)); ASSERT_EQ(v, 100);
+    ASSERT_TRUE(RE2::FullMatch("4294967295",  "(\\d+)", &v)); ASSERT_EQ(v, max);
+    ASSERT_FALSE(RE2::FullMatch("4294967296", "(\\d+)", &v));
+    ASSERT_FALSE(RE2::FullMatch("-1",         "(\\d+)", &v));
+
+    ASSERT_TRUE(RE2::FullMatch(zeros + "4294967295", "(\\d+)", &v)); ASSERT_EQ(v, max);
+  }
+  {  // int64_t
+    int64_t v;
+    static const int64_t max = INT64_C(0x7fffffffffffffff);
+    static const int64_t min = -max - 1;
+    std::string str;
+
+    ASSERT_TRUE(RE2::FullMatch("100",  "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
+    ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
+
+    str = std::to_string(max);
+    ASSERT_TRUE(RE2::FullMatch(str,    "(-?\\d+)", &v)); ASSERT_EQ(v, max);
+
+    str = std::to_string(min);
+    ASSERT_TRUE(RE2::FullMatch(str,    "(-?\\d+)", &v)); ASSERT_EQ(v, min);
+
+    str = std::to_string(max);
+    ASSERT_NE(str.back(), '9');  // so the increment below stays a digit
+    str.back()++;  // bump the last digit: text is now one past max
+    ASSERT_FALSE(RE2::FullMatch(str,   "(-?\\d+)", &v));
+
+    str = std::to_string(min);
+    ASSERT_NE(str.back(), '9');  // so the increment below stays a digit
+    str.back()++;  // bump the last digit: text is now one below min
+    ASSERT_FALSE(RE2::FullMatch(str,   "(-?\\d+)", &v));
+  }
+  {  // uint64_t (the negative input is parsed into an int64_t)
+    uint64_t v;
+    int64_t v2;
+    static const uint64_t max = UINT64_C(0xffffffffffffffff);
+    std::string str;
+
+    ASSERT_TRUE(RE2::FullMatch("100",  "(-?\\d+)", &v));  ASSERT_EQ(v, 100);
+    ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v2)); ASSERT_EQ(v2, -100);
+
+    str = std::to_string(max);
+    ASSERT_TRUE(RE2::FullMatch(str,    "(-?\\d+)", &v)); ASSERT_EQ(v, max);
+
+    ASSERT_NE(str.back(), '9');  // so the increment below stays a digit
+    str.back()++;  // bump the last digit: text is now one past max
+    ASSERT_FALSE(RE2::FullMatch(str,   "(-?\\d+)", &v));
+  }
+}
+
+TEST(RE2, FloatingPointFullMatchTypes) {  // Parsing into float and double.
+  std::string zeros(1000, '0');  // 1000 '0' characters, used as padding
+  {  // float
+    float v;
+    ASSERT_TRUE(RE2::FullMatch("100",   "(.*)", &v)); ASSERT_EQ(v, 100);
+    ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &v)); ASSERT_EQ(v, -100);
+    ASSERT_TRUE(RE2::FullMatch("1e23",  "(.*)", &v)); ASSERT_EQ(v, float(1e23));
+    ASSERT_TRUE(RE2::FullMatch(" 100",  "(.*)", &v)); ASSERT_EQ(v, 100);
+
+    ASSERT_TRUE(RE2::FullMatch(zeros + "1e23",  "(.*)", &v));
+    ASSERT_EQ(v, float(1e23));
+
+    // 6700000000081920.1 is an edge case.
+    // 6700000000081920 is exactly halfway between
+    // two float32s, so the .1 should make it round up.
+    // However, the .1 is outside the precision possible with
+    // a float64: the nearest float64 is 6700000000081920.
+    // So if the code uses strtod and then converts to float32,
+    // round-to-even will make it round down instead of up.
+    // To pass the test, the parser must call strtof directly.
+    // This test case is carefully chosen to use only a 17-digit
+    // number, since C does not guarantee to get the correctly
+    // rounded answer for strtod and strtof unless the input is
+    // short.
+    //
+    // This is known to fail on Cygwin and MinGW due to a broken
+    // implementation of strtof(3). And apparently MSVC too. Sigh.
+#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
+    ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &v));
+    ASSERT_EQ(v, 0.1f) << StringPrintf("%.8g != %.8g", v, 0.1f);
+    ASSERT_TRUE(RE2::FullMatch("6700000000081920.1", "(.*)", &v));
+    ASSERT_EQ(v, 6700000000081920.1f)
+      << StringPrintf("%.8g != %.8g", v, 6700000000081920.1f);
+#endif
+  }
+  {  // double
+    double v;
+    ASSERT_TRUE(RE2::FullMatch("100",   "(.*)", &v)); ASSERT_EQ(v, 100);
+    ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &v)); ASSERT_EQ(v, -100);
+    ASSERT_TRUE(RE2::FullMatch("1e23",  "(.*)", &v)); ASSERT_EQ(v, 1e23);
+    ASSERT_TRUE(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
+    ASSERT_EQ(v, double(1e23));
+
+    ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &v));
+    ASSERT_EQ(v, 0.1) << StringPrintf("%.17g != %.17g", v, 0.1);
+    ASSERT_TRUE(RE2::FullMatch("1.00000005960464485", "(.*)", &v));
+    ASSERT_EQ(v, 1.0000000596046448)
+      << StringPrintf("%.17g != %.17g", v, 1.0000000596046448);
+  }
+}
+
+TEST(RE2, FullMatchAnchored) {
+  int n;
+  // FullMatch has to cover the whole text, not just a substring of it.
+  ASSERT_FALSE(RE2::FullMatch("x1001", "(\\d+)",  &n));
+  ASSERT_FALSE(RE2::FullMatch("1001x", "(\\d+)",  &n));
+  ASSERT_TRUE(RE2::FullMatch("x1001",  "x(\\d+)", &n)); ASSERT_EQ(n, 1001);
+  ASSERT_TRUE(RE2::FullMatch("1001x",  "(\\d+)x", &n)); ASSERT_EQ(n, 1001);
+}
+
+TEST(RE2, FullMatchBraces) {
+  // Counted repetition "{5,}" applied to a character class.
+  ASSERT_TRUE(RE2::FullMatch("0abcd",  "[0-9a-f+.-]{5,}"));  // 5 characters
+  ASSERT_TRUE(RE2::FullMatch("0abcde", "[0-9a-f+.-]{5,}"));  // 6 characters
+  ASSERT_FALSE(RE2::FullMatch("0abc",  "[0-9a-f+.-]{5,}"));  // 4 characters
+}
+
+TEST(RE2, Complicated) {
+  // Alternation of two literals and a one-character class.
+  ASSERT_TRUE(RE2::FullMatch("foo", "foo|bar|[A-Z]"));
+  ASSERT_TRUE(RE2::FullMatch("bar", "foo|bar|[A-Z]"));
+  ASSERT_TRUE(RE2::FullMatch("X",   "foo|bar|[A-Z]"));
+  ASSERT_FALSE(RE2::FullMatch("XY", "foo|bar|[A-Z]"));  // two characters
+}
+
+// TEST(RE2, FullMatchEnd) {
+//   // Check full-match handling (needs '$' tacked on internally)
+//   ASSERT_TRUE(RE2::FullMatch("fo", "fo|foo"));
+//   ASSERT_TRUE(RE2::FullMatch("foo", "fo|foo"));
+//   ASSERT_TRUE(RE2::FullMatch("fo", "fo|foo$"));
+//   ASSERT_TRUE(RE2::FullMatch("foo", "fo|foo$"));
+//   ASSERT_TRUE(RE2::FullMatch("foo", "foo$"));
+//   ASSERT_FALSE(RE2::FullMatch("foo$bar", "foo\\$"));
+//   ASSERT_FALSE(RE2::FullMatch("fox", "fo|bar"));
+
+//   // Uncomment the following if we change the handling of '$' to
+//   // prevent it from matching a trailing newline
+//   if (false) {
+//     // Check that we don't get bitten by pcre's special handling of a
+//     // '\n' at the end of the string matching '$'
+//     ASSERT_FALSE(RE2::PartialMatch("foo\n", "foo$"));
+//   }
+// }
+
+TEST(RE2, FullMatchArgCount) {
+  // FullMatch with 0, 1, 2, ... 7 and 16 int arguments, one digit per capture.
+  int a[16];
+  ASSERT_TRUE(RE2::FullMatch("", ""));
+
+  memset(a, 0, sizeof(a));  // clear the whole array, not just sizeof(int) bytes
+  ASSERT_TRUE(RE2::FullMatch("1", "(\\d){1}", &a[0]));
+  ASSERT_EQ(a[0], 1);
+
+  memset(a, 0, sizeof(a));
+  ASSERT_TRUE(RE2::FullMatch("12", "(\\d)(\\d)", &a[0], &a[1]));
+  ASSERT_EQ(a[0], 1);
+  ASSERT_EQ(a[1], 2);
+
+  memset(a, 0, sizeof(a));
+  ASSERT_TRUE(RE2::FullMatch("123", "(\\d)(\\d)(\\d)", &a[0], &a[1], &a[2]));
+  ASSERT_EQ(a[0], 1);
+  ASSERT_EQ(a[1], 2);
+  ASSERT_EQ(a[2], 3);
+
+  memset(a, 0, sizeof(a));
+  ASSERT_TRUE(RE2::FullMatch("1234", "(\\d)(\\d)(\\d)(\\d)", &a[0], &a[1],
+                             &a[2], &a[3]));
+  ASSERT_EQ(a[0], 1);
+  ASSERT_EQ(a[1], 2);
+  ASSERT_EQ(a[2], 3);
+  ASSERT_EQ(a[3], 4);
+
+  memset(a, 0, sizeof(a));
+  ASSERT_TRUE(RE2::FullMatch("12345", "(\\d)(\\d)(\\d)(\\d)(\\d)", &a[0], &a[1],
+                             &a[2], &a[3], &a[4]));
+  ASSERT_EQ(a[0], 1);
+  ASSERT_EQ(a[1], 2);
+  ASSERT_EQ(a[2], 3);
+  ASSERT_EQ(a[3], 4);
+  ASSERT_EQ(a[4], 5);
+
+  memset(a, 0, sizeof(a));
+  ASSERT_TRUE(RE2::FullMatch("123456", "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)", &a[0],
+                             &a[1], &a[2], &a[3], &a[4], &a[5]));
+  ASSERT_EQ(a[0], 1);
+  ASSERT_EQ(a[1], 2);
+  ASSERT_EQ(a[2], 3);
+  ASSERT_EQ(a[3], 4);
+  ASSERT_EQ(a[4], 5);
+  ASSERT_EQ(a[5], 6);
+
+  memset(a, 0, sizeof(a));
+  ASSERT_TRUE(RE2::FullMatch("1234567", "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
+                             &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6]));
+  ASSERT_EQ(a[0], 1);
+  ASSERT_EQ(a[1], 2);
+  ASSERT_EQ(a[2], 3);
+  ASSERT_EQ(a[3], 4);
+  ASSERT_EQ(a[4], 5);
+  ASSERT_EQ(a[5], 6);
+  ASSERT_EQ(a[6], 7);
+
+  memset(a, 0, sizeof(a));
+  ASSERT_TRUE(RE2::FullMatch("1234567890123456",
+                             "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
+                             "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
+                             &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6],
+                             &a[7], &a[8], &a[9], &a[10], &a[11], &a[12],
+                             &a[13], &a[14], &a[15]));
+  ASSERT_EQ(a[0], 1);
+  ASSERT_EQ(a[1], 2);
+  ASSERT_EQ(a[2], 3);
+  ASSERT_EQ(a[3], 4);
+  ASSERT_EQ(a[4], 5);
+  ASSERT_EQ(a[5], 6);
+  ASSERT_EQ(a[6], 7);
+  ASSERT_EQ(a[7], 8);
+  ASSERT_EQ(a[8], 9);
+  ASSERT_EQ(a[9], 0);
+  ASSERT_EQ(a[10], 1);
+  ASSERT_EQ(a[11], 2);
+  ASSERT_EQ(a[12], 3);
+  ASSERT_EQ(a[13], 4);
+  ASSERT_EQ(a[14], 5);
+  ASSERT_EQ(a[15], 6);
+}
+
+TEST(RE2, Accessors) {
+  // pattern() hands back the string the RE2 was built from.
+  {
+    const std::string expected = "http://([^/]+)/.*";
+    const RE2 compiled(expected);
+    ASSERT_EQ(expected, compiled.pattern());
+  }
+
+  // Error accessors on a pattern that compiles.
+  {
+    RE2 compiled("foo");
+    ASSERT_TRUE(compiled.error().empty());  // no error text expected
+    ASSERT_TRUE(compiled.ok());
+    ASSERT_EQ(compiled.error_code(), RE2::NoError);
+  }
+}
+
+// TEST(RE2, UTF8) {
+//   // Check UTF-8 handling
+//   // Three Japanese characters (nihongo)
+//   const char utf8_string[] = {
+//        (char)0xe6, (char)0x97, (char)0xa5, // 65e5
+//        (char)0xe6, (char)0x9c, (char)0xac, // 672c
+//        (char)0xe8, (char)0xaa, (char)0x9e, // 8a9e
+//        0
+//   };
+//   const char utf8_pattern[] = {
+//        '.',
+//        (char)0xe6, (char)0x9c, (char)0xac, // 672c
+//        '.',
+//        0
+//   };
+
+//   // Both should match in either mode, bytes or UTF-8
+//   RE2 re_test1(".........", RE2::Latin1);
+//   ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test1));
+//   RE2 re_test2("...");
+//   ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test2));
+
+//   // Check that '.' matches one byte or UTF-8 character
+//   // according to the mode.
+//   std::string s;
+//   RE2 re_test3("(.)", RE2::Latin1);
+//   ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test3, &s));
+//   ASSERT_EQ(s, std::string("\xe6"));
+//   RE2 re_test4("(.)");
+//   ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test4, &s));
+//   ASSERT_EQ(s, std::string("\xe6\x97\xa5"));
+
+//   // Check that string matches itself in either mode
+//   RE2 re_test5(utf8_string, RE2::Latin1);
+//   ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test5));
+//   RE2 re_test6(utf8_string);
+//   ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test6));
+
+//   // Check that pattern matches string only in UTF8 mode
+//   RE2 re_test7(utf8_pattern, RE2::Latin1);
+//   ASSERT_FALSE(RE2::FullMatch(utf8_string, re_test7));
+//   RE2 re_test8(utf8_pattern);
+//   ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test8));
+// }
+
+TEST(RE2, UngreedyUTF8) {
+  // Ungreedy UTF8 regular expressions must not match text they should
+  // reject -- see bug 82246.
+  {
+    // Greedy form; this case never misbehaved.
+    const char* regex_text = "\\w+X";
+    const std::string subject = "a aX";
+    RE2 latin1_re(regex_text, RE2::Latin1);
+    RE2 default_re(regex_text);
+
+    ASSERT_FALSE(RE2::FullMatch(subject, latin1_re));
+    ASSERT_FALSE(RE2::FullMatch(subject, default_re));
+  }
+  {
+    const char* regex_text = "(?U)\\w+X";
+    const std::string subject = "a aX";
+    RE2 latin1_re(regex_text, RE2::Latin1);
+    ASSERT_EQ(latin1_re.error(), "");
+    RE2 default_re(regex_text);
+
+    ASSERT_FALSE(RE2::FullMatch(subject, latin1_re));
+    ASSERT_FALSE(RE2::FullMatch(subject, default_re));
+  }
+}
+
+// TEST(RE2, Rejects) {
+//   {
+//     RE2 re("a\\1", RE2::Quiet);
+//     ASSERT_FALSE(re.ok()); }
+//   {
+//     RE2 re("a[x", RE2::Quiet);
+//     ASSERT_FALSE(re.ok());
+//   }
+//   {
+//     RE2 re("a[z-a]", RE2::Quiet);
+//     ASSERT_FALSE(re.ok());
+//   }
+//   {
+//     RE2 re("a[[:foobar:]]", RE2::Quiet);
+//     ASSERT_FALSE(re.ok());
+//   }
+//   {
+//     RE2 re("a(b", RE2::Quiet);
+//     ASSERT_FALSE(re.ok());
+//   }
+//   {
+//     RE2 re("a\\", RE2::Quiet);
+//     ASSERT_FALSE(re.ok());
+//   }
+// }
+
+TEST(RE2, NoCrash) {
+  // A pattern that fails to compile must be usable without crashing.
+  {
+    RE2 broken("a\\", RE2::Quiet);
+    ASSERT_FALSE(broken.ok());
+    ASSERT_FALSE(RE2::PartialMatch("a\\b", broken));
+  }
+
+  // The same goes for a pattern that is far too large.
+  {
+    RE2 huge("(((.{100}){100}){100}){100}", RE2::Quiet);
+    ASSERT_FALSE(huge.ok());
+    ASSERT_FALSE(RE2::PartialMatch("aaa", huge));
+  }
+
+  // A large but acceptable pattern compiles and matches.
+  {
+    RE2 wide(".{512}x", RE2::Quiet);
+    ASSERT_TRUE(wide.ok());
+    std::string haystack;
+    haystack.append(515, 'c');
+    haystack.append("x");
+    ASSERT_TRUE(RE2::PartialMatch(haystack, wide));
+  }
+}
+
+TEST(RE2, Recursion) {
+  // Inherited from PCRE: checks that recursion is cut off.
+  // RE2 itself does not recurse.
+  const int kTextBytes = 15 * 1024;  // enough to crash PCRE
+  TestRecursion(kTextBytes, ".");
+  TestRecursion(kTextBytes, "a");
+  TestRecursion(kTextBytes, "a.");
+  TestRecursion(kTextBytes, "ab.");
+  TestRecursion(kTextBytes, "abc.");
+}
+
+TEST(RE2, BigCountedRepetition) {
+  // Counted repetition should work when the memory budget is generous.
+  RE2::Options roomy;
+  roomy.set_max_mem(256<<20);
+
+  RE2 wide(".{512}x", roomy);
+  ASSERT_TRUE(wide.ok());
+  std::string haystack;
+  haystack.append(515, 'c');
+  haystack.append("x");
+  ASSERT_TRUE(RE2::PartialMatch(haystack, wide));
+}
+
+TEST(RE2, DeepRecursion) {
+  // Deep stack recursion check.  Before pcre was patched this input
+  // overflowed the stack and ended in a segmentation violation.
+  // Another PCRE legacy test; RE2 doesn't recurse.
+  //   text = "x*" + 131072 copies of 'a' + "*x"
+  const std::string filler(131072, 'a');
+  std::string text("x*");
+  text += filler;
+  text += "*x";
+  RE2 comment_re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)");
+  ASSERT_TRUE(RE2::FullMatch(text, comment_re));
+}
+
+// Suggested by Josh Hyman.  This used to fail while SearchOnePass
+// lacked case-folding support.
+TEST(CaseInsensitive, MatchAndConsume) {
+  std::string sentence = "A fish named *Wanda*";
+  StringPiece remaining(sentence);
+  StringPiece hit;
+  EXPECT_TRUE(RE2::PartialMatch(sentence, "(?i)([wand]{5})", &hit));
+  EXPECT_TRUE(RE2::FindAndConsume(&remaining, "(?i)([wand]{5})", &hit));
+}
+
+// A pattern may be supplied as a std::string, a StringPiece, a const char*
+// or a C string literal; each converts implicitly to RE2.
+TEST(RE2, ImplicitConversions) {
+  std::string as_string(".");
+  StringPiece as_piece(".");
+  const char* as_cstr = ".";
+  EXPECT_TRUE(RE2::PartialMatch("e", as_string));
+  EXPECT_TRUE(RE2::PartialMatch("e", as_piece));
+  EXPECT_TRUE(RE2::PartialMatch("e", as_cstr));
+  EXPECT_TRUE(RE2::PartialMatch("e", "."));
+}
+
+// Regressions traced back to change 8622304.
+TEST(RE2, CL8622304) {
+  // Case reported by ingow.
+  std::string captured;
+  EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])"));  // passed at report time
+  EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])", &captured));  // failed then
+
+  // Case reported by jacobsa.
+  std::string name, value;
+  EXPECT_TRUE(RE2::PartialMatch("bar:1,0x2F,030,4,5;baz:true;fooby:false,true",
+              "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?",
+              &name,
+              &value));
+  EXPECT_EQ(name, "bar");
+  EXPECT_EQ(value, "1,0x2F,030,4,5");
+}
+
+// // Check that RE2 returns correct regexp pieces on error.
+// // In particular, make sure it returns whole runes
+// // and that it always reports invalid UTF-8.
+// // Also check that Perl error flag piece is big enough.
+// static struct ErrorTest {
+//   const char *regexp;
+//   RE2::ErrorCode error_code;
+//   const char *error_arg;
+// } error_tests[] = {
+//   { "ab\\αcd", RE2::ErrorBadEscape, "\\α" },
+//   { "ef\\x☺01", RE2::ErrorBadEscape, "\\x☺0" },
+//   { "gh\\x1☺01", RE2::ErrorBadEscape, "\\x1☺" },
+//   { "ij\\x1", RE2::ErrorBadEscape, "\\x1" },
+//   { "kl\\x", RE2::ErrorBadEscape, "\\x" },
+//   { "uv\\x{0000☺}", RE2::ErrorBadEscape, "\\x{0000☺" },
+//   { "wx\\p{ABC", RE2::ErrorBadCharRange, "\\p{ABC" },
+//   // used to return (?s but the error is X
+//   { "yz(?smiUX:abc)", RE2::ErrorBadPerlOp, "(?smiUX" },
+//   { "aa(?sm☺i", RE2::ErrorBadPerlOp, "(?sm☺" },
+//   { "bb[abc", RE2::ErrorMissingBracket, "[abc" },
+//   { "abc(def", RE2::ErrorMissingParen, "abc(def" },
+//   { "abc)def", RE2::ErrorUnexpectedParen, "abc)def" },
+
+//   // no argument string returned for invalid UTF-8
+//   { "mn\\x1\377", RE2::ErrorBadUTF8, "" },
+//   { "op\377qr", RE2::ErrorBadUTF8, "" },
+//   { "st\\x{00000\377", RE2::ErrorBadUTF8, "" },
+//   { "zz\\p{\377}", RE2::ErrorBadUTF8, "" },
+//   { "zz\\x{00\377}", RE2::ErrorBadUTF8, "" },
+//   { "zz(?P<name\377>abc)", RE2::ErrorBadUTF8, "" },
+// };
+// TEST(RE2, ErrorCodeAndArg) {
+//   for (size_t i = 0; i < arraysize(error_tests); i++) {
+//     RE2 re(error_tests[i].regexp, RE2::Quiet);
+//     EXPECT_FALSE(re.ok());
+//     EXPECT_EQ(re.error_code(), error_tests[i].error_code) << re.error();
+//     EXPECT_EQ(re.error_arg(), error_tests[i].error_arg) << re.error();
+//   }
+// }
+
+// // Check that "never match \n" mode never matches \n.
+// static struct NeverTest {
+//   const char* regexp;
+//   const char* text;
+//   const char* match;
+// } never_tests[] = {
+//   { "(.*)", "abc\ndef\nghi\n", "abc" },
+//   { "(?s)(abc.*def)", "abc\ndef\n", NULL },
+//   { "(abc(.|\n)*def)", "abc\ndef\n", NULL },
+//   { "(abc[^x]*def)", "abc\ndef\n", NULL },
+//   { "(abc[^x]*def)", "abczzzdef\ndef\n", "abczzzdef" },
+// };
+// TEST(RE2, NeverNewline) {
+//   RE2::Options opt;
+//   opt.set_never_nl(true);
+//   for (size_t i = 0; i < arraysize(never_tests); i++) {
+//     const NeverTest& t = never_tests[i];
+//     RE2 re(t.regexp, opt);
+//     if (t.match == NULL) {
+//       EXPECT_FALSE(re.PartialMatch(t.text, re));
+//     } else {
+//       StringPiece m;
+//       EXPECT_TRUE(re.PartialMatch(t.text, re, &m));
+//       EXPECT_EQ(m, t.match);
+//     }
+//   }
+// }
+
+// // Check that dot_nl option works.
+// TEST(RE2, DotNL) {
+//   RE2::Options opt;
+//   opt.set_dot_nl(true);
+//   EXPECT_TRUE(RE2::PartialMatch("\n", RE2(".", opt)));
+//   EXPECT_FALSE(RE2::PartialMatch("\n", RE2("(?-s).", opt)));
+//   opt.set_never_nl(true);
+//   EXPECT_FALSE(RE2::PartialMatch("\n", RE2(".", opt)));
+// }
+
+// // Check that there are no capturing groups in "never capture" mode.
+// TEST(RE2, NeverCapture) {
+//   RE2::Options opt;
+//   opt.set_never_capture(true);
+//   RE2 re("(r)(e)", opt);
+//   EXPECT_EQ(0, re.NumberOfCapturingGroups());
+// }
+
+// Bitstate used to read submatch[0] even when nsubmatch was 0.
+// The bug showed up when a failed DFA search fell back to Bitstate
+// from a Match call given a NULL submatch array, so the submatch[0]
+// read happened with no entries available.
+TEST(RE2, BitstateCaptureBug) {
+  RE2::Options tight;
+  tight.set_max_mem(20000);
+  RE2 pattern("(_________$)", tight);
+  StringPiece text = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x";
+  EXPECT_FALSE(pattern.Match(text, 0, text.size(), RE2::UNANCHORED, NULL, 0));
+}
+
+// C++ version of bug 609710.
+TEST(RE2, UnicodeClasses) {  // \p{..} / \P{..} classes on ASCII and CJK text.
+  const std::string str = "ABCDEFGHI譚永鋒";
+  std::string a, b, c;
+
+  EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}"));
+  EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}"));
+  EXPECT_FALSE(RE2::FullMatch("A", "\\p{Ll}"));
+  EXPECT_FALSE(RE2::FullMatch("A", "\\P{L}"));
+  EXPECT_FALSE(RE2::FullMatch("A", "\\P{Lu}"));
+  EXPECT_TRUE(RE2::FullMatch("A", "\\P{Ll}"));
+
+  EXPECT_TRUE(RE2::FullMatch("譚", "\\p{L}"));
+  EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Lu}"));
+  EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Ll}"));
+  EXPECT_FALSE(RE2::FullMatch("譚", "\\P{L}"));
+  EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Lu}"));
+  EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Ll}"));
+
+  EXPECT_TRUE(RE2::FullMatch("永", "\\p{L}"));
+  EXPECT_FALSE(RE2::FullMatch("永", "\\p{Lu}"));
+  EXPECT_FALSE(RE2::FullMatch("永", "\\p{Ll}"));
+  EXPECT_FALSE(RE2::FullMatch("永", "\\P{L}"));
+  EXPECT_TRUE(RE2::FullMatch("永", "\\P{Lu}"));
+  EXPECT_TRUE(RE2::FullMatch("永", "\\P{Ll}"));
+
+  EXPECT_TRUE(RE2::FullMatch("鋒", "\\p{L}"));
+  EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Lu}"));
+  EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Ll}"));
+  EXPECT_FALSE(RE2::FullMatch("鋒", "\\P{L}"));
+  EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Lu}"));
+  EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Ll}"));
+
+  EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?(.).*?(.)", &a, &b, &c));
+  EXPECT_EQ("A", a);
+  EXPECT_EQ("B", b);
+  EXPECT_EQ("C", c);
+
+  EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{L}]).*?(.)", &a, &b, &c));
+  EXPECT_EQ("A", a);
+  EXPECT_EQ("B", b);
+  EXPECT_EQ("C", c);
+
+  EXPECT_FALSE(RE2::PartialMatch(str, "\\P{L}"));  // no non-letter in str
+
+  EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{Lu}]).*?(.)", &a, &b, &c));
+  EXPECT_EQ("A", a);
+  EXPECT_EQ("B", b);
+  EXPECT_EQ("C", c);
+
+  EXPECT_FALSE(RE2::PartialMatch(str, "[^\\p{Lu}\\p{Lo}]"));
+
+  EXPECT_TRUE(RE2::PartialMatch(str, ".*(.).*?([\\p{Lu}\\p{Lo}]).*?(.)", &a, &b, &c));
+  EXPECT_EQ("譚", a);  // the leading greedy ".*" pushes the captures to
+  EXPECT_EQ("永", b);  // the last three characters of str
+  EXPECT_EQ("鋒", c);
+}
+
+TEST(RE2, LazyRE2) {
+  // One lazily built regexp without explicit options, one with RE2::Latin1.
+  static LazyRE2 plain = {"a"};
+  static LazyRE2 latin1 = {"b", RE2::Latin1};
+  // No options given: the reported encoding is UTF-8.
+  EXPECT_EQ("a", plain->pattern());
+  EXPECT_EQ(RE2::Options::EncodingUTF8, plain->options().encoding());
+  // RE2::Latin1 given: the reported encoding is Latin-1.
+  EXPECT_EQ("b", latin1->pattern());
+  EXPECT_EQ(RE2::Options::EncodingLatin1, latin1->options().encoding());
+}
+
+// Bug reported by saito. 2009/02/17
+TEST(RE2, NullVsEmptyString) {
+  RE2 match_anything(".*");
+  EXPECT_TRUE(match_anything.ok());
+
+  // A default-constructed (null) StringPiece must fully match...
+  EXPECT_TRUE(RE2::FullMatch(StringPiece(), match_anything));
+
+  // ...and so must a StringPiece built from an empty C string.
+  EXPECT_TRUE(RE2::FullMatch(StringPiece(""), match_anything));
+}
+
+// // Similar to the previous test, check that the null string and the empty
+// // string both match, but also that the null string can only provide null
+// // submatches whereas the empty string can also provide empty submatches.
+// TEST(RE2, NullVsEmptyStringSubmatches) {
+//   RE2 re("()|(foo)");
+//   EXPECT_TRUE(re.ok());
+
+//   // matches[0] is overall match, [1] is (), [2] is (foo), [3] is nonexistent.
+//   StringPiece matches[4];
+
+//   for (size_t i = 0; i < arraysize(matches); i++)
+//     matches[i] = "bar";
+
+//   StringPiece null;
+//   EXPECT_TRUE(re.Match(null, 0, null.size(), RE2::UNANCHORED,
+//                        matches, arraysize(matches)));
+//   for (size_t i = 0; i < arraysize(matches); i++) {
+//     EXPECT_TRUE(matches[i].data() == NULL);  // always null
+//     EXPECT_TRUE(matches[i].empty());
+//   }
+
+//   for (size_t i = 0; i < arraysize(matches); i++)
+//     matches[i] = "bar";
+
+//   StringPiece empty("");
+//   EXPECT_TRUE(re.Match(empty, 0, empty.size(), RE2::UNANCHORED,
+//                        matches, arraysize(matches)));
+//   EXPECT_TRUE(matches[0].data() != NULL);  // empty, not null
+//   EXPECT_TRUE(matches[0].empty());
+//   EXPECT_TRUE(matches[1].data() != NULL);  // empty, not null
+//   EXPECT_TRUE(matches[1].empty());
+//   EXPECT_TRUE(matches[2].data() == NULL);
+//   EXPECT_TRUE(matches[2].empty());
+//   EXPECT_TRUE(matches[3].data() == NULL);
+//   EXPECT_TRUE(matches[3].empty());
+// }
+
+// Issue 1816809
+TEST(RE2, Bug1816809) {
+  StringPiece input("llx-3;llx4");
+  std::string captured;  // destination for the single requested submatch
+  RE2 nested("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))");
+  EXPECT_TRUE(RE2::Consume(&input, nested, &captured));
+}
+
+// Issue 3061120
+TEST(RE2, Bug3061120) {
+  RE2 nonword_nocase("(?i)\\W");  // case-insensitive \W must reject letters
+  EXPECT_FALSE(RE2::PartialMatch("x", nonword_nocase));  // never regressed
+  EXPECT_FALSE(RE2::PartialMatch("k", nonword_nocase));  // used to fail (kelvin)
+  EXPECT_FALSE(RE2::PartialMatch("s", nonword_nocase));  // used to fail (latin long s)
+}
+
+// TEST(RE2, CapturingGroupNames) {
+//   // Opening parentheses annotated with group IDs:
+//   //      12    3        45   6         7
+//   RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))");
+//   EXPECT_TRUE(re.ok());
+//   const std::map<int, std::string>& have = re.CapturingGroupNames();
+//   std::map<int, std::string> want;
+//   want[3] = "G2";
+//   want[6] = "G2";
+//   want[7] = "G1";
+//   EXPECT_EQ(want, have);
+// }
+
+// TEST(RE2, RegexpToStringLossOfAnchor) {
+//   EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at");
+//   EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at");
+//   EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$");
+//   EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)");
+// }
+
+// // Issue 10131674
+// TEST(RE2, Bug10131674) {
+//   // Some of these escapes describe values that do not fit in a byte.
+//   RE2 re("\\140\\440\\174\\271\\150\\656\\106\\201\\004\\332", RE2::Latin1);
+//   EXPECT_FALSE(re.ok());
+//   EXPECT_FALSE(RE2::FullMatch("hello world", re));
+// }
+
+// TEST(RE2, Bug18391750) {
+//   // Stray write past end of match_ in nfa.cc, caught by fuzzing + address sanitizer.
+//   const char t[] = {
+//       (char)0x28, (char)0x28, (char)0xfc, (char)0xfc, (char)0x08, (char)0x08,
+//       (char)0x26, (char)0x26, (char)0x28, (char)0xc2, (char)0x9b, (char)0xc5,
+//       (char)0xc5, (char)0xd4, (char)0x8f, (char)0x8f, (char)0x69, (char)0x69,
+//       (char)0xe7, (char)0x29, (char)0x7b, (char)0x37, (char)0x31, (char)0x31,
+//       (char)0x7d, (char)0xae, (char)0x7c, (char)0x7c, (char)0xf3, (char)0x29,
+//       (char)0xae, (char)0xae, (char)0x2e, (char)0x2a, (char)0x29, (char)0x00,
+//   };
+//   RE2::Options opt;
+//   opt.set_encoding(RE2::Options::EncodingLatin1);
+//   opt.set_longest_match(true);
+//   opt.set_dot_nl(true);
+//   opt.set_case_sensitive(false);
+//   RE2 re(t, opt);
+//   ASSERT_TRUE(re.ok());
+//   RE2::PartialMatch(t, re);
+// }
+
+TEST(RE2, Bug18458852) {
+  // The parser used to accept an invalid (too large) rune, after which the
+  // compiler failed a DCHECK in its UTF-8 character class code.
+  const char bad_rune_pattern[] = {
+      (char)0x28, (char)0x05, (char)0x05, (char)0x41, (char)0x41,  // ( 05 05 A A
+      (char)0x28, (char)0x24, (char)0x5b, (char)0x5e,              // ( $ [ ^
+      (char)0xf5, (char)0x87, (char)0x87, (char)0x90,              // f5 87 87 90
+      (char)0x29, (char)0x5d, (char)0x29, (char)0x29, (char)0x00,  // ) ] ) ) NUL
+  };
+  RE2 re(bad_rune_pattern);
+  ASSERT_FALSE(re.ok());
+}
+
+TEST(RE2, Bug18523943) {
+  // Bug in BitState: case kFailInst failed the match entirely.
+
+  // Raw byte values of the two inputs, including the terminating NUL:
+  //   text    = 29 29 24 00
+  //   pattern = 28 0a 2a 2a 29 00
+  const char text[] = "))$";
+  const char pattern[] = "(\n**)";
+
+  RE2::Options opt;
+  opt.set_log_errors(false);
+  opt.set_encoding(RE2::Options::EncodingLatin1);
+  opt.set_posix_syntax(true);
+  opt.set_longest_match(true);
+  opt.set_literal(false);
+  opt.set_never_nl(true);
+
+  RE2 re(pattern, opt);
+  ASSERT_TRUE(re.ok());
+  std::string captured;
+  ASSERT_TRUE(RE2::PartialMatch(text, re, &captured));
+}
+
+TEST(RE2, Bug21371806) {
+  // The parser used to accept Unicode groups while in Latin-1 mode, and the
+  // compiler then failed a DCHECK in prog.cc.  The regexp must report ok().
+  RE2::Options latin1;
+  latin1.set_encoding(RE2::Options::EncodingLatin1);
+
+  RE2 unicode_group("g\\p{Zl}]", latin1);
+  const bool compiled = unicode_group.ok();
+  ASSERT_TRUE(compiled);
+}
+
+// TEST(RE2, Bug26356109) {
+//   // Bug in parser caused by factoring of common prefixes in alternations.
+
+//   // In the past, this was factored to "a\\C*?[bc]". Thus, the automaton would
+//   // consume "ab" and then stop (when unanchored) whereas it should consume all
+//   // of "abc" as per first-match semantics.
+//   RE2 re("a\\C*?c|a\\C*?b");
+//   ASSERT_TRUE(re.ok());
+
+//   std::string s = "abc";
+//   StringPiece m;
+
+//   ASSERT_TRUE(re.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
+//   ASSERT_EQ(m, s) << " (UNANCHORED) got m='" << m << "', want '" << s << "'";
+
+//   ASSERT_TRUE(re.Match(s, 0, s.size(), RE2::ANCHOR_BOTH, &m, 1));
+//   ASSERT_EQ(m, s) << " (ANCHOR_BOTH) got m='" << m << "', want '" << s << "'";
+// }
+
+// TEST(RE2, Issue104) {
+//   // RE2::GlobalReplace always advanced by one byte when the empty string was
+//   // matched, which would clobber any rune that is longer than one byte.
+
+//   std::string s = "bc";
+//   ASSERT_EQ(3, RE2::GlobalReplace(&s, "a*", "d"));
+//   ASSERT_EQ("dbdcd", s);
+
+//   s = "ąć";
+//   ASSERT_EQ(3, RE2::GlobalReplace(&s, "Ć*", "Ĉ"));
+//   ASSERT_EQ("ĈąĈćĈ", s);
+
+
+//   s = "人类";
+//   ASSERT_EQ(3, RE2::GlobalReplace(&s, "大*", "小"));
+//   ASSERT_EQ("小人小类小", s);
+// }
+
+// TEST(RE2, Issue310) {
+//   // (?:|a)* matched more text than (?:|a)+ did.
+
+//   std::string s = "aaa";
+//   StringPiece m;
+
+//   RE2 star("(?:|a)*");
+//   ASSERT_TRUE(star.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
+//   ASSERT_EQ(m, "") << " got m='" << m << "', want ''";
+
+//   RE2 plus("(?:|a)+");
+//   ASSERT_TRUE(plus.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
+//   ASSERT_EQ(m, "") << " got m='" << m << "', want ''";
+// }
+
+}  // namespace re2
diff --git a/re2/testing/regexp_benchmark.cc b/re2/testing/regexp_benchmark.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3eeb09889ed893724d671df08bf70e3c827925b9
--- /dev/null
+++ b/re2/testing/regexp_benchmark.cc
@@ -0,0 +1,1570 @@
+// Copyright 2006-2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Benchmarks for regular expression implementations.
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string>
+#include <thread>
+#include <unordered_map>
+#include <utility>
+
+#include "util/benchmark.h"
+#include "util/test.h"
+#include "util/flags.h"
+#include "util/logging.h"
+#include "util/malloc_counter.h"
+#include "util/strutil.h"
+#include "re2/prog.h"
+#include "re2/re2.h"
+#include "re2/regexp.h"
+#include "util/mutex.h"
+#include "util/pcre.h"
+
+namespace re2 {
+void Test();
+void MemoryUsage();
+}  // namespace re2
+
+typedef testing::MallocCounter MallocCounter;
+
+namespace re2 {
+
+void Test() {  // Sanity check: one-pass search of a phone number pattern.
+  Regexp* parsed = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL);
+  CHECK(parsed);
+  Prog* compiled = parsed->CompileToProg(0);
+  CHECK(compiled);
+  CHECK(compiled->IsOnePass());
+  CHECK(compiled->CanBitState());
+  StringPiece groups[4];  // [0] is the whole match, [1..3] the digit fields
+  const char* phone = "650-253-0001";
+  CHECK(compiled->SearchOnePass(phone, phone, Prog::kAnchored, Prog::kFullMatch, groups, 4));
+  CHECK_EQ(groups[0], "650-253-0001");
+  CHECK_EQ(groups[1], "650");
+  CHECK_EQ(groups[2], "253");
+  CHECK_EQ(groups[3], "0001");
+  delete compiled;
+  parsed->Decref();
+  LOG(INFO) << "test passed\n";
+}
+
+void MemoryUsage() {  // Prints heap growth per engine, then object sizes, to stderr.
+  const char* regexp = "(\\d+)-(\\d+)-(\\d+)";
+  const char* text = "650-253-0001";
+  {  // Regexp parse, Prog compile and one-pass search; counter reset in between.
+    MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    // Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly,
+    // because LOG(INFO) might do a big allocation before they get evaluated.
+    fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n",
+            mc.HeapGrowth(), mc.PeakHeapGrowth());
+    mc.Reset();
+
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    CHECK(prog->IsOnePass());
+    CHECK(prog->CanBitState());
+    fprintf(stderr, "Prog:   %7lld bytes (peak=%lld)\n",
+            mc.HeapGrowth(), mc.PeakHeapGrowth());
+    mc.Reset();
+
+    StringPiece sp[4];
+    CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
+    fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n",
+            mc.HeapGrowth(), mc.PeakHeapGrowth());
+    delete prog;
+    re->Decref();
+  }
+
+  {  // PCRE object with automatic storage: after construction, after FullMatch.
+    MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
+
+    PCRE re(regexp, PCRE::UTF8);
+    fprintf(stderr, "RE:     %7lld bytes (peak=%lld)\n",
+            mc.HeapGrowth(), mc.PeakHeapGrowth());
+    PCRE::FullMatch(text, re);
+    fprintf(stderr, "RE:     %7lld bytes (peak=%lld)\n",
+            mc.HeapGrowth(), mc.PeakHeapGrowth());
+  }
+
+  {  // PCRE object allocated with new: after construction, after FullMatch.
+    MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
+
+    PCRE* re = new PCRE(regexp, PCRE::UTF8);
+    fprintf(stderr, "PCRE*:  %7lld bytes (peak=%lld)\n",
+            mc.HeapGrowth(), mc.PeakHeapGrowth());
+    PCRE::FullMatch(text, *re);
+    fprintf(stderr, "PCRE*:  %7lld bytes (peak=%lld)\n",
+            mc.HeapGrowth(), mc.PeakHeapGrowth());
+    delete re;
+  }
+
+  {  // RE2 object: after construction, after FullMatch.
+    MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
+
+    RE2 re(regexp);
+    fprintf(stderr, "RE2:    %7lld bytes (peak=%lld)\n",
+            mc.HeapGrowth(), mc.PeakHeapGrowth());
+    RE2::FullMatch(text, re);
+    fprintf(stderr, "RE2:    %7lld bytes (peak=%lld)\n",
+            mc.HeapGrowth(), mc.PeakHeapGrowth());
+  }
+  // sizeof yields size_t, which is unsigned: print it with %zu, not %zd.
+  fprintf(stderr, "sizeof: PCRE=%zu RE2=%zu Prog=%zu Inst=%zu\n",
+          sizeof(PCRE), sizeof(RE2), sizeof(Prog), sizeof(Prog::Inst));
+}
+
+int NumCPUs() {  // Upper bound for ThreadRange(); never returns less than 1.
+  // hardware_concurrency() returns 0 when the count cannot be determined.
+  const unsigned detected = std::thread::hardware_concurrency();
+  return detected == 0 ? 1 : static_cast<int>(detected); }
+// Regular expression implementation wrappers.
+// Defined at bottom of file, but they are repetitive
+// and not interesting.
+
+typedef void SearchImpl(benchmark::State& state, const char* regexp,
+                        const StringPiece& text, Prog::Anchor anchor,
+                        bool expect_match);
+
+SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState, SearchPCRE,
+    SearchRE2, SearchCachedDFA, SearchCachedNFA, SearchCachedOnePass,
+    SearchCachedBitState, SearchCachedPCRE, SearchCachedRE2;
+
+typedef void ParseImpl(benchmark::State& state, const char* regexp,
+                       const StringPiece& text);
+
+ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState, Parse1PCRE, Parse1RE2,
+    Parse1Backtrack, Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState,
+    Parse1CachedPCRE, Parse1CachedRE2, Parse1CachedBacktrack;
+
+ParseImpl Parse3NFA, Parse3OnePass, Parse3BitState, Parse3PCRE, Parse3RE2,
+    Parse3Backtrack, Parse3CachedNFA, Parse3CachedOnePass, Parse3CachedBitState,
+    Parse3CachedPCRE, Parse3CachedRE2, Parse3CachedBacktrack;
+
+ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2;
+
+ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2;
+
+// Benchmark: failed search for regexp in random text.
+
+// Generate random text that won't contain the search string,
+// to test worst-case search behavior.
+std::string RandomText(int64_t nbytes) {
+  // A 16 MiB pool generated once, on first call, from a fixed seed; every
+  // call returns a prefix of it.  The pool is never freed.
+  static const std::string* const pool = []() {
+    srand(1);
+    std::string* fresh = new std::string(16<<20, '\0');
+    for (char& slot : *fresh) {
+      // Keep 7 random bits, then lift anything under 0x20 up to 0x20 so no
+      // byte below 0x20 (e.g. '\n') appears.  The clipping biases the
+      // distribution, but uniformity is not needed here.
+      const int bits = rand() & 0x7F;
+      slot = bits < 0x20 ? 0x20 : bits;
+    }
+    return fresh;
+  }();
+  CHECK_LE(nbytes, 16<<20);
+  return pool->substr(0, nbytes);
+}
+
+// Searches state.range(0) bytes of RandomText for regexp using `search`
+// (unanchored, no match expected) and reports the bytes processed.
+void Search(benchmark::State& state, const char* regexp, SearchImpl* search) {
+  const std::string haystack = RandomText(state.range(0));
+  search(state, regexp, haystack, Prog::kUnanchored, /*expect_match=*/false);
+  state.SetBytesProcessed(state.iterations() * state.range(0));
+}
+
+// These three are easy because they have prefixes,
+// giving the search loop something to prefix accel.
+#define EASY0      "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
+#define EASY1      "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
+#define EASY2      "(?i)" EASY0
+
+// This is a little harder, since it starts with a character class
+// and thus can't be memchr'ed.  Could look for ABC and work backward,
+// but no one does that.
+#define MEDIUM     "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
+
+// This is a fair amount harder, because of the leading [ -~]*.
+// A bad backtracking implementation will take O(text^2) time to
+// figure out there's no match.
+#define HARD       "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
+
+// This has quite a high degree of fanout.
+// NFA execution will be particularly slow.
+#define FANOUT     "(?:[\\x{80}-\\x{10FFFF}]?){100}[\\x{80}-\\x{10FFFF}]"
+
+// This stresses engines that are trying to track parentheses.
+#define PARENS     "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \
+                   "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
+
+void Search_Easy0_CachedDFA(benchmark::State& state)     { Search(state, EASY0, SearchCachedDFA); }
+void Search_Easy0_CachedNFA(benchmark::State& state)     { Search(state, EASY0, SearchCachedNFA); }
+void Search_Easy0_CachedPCRE(benchmark::State& state)    { Search(state, EASY0, SearchCachedPCRE); }
+void Search_Easy0_CachedRE2(benchmark::State& state)     { Search(state, EASY0, SearchCachedRE2); }
+// Register the EASY0 wrappers above (NFA has a smaller upper size); PCRE only if USEPCRE is defined.
+BENCHMARK_RANGE(Search_Easy0_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Easy0_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK_RANGE(Search_Easy0_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_Easy0_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+
+void Search_Easy1_CachedDFA(benchmark::State& state)     { Search(state, EASY1, SearchCachedDFA); }
+void Search_Easy1_CachedNFA(benchmark::State& state)     { Search(state, EASY1, SearchCachedNFA); }
+void Search_Easy1_CachedPCRE(benchmark::State& state)    { Search(state, EASY1, SearchCachedPCRE); }
+void Search_Easy1_CachedRE2(benchmark::State& state)     { Search(state, EASY1, SearchCachedRE2); }
+// Register the EASY1 wrappers above (NFA has a smaller upper size); PCRE only if USEPCRE is defined.
+BENCHMARK_RANGE(Search_Easy1_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Easy1_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK_RANGE(Search_Easy1_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_Easy1_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+
+void Search_Easy2_CachedDFA(benchmark::State& state)     { Search(state, EASY2, SearchCachedDFA); }
+void Search_Easy2_CachedNFA(benchmark::State& state)     { Search(state, EASY2, SearchCachedNFA); }
+void Search_Easy2_CachedPCRE(benchmark::State& state)    { Search(state, EASY2, SearchCachedPCRE); }
+void Search_Easy2_CachedRE2(benchmark::State& state)     { Search(state, EASY2, SearchCachedRE2); }
+// Register the EASY2 wrappers above (NFA has a smaller upper size); PCRE only if USEPCRE is defined.
+BENCHMARK_RANGE(Search_Easy2_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Easy2_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK_RANGE(Search_Easy2_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_Easy2_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+
+void Search_Medium_CachedDFA(benchmark::State& state)     { Search(state, MEDIUM, SearchCachedDFA); }
+void Search_Medium_CachedNFA(benchmark::State& state)     { Search(state, MEDIUM, SearchCachedNFA); }
+void Search_Medium_CachedPCRE(benchmark::State& state)    { Search(state, MEDIUM, SearchCachedPCRE); }
+void Search_Medium_CachedRE2(benchmark::State& state)     { Search(state, MEDIUM, SearchCachedRE2); }
+// Register the MEDIUM wrappers above (NFA and PCRE have a smaller upper size); PCRE only if USEPCRE is defined.
+BENCHMARK_RANGE(Search_Medium_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Medium_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK_RANGE(Search_Medium_CachedPCRE,    8, 256<<10)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_Medium_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+
+void Search_Hard_CachedDFA(benchmark::State& state)     { Search(state, HARD, SearchCachedDFA); }
+void Search_Hard_CachedNFA(benchmark::State& state)     { Search(state, HARD, SearchCachedNFA); }
+void Search_Hard_CachedPCRE(benchmark::State& state)    { Search(state, HARD, SearchCachedPCRE); }
+void Search_Hard_CachedRE2(benchmark::State& state)     { Search(state, HARD, SearchCachedRE2); }
+// Register the HARD wrappers above (NFA up to 256<<10, PCRE up to 4<<10); PCRE only if USEPCRE is defined.
+BENCHMARK_RANGE(Search_Hard_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Hard_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK_RANGE(Search_Hard_CachedPCRE,    8, 4<<10)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_Hard_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+
+void Search_Fanout_CachedDFA(benchmark::State& state)     { Search(state, FANOUT, SearchCachedDFA); }
+void Search_Fanout_CachedNFA(benchmark::State& state)     { Search(state, FANOUT, SearchCachedNFA); }
+void Search_Fanout_CachedPCRE(benchmark::State& state)    { Search(state, FANOUT, SearchCachedPCRE); }
+void Search_Fanout_CachedRE2(benchmark::State& state)     { Search(state, FANOUT, SearchCachedRE2); }
+// Register the FANOUT wrappers above (NFA up to 256<<10, PCRE up to 4<<10); PCRE only if USEPCRE is defined.
+BENCHMARK_RANGE(Search_Fanout_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Fanout_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK_RANGE(Search_Fanout_CachedPCRE,    8, 4<<10)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_Fanout_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+
+void Search_Parens_CachedDFA(benchmark::State& state)     { Search(state, PARENS, SearchCachedDFA); }
+void Search_Parens_CachedNFA(benchmark::State& state)     { Search(state, PARENS, SearchCachedNFA); }
+void Search_Parens_CachedPCRE(benchmark::State& state)    { Search(state, PARENS, SearchCachedPCRE); }
+void Search_Parens_CachedRE2(benchmark::State& state)     { Search(state, PARENS, SearchCachedRE2); }
+// Register the PARENS wrappers above (NFA up to 256<<10, PCRE pinned to 8); PCRE only if USEPCRE is defined.
+BENCHMARK_RANGE(Search_Parens_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Parens_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK_RANGE(Search_Parens_CachedPCRE,    8, 8)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_Parens_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+
+void SearchBigFixed(benchmark::State& state, SearchImpl* search) {
+  // The first half of the text is a run of 'x'; the regexp is that same run
+  // anchored at the start and followed by ".*$".  The second half is random.
+  const std::string prefix(state.range(0)/2, 'x');
+  const std::string regexp = "^" + prefix + ".*$";
+  const std::string text = prefix + RandomText(state.range(0)/2);
+  search(state, regexp.c_str(), text, Prog::kUnanchored, true);
+  state.SetBytesProcessed(state.iterations() * state.range(0));
+}
+
+void Search_BigFixed_CachedDFA(benchmark::State& state)     { SearchBigFixed(state, SearchCachedDFA); }
+void Search_BigFixed_CachedNFA(benchmark::State& state)     { SearchBigFixed(state, SearchCachedNFA); }
+void Search_BigFixed_CachedPCRE(benchmark::State& state)    { SearchBigFixed(state, SearchCachedPCRE); }
+void Search_BigFixed_CachedRE2(benchmark::State& state)     { SearchBigFixed(state, SearchCachedRE2); }
+// Register the BigFixed wrappers above (NFA and PCRE up to 32<<10, others up to 1<<20); PCRE only if USEPCRE is defined.
+BENCHMARK_RANGE(Search_BigFixed_CachedDFA,     8, 1<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_BigFixed_CachedNFA,     8, 32<<10)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK_RANGE(Search_BigFixed_CachedPCRE,    8, 32<<10)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_BigFixed_CachedRE2,     8, 1<<20)->ThreadRange(1, NumCPUs());
+
+// Benchmark: FindAndConsume
+
+void FindAndConsume(benchmark::State& state) {
+  // Haystack: random text of the requested size with the needle at the end.
+  const std::string haystack = RandomText(state.range(0)) + "Hello World";
+  RE2 needle("((Hello World))");
+  for (auto _ : state) {
+    StringPiece remaining = haystack;  // fresh view of the text per iteration
+    StringPiece found;
+    CHECK(RE2::FindAndConsume(&remaining, needle, &found));
+    CHECK_EQ(found, "Hello World");
+  }
+  state.SetBytesProcessed(state.iterations() * state.range(0));
+}
+
+BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs());
+
+// Benchmark: successful anchored search.
+
+void SearchSuccess(benchmark::State& state, const char* regexp,
+                   SearchImpl* search) {  // anchored search; match expected
+  const std::string text = RandomText(state.range(0));
+  search(state, regexp, text, Prog::kAnchored, /*expect_match=*/true);
+  state.SetBytesProcessed(state.iterations() * state.range(0));
+}
+
+// Unambiguous search (RE2 can use OnePass).
+
+void Search_Success_DFA(benchmark::State& state)     { SearchSuccess(state, ".*$", SearchDFA); }
+void Search_Success_NFA(benchmark::State& state)     { SearchSuccess(state, ".*$", SearchNFA); }
+void Search_Success_PCRE(benchmark::State& state)    { SearchSuccess(state, ".*$", SearchPCRE); }
+void Search_Success_RE2(benchmark::State& state)     { SearchSuccess(state, ".*$", SearchRE2); }
+void Search_Success_OnePass(benchmark::State& state) { SearchSuccess(state, ".*$", SearchOnePass); }
+// Register the ".*$" wrappers above (OnePass has a smaller upper size); PCRE only if USEPCRE is defined.
+BENCHMARK_RANGE(Search_Success_DFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success_NFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK_RANGE(Search_Success_PCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_Success_RE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
+
+void Search_Success_CachedDFA(benchmark::State& state)     { SearchSuccess(state, ".*$", SearchCachedDFA); }
+void Search_Success_CachedNFA(benchmark::State& state)     { SearchSuccess(state, ".*$", SearchCachedNFA); }
+void Search_Success_CachedPCRE(benchmark::State& state)    { SearchSuccess(state, ".*$", SearchCachedPCRE); }
+void Search_Success_CachedRE2(benchmark::State& state)     { SearchSuccess(state, ".*$", SearchCachedRE2); }
+void Search_Success_CachedOnePass(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedOnePass); }
+// Register the cached ".*$" wrappers above (OnePass has a smaller upper size); PCRE only if USEPCRE is defined.
+BENCHMARK_RANGE(Search_Success_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success_CachedNFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK_RANGE(Search_Success_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_Success_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
+
+// Ambiguous search (RE2 cannot use OnePass).
+// Used to be ".*.$", but that is coalesced to ".+$" these days.
+
+void Search_Success1_DFA(benchmark::State& state)      { SearchSuccess(state, ".*\\C$", SearchDFA); }
+void Search_Success1_NFA(benchmark::State& state)      { SearchSuccess(state, ".*\\C$", SearchNFA); }
+void Search_Success1_PCRE(benchmark::State& state)     { SearchSuccess(state, ".*\\C$", SearchPCRE); }
+void Search_Success1_RE2(benchmark::State& state)      { SearchSuccess(state, ".*\\C$", SearchRE2); }
+void Search_Success1_BitState(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchBitState); }
+// Register the ".*\C$" wrappers above (BitState has a smaller upper size); PCRE only if USEPCRE is defined.
+BENCHMARK_RANGE(Search_Success1_DFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success1_NFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK_RANGE(Search_Success1_PCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_Success1_RE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
+
+void Search_Success1_CachedDFA(benchmark::State& state)      { SearchSuccess(state, ".*\\C$", SearchCachedDFA); }
+void Search_Success1_CachedNFA(benchmark::State& state)      { SearchSuccess(state, ".*\\C$", SearchCachedNFA); }
+void Search_Success1_CachedPCRE(benchmark::State& state)     { SearchSuccess(state, ".*\\C$", SearchCachedPCRE); }
+void Search_Success1_CachedRE2(benchmark::State& state)      { SearchSuccess(state, ".*\\C$", SearchCachedRE2); }
+void Search_Success1_CachedBitState(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedBitState); }
+// Register the cached ".*\C$" wrappers above (BitState has a smaller upper size); PCRE only if USEPCRE is defined.
+BENCHMARK_RANGE(Search_Success1_CachedDFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success1_CachedNFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK_RANGE(Search_Success1_CachedPCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_Success1_CachedRE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_Success1_CachedBitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
+
+// Benchmark: AltMatch optimisation (just to verify that it works)
+// Note that OnePass doesn't implement it!
+
+void SearchAltMatch(benchmark::State& state, SearchImpl* search) {
+  const std::string text = RandomText(state.range(0));  // anchored "\C*" search
+  search(state, "\\C*", text, Prog::kAnchored, /*expect_match=*/true);
+  state.SetBytesProcessed(state.iterations() * state.range(0));
+}
+
+void Search_AltMatch_DFA(benchmark::State& state)      { SearchAltMatch(state, SearchDFA); }
+void Search_AltMatch_NFA(benchmark::State& state)      { SearchAltMatch(state, SearchNFA); }
+void Search_AltMatch_OnePass(benchmark::State& state)  { SearchAltMatch(state, SearchOnePass); }
+void Search_AltMatch_BitState(benchmark::State& state) { SearchAltMatch(state, SearchBitState); }
+void Search_AltMatch_PCRE(benchmark::State& state)     { SearchAltMatch(state, SearchPCRE); }
+void Search_AltMatch_RE2(benchmark::State& state)      { SearchAltMatch(state, SearchRE2); }
+// Register the AltMatch wrappers above with identical size arguments; PCRE only if USEPCRE is defined.
+BENCHMARK_RANGE(Search_AltMatch_DFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_AltMatch_NFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_AltMatch_OnePass,  8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_AltMatch_BitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK_RANGE(Search_AltMatch_PCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_AltMatch_RE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
+
+void Search_AltMatch_CachedDFA(benchmark::State& state)      { SearchAltMatch(state, SearchCachedDFA); }
+void Search_AltMatch_CachedNFA(benchmark::State& state)      { SearchAltMatch(state, SearchCachedNFA); }
+void Search_AltMatch_CachedOnePass(benchmark::State& state)  { SearchAltMatch(state, SearchCachedOnePass); }
+void Search_AltMatch_CachedBitState(benchmark::State& state) { SearchAltMatch(state, SearchCachedBitState); }
+void Search_AltMatch_CachedPCRE(benchmark::State& state)     { SearchAltMatch(state, SearchCachedPCRE); }
+void Search_AltMatch_CachedRE2(benchmark::State& state)      { SearchAltMatch(state, SearchCachedRE2); }
+// Register the cached AltMatch wrappers above with identical size arguments; PCRE only if USEPCRE is defined.
+BENCHMARK_RANGE(Search_AltMatch_CachedDFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_AltMatch_CachedNFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_AltMatch_CachedOnePass,  8, 16<<20)->ThreadRange(1, NumCPUs());
+BENCHMARK_RANGE(Search_AltMatch_CachedBitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK_RANGE(Search_AltMatch_CachedPCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK_RANGE(Search_AltMatch_CachedRE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
+
+// Benchmark: use regexp to find phone number.
+
+void SearchDigits(benchmark::State& state, SearchImpl* search) {
+  const StringPiece phone("650-253-0001");  // fixed input; anchored, must match
+  search(state, "([0-9]+)-([0-9]+)-([0-9]+)", phone, Prog::kAnchored, true);
+  state.SetItemsProcessed(state.iterations());  // one item per iteration
+}
+
+void Search_Digits_DFA(benchmark::State& state)         { SearchDigits(state, SearchDFA); }
+void Search_Digits_NFA(benchmark::State& state)         { SearchDigits(state, SearchNFA); }
+void Search_Digits_OnePass(benchmark::State& state)     { SearchDigits(state, SearchOnePass); }
+void Search_Digits_PCRE(benchmark::State& state)        { SearchDigits(state, SearchPCRE); }
+void Search_Digits_RE2(benchmark::State& state)         { SearchDigits(state, SearchRE2); }
+void Search_Digits_BitState(benchmark::State& state)    { SearchDigits(state, SearchBitState); }
+// Register the wrappers above (no size range: the input is fixed); PCRE only if USEPCRE is defined.
+BENCHMARK(Search_Digits_DFA)->ThreadRange(1, NumCPUs());
+BENCHMARK(Search_Digits_NFA)->ThreadRange(1, NumCPUs());
+BENCHMARK(Search_Digits_OnePass)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE
+BENCHMARK(Search_Digits_PCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(Search_Digits_RE2)->ThreadRange(1, NumCPUs());
+BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs());
+
+// Benchmark: use regexp to parse digit fields in phone number.
+
+// Hands parse3 a three-group [0-9]+ pattern and a sample phone number;
+// each benchmark iteration is counted as one processed item.
+void Parse3Digits(benchmark::State& state, ParseImpl* parse3) {
+  parse3(state, "([0-9]+)-([0-9]+)-([0-9]+)", "650-253-0001");
+  state.SetItemsProcessed(state.iterations());
+}
+
+void Parse_Digits_NFA(benchmark::State& state)         { Parse3Digits(state, Parse3NFA); }
+void Parse_Digits_OnePass(benchmark::State& state)     { Parse3Digits(state, Parse3OnePass); }
+void Parse_Digits_PCRE(benchmark::State& state)        { Parse3Digits(state, Parse3PCRE); }
+void Parse_Digits_RE2(benchmark::State& state)         { Parse3Digits(state, Parse3RE2); }
+void Parse_Digits_Backtrack(benchmark::State& state)   { Parse3Digits(state, Parse3Backtrack); }
+void Parse_Digits_BitState(benchmark::State& state)    { Parse3Digits(state, Parse3BitState); }
+
+BENCHMARK(Parse_Digits_NFA)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_Digits_OnePass)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE  // the PCRE variant is registered only when built with -DUSEPCRE
+BENCHMARK(Parse_Digits_PCRE)->ThreadRange(1, NumCPUs());
+#endif  // USEPCRE
+BENCHMARK(Parse_Digits_RE2)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_Digits_Backtrack)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_Digits_BitState)->ThreadRange(1, NumCPUs());
+
+void Parse_CachedDigits_NFA(benchmark::State& state)         { Parse3Digits(state, Parse3CachedNFA); }
+void Parse_CachedDigits_OnePass(benchmark::State& state)     { Parse3Digits(state, Parse3CachedOnePass); }
+void Parse_CachedDigits_PCRE(benchmark::State& state)        { Parse3Digits(state, Parse3CachedPCRE); }
+void Parse_CachedDigits_RE2(benchmark::State& state)         { Parse3Digits(state, Parse3CachedRE2); }
+void Parse_CachedDigits_Backtrack(benchmark::State& state)   { Parse3Digits(state, Parse3CachedBacktrack); }
+void Parse_CachedDigits_BitState(benchmark::State& state)    { Parse3Digits(state, Parse3CachedBitState); }
+
+BENCHMARK(Parse_CachedDigits_NFA)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedDigits_OnePass)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE  // the PCRE variant is registered only when built with -DUSEPCRE
+BENCHMARK(Parse_CachedDigits_PCRE)->ThreadRange(1, NumCPUs());
+#endif  // USEPCRE
+BENCHMARK(Parse_CachedDigits_Backtrack)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedDigits_RE2)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs());
+
+// Hands parse3 a three-group pattern written with \d+ (rather than [0-9]+)
+// and a sample phone number; each iteration is counted as one item.
+void Parse3DigitDs(benchmark::State& state, ParseImpl* parse3) {
+  parse3(state, "(\\d+)-(\\d+)-(\\d+)", "650-253-0001");
+  state.SetItemsProcessed(state.iterations());
+}
+
+void Parse_DigitDs_NFA(benchmark::State& state)       { Parse3DigitDs(state, Parse3NFA); }
+void Parse_DigitDs_OnePass(benchmark::State& state)   { Parse3DigitDs(state, Parse3OnePass); }
+void Parse_DigitDs_PCRE(benchmark::State& state)      { Parse3DigitDs(state, Parse3PCRE); }
+void Parse_DigitDs_RE2(benchmark::State& state)       { Parse3DigitDs(state, Parse3RE2); }
+void Parse_DigitDs_Backtrack(benchmark::State& state) { Parse3DigitDs(state, Parse3Backtrack); }  // recompiles per iteration
+void Parse_DigitDs_BitState(benchmark::State& state)  { Parse3DigitDs(state, Parse3BitState); }   // recompiles per iteration
+
+BENCHMARK(Parse_DigitDs_NFA)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_DigitDs_OnePass)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE  // the PCRE variant is registered only when built with -DUSEPCRE
+BENCHMARK(Parse_DigitDs_PCRE)->ThreadRange(1, NumCPUs());
+#endif  // USEPCRE
+BENCHMARK(Parse_DigitDs_RE2)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_DigitDs_Backtrack)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_DigitDs_BitState)->ThreadRange(1, NumCPUs());
+
+void Parse_CachedDigitDs_NFA(benchmark::State& state)         { Parse3DigitDs(state, Parse3CachedNFA); }
+void Parse_CachedDigitDs_OnePass(benchmark::State& state)     { Parse3DigitDs(state, Parse3CachedOnePass); }
+void Parse_CachedDigitDs_PCRE(benchmark::State& state)        { Parse3DigitDs(state, Parse3CachedPCRE); }
+void Parse_CachedDigitDs_RE2(benchmark::State& state)         { Parse3DigitDs(state, Parse3CachedRE2); }
+void Parse_CachedDigitDs_Backtrack(benchmark::State& state)   { Parse3DigitDs(state, Parse3CachedBacktrack); }
+void Parse_CachedDigitDs_BitState(benchmark::State& state)    { Parse3DigitDs(state, Parse3CachedBitState); }
+
+BENCHMARK(Parse_CachedDigitDs_NFA)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedDigitDs_OnePass)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE  // the PCRE variant is registered only when built with -DUSEPCRE
+BENCHMARK(Parse_CachedDigitDs_PCRE)->ThreadRange(1, NumCPUs());
+#endif  // USEPCRE
+BENCHMARK(Parse_CachedDigitDs_Backtrack)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedDigitDs_RE2)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs());
+
+// Benchmark: splitting off leading number field.
+
+// Hands parse1 the pattern "[0-9]+-(.*)" (digits, a dash, one capture group)
+// and a sample phone number; each iteration is counted as one item.
+void Parse1Split(benchmark::State& state, ParseImpl* parse1) {
+  parse1(state, "[0-9]+-(.*)", "650-253-0001");
+  state.SetItemsProcessed(state.iterations());
+}
+
+void Parse_Split_NFA(benchmark::State& state)         { Parse1Split(state, Parse1NFA); }
+void Parse_Split_OnePass(benchmark::State& state)     { Parse1Split(state, Parse1OnePass); }
+void Parse_Split_PCRE(benchmark::State& state)        { Parse1Split(state, Parse1PCRE); }
+void Parse_Split_RE2(benchmark::State& state)         { Parse1Split(state, Parse1RE2); }
+void Parse_Split_BitState(benchmark::State& state)    { Parse1Split(state, Parse1BitState); }
+
+BENCHMARK(Parse_Split_NFA)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_Split_OnePass)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE  // the PCRE variant is registered only when built with -DUSEPCRE
+BENCHMARK(Parse_Split_PCRE)->ThreadRange(1, NumCPUs());
+#endif  // USEPCRE
+BENCHMARK(Parse_Split_RE2)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_Split_BitState)->ThreadRange(1, NumCPUs());
+
+void Parse_CachedSplit_NFA(benchmark::State& state)         { Parse1Split(state, Parse1CachedNFA); }
+void Parse_CachedSplit_OnePass(benchmark::State& state)     { Parse1Split(state, Parse1CachedOnePass); }
+void Parse_CachedSplit_PCRE(benchmark::State& state)        { Parse1Split(state, Parse1CachedPCRE); }
+void Parse_CachedSplit_RE2(benchmark::State& state)         { Parse1Split(state, Parse1CachedRE2); }
+void Parse_CachedSplit_BitState(benchmark::State& state)    { Parse1Split(state, Parse1CachedBitState); }
+
+BENCHMARK(Parse_CachedSplit_NFA)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedSplit_OnePass)->ThreadRange(1, NumCPUs());
+#ifdef USEPCRE  // the PCRE variant is registered only when built with -DUSEPCRE
+BENCHMARK(Parse_CachedSplit_PCRE)->ThreadRange(1, NumCPUs());
+#endif  // USEPCRE
+BENCHMARK(Parse_CachedSplit_RE2)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs());
+
+// Benchmark: splitting off leading number field but harder (ambiguous regexp).
+
+// Hands run the pattern "[0-9]+.(.*)", whose '.' separator can itself match
+// a digit, plus a sample phone number; each iteration counts as one item.
+void Parse1SplitHard(benchmark::State& state, ParseImpl* run) {
+  run(state, "[0-9]+.(.*)", "650-253-0001");
+  state.SetItemsProcessed(state.iterations());
+}
+
+void Parse_SplitHard_NFA(benchmark::State& state)         { Parse1SplitHard(state, Parse1NFA); }
+void Parse_SplitHard_PCRE(benchmark::State& state)        { Parse1SplitHard(state, Parse1PCRE); }
+void Parse_SplitHard_RE2(benchmark::State& state)         { Parse1SplitHard(state, Parse1RE2); }
+void Parse_SplitHard_BitState(benchmark::State& state)    { Parse1SplitHard(state, Parse1BitState); }
+
+#ifdef USEPCRE  // the PCRE variant is registered only when built with -DUSEPCRE
+BENCHMARK(Parse_SplitHard_PCRE)->ThreadRange(1, NumCPUs());
+#endif  // USEPCRE
+BENCHMARK(Parse_SplitHard_RE2)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_SplitHard_BitState)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_SplitHard_NFA)->ThreadRange(1, NumCPUs());
+
+void Parse_CachedSplitHard_NFA(benchmark::State& state)       { Parse1SplitHard(state, Parse1CachedNFA); }
+void Parse_CachedSplitHard_PCRE(benchmark::State& state)      { Parse1SplitHard(state, Parse1CachedPCRE); }
+void Parse_CachedSplitHard_RE2(benchmark::State& state)       { Parse1SplitHard(state, Parse1CachedRE2); }
+void Parse_CachedSplitHard_BitState(benchmark::State& state)  { Parse1SplitHard(state, Parse1CachedBitState); }
+void Parse_CachedSplitHard_Backtrack(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedBacktrack); }
+
+#ifdef USEPCRE  // the PCRE variant is registered only when built with -DUSEPCRE
+BENCHMARK(Parse_CachedSplitHard_PCRE)->ThreadRange(1, NumCPUs());
+#endif  // USEPCRE
+BENCHMARK(Parse_CachedSplitHard_RE2)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedSplitHard_BitState)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedSplitHard_NFA)->ThreadRange(1, NumCPUs());
+BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs());
+
+// Benchmark: Parse1SplitHard, big text, small match.
+
+// Builds a text of 100000 'x' bytes followed by a phone number, so the only
+// digits sit at the very end, and hands it to run with the split pattern.
+void Parse1SplitBig1(benchmark::State& state, ParseImpl* run) {
+  std::string haystack(100000, 'x');
+  haystack += "650-253-0001";
+  // std::string converts implicitly to the StringPiece that run takes.
+  run(state, "[0-9]+.(.*)", haystack);
+  state.SetItemsProcessed(state.iterations());
+}
+
+void Parse_CachedSplitBig1_PCRE(benchmark::State& state)      { Parse1SplitBig1(state, SearchParse1CachedPCRE); }
+void Parse_CachedSplitBig1_RE2(benchmark::State& state)       { Parse1SplitBig1(state, SearchParse1CachedRE2); }
+
+#ifdef USEPCRE  // the PCRE variant is registered only when built with -DUSEPCRE
+BENCHMARK(Parse_CachedSplitBig1_PCRE)->ThreadRange(1, NumCPUs());
+#endif  // USEPCRE
+BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs());
+
+// Benchmark: Parse1SplitHard, big text, big match.
+
+// Builds "650-253-" followed by 100000 '0' bytes, so the capture group has
+// to cover almost the whole text, and hands it to run with the split pattern.
+void Parse1SplitBig2(benchmark::State& state, ParseImpl* run) {
+  std::string haystack("650-253-");
+  haystack.append(100000, '0');
+  // std::string converts implicitly to the StringPiece that run takes.
+  run(state, "[0-9]+.(.*)", haystack);
+  state.SetItemsProcessed(state.iterations());
+}
+
+void Parse_CachedSplitBig2_PCRE(benchmark::State& state)      { Parse1SplitBig2(state, SearchParse1CachedPCRE); }
+void Parse_CachedSplitBig2_RE2(benchmark::State& state)       { Parse1SplitBig2(state, SearchParse1CachedRE2); }
+
+#ifdef USEPCRE  // the PCRE variant is registered only when built with -DUSEPCRE
+BENCHMARK(Parse_CachedSplitBig2_PCRE)->ThreadRange(1, NumCPUs());
+#endif  // USEPCRE
+BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs());
+
+// Benchmark: measure time required to parse (but not execute)
+// a simple regular expression.
+
+void ParseRegexp(benchmark::State& state, const std::string& regexp) {
+  for (auto _ : state) {  // each iteration: parse, verify, release
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);  // fails if Parse returned null
+    re->Decref();
+  }
+}
+
+void SimplifyRegexp(benchmark::State& state, const std::string& regexp) {
+  for (auto _ : state) {  // each iteration: parse, simplify, release both
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Regexp* sre = re->Simplify();  // result is released separately below
+    CHECK(sre);
+    sre->Decref();
+    re->Decref();
+  }
+}
+
+void NullWalkRegexp(benchmark::State& state, const std::string& regexp) {
+  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);  // parsed once
+  CHECK(re);
+  for (auto _ : state) {  // only the NullWalk call is inside the timed loop
+    re->NullWalk();
+  }
+  re->Decref();
+}
+
+void SimplifyCompileRegexp(benchmark::State& state, const std::string& regexp) {
+  for (auto _ : state) {  // each iteration: parse, simplify, compile, release
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Regexp* sre = re->Simplify();
+    CHECK(sre);
+    Prog* prog = sre->CompileToProg(0);  // compiles the simplified form
+    CHECK(prog);
+    delete prog;
+    sre->Decref();
+    re->Decref();
+  }
+}
+
+void CompileRegexp(benchmark::State& state, const std::string& regexp) {
+  for (auto _ : state) {  // each iteration: parse, compile, release
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);  // no explicit Simplify step here
+    CHECK(prog);
+    delete prog;
+    re->Decref();
+  }
+}
+
+void CompileToProg(benchmark::State& state, const std::string& regexp) {
+  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);  // parsed once
+  CHECK(re);
+  for (auto _ : state) {  // only compile + delete is inside the timed loop
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    delete prog;
+  }
+  re->Decref();
+}
+
+void CompileByteMap(benchmark::State& state, const std::string& regexp) {
+  Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);  // parsed once
+  CHECK(re);
+  Prog* prog = re->CompileToProg(0);  // compiled once, before the loop
+  CHECK(prog);
+  for (auto _ : state) {  // only ComputeByteMap is inside the timed loop
+    prog->ComputeByteMap();
+  }
+  delete prog;
+  re->Decref();
+}
+
+void CompilePCRE(benchmark::State& state, const std::string& regexp) {
+  for (auto _ : state) {  // constructs and destroys one PCRE per iteration
+    PCRE re(regexp, PCRE::UTF8);
+    CHECK_EQ(re.error(), "");  // a non-empty error string fails the check
+  }
+}
+
+void CompileRE2(benchmark::State& state, const std::string& regexp) {
+  for (auto _ : state) {  // constructs and destroys one RE2 per iteration
+    RE2 re(regexp);
+    CHECK_EQ(re.error(), "");  // a non-empty error string fails the check
+  }
+}
+
+void RunBuild(benchmark::State& state, const std::string& regexp,
+              void (*run)(benchmark::State&, const std::string&)) {
+  run(state, regexp);  // run contains the timed loop over state
+  state.SetItemsProcessed(state.iterations());  // one build == one item
+}
+
+}  // namespace re2
+
+DEFINE_FLAG(std::string, compile_regexp, "(.*)-(\\d+)-of-(\\d+)",
+            "regexp for compile benchmarks");
+
+namespace re2 {
+
+void BM_PCRE_Compile(benchmark::State& state)             { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompilePCRE); }
+void BM_Regexp_Parse(benchmark::State& state)             { RunBuild(state, GetFlag(FLAGS_compile_regexp), ParseRegexp); }
+void BM_Regexp_Simplify(benchmark::State& state)          { RunBuild(state, GetFlag(FLAGS_compile_regexp), SimplifyRegexp); }
+void BM_CompileToProg(benchmark::State& state)            { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileToProg); }
+void BM_CompileByteMap(benchmark::State& state)           { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileByteMap); }
+void BM_Regexp_Compile(benchmark::State& state)           { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileRegexp); }
+void BM_Regexp_SimplifyCompile(benchmark::State& state)   { RunBuild(state, GetFlag(FLAGS_compile_regexp), SimplifyCompileRegexp); }
+void BM_Regexp_NullWalk(benchmark::State& state)          { RunBuild(state, GetFlag(FLAGS_compile_regexp), NullWalkRegexp); }
+void BM_RE2_Compile(benchmark::State& state)              { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileRE2); }
+
+#ifdef USEPCRE  // the PCRE variant is registered only when built with -DUSEPCRE
+BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs());
+#endif  // USEPCRE
+BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs());
+BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs());
+BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs());
+BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs());
+BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs());
+BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs());
+BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs());
+BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs());
+
+// Makes RandomText of size state.range(0), appends a phone number, then calls
+// search to look for the phone-number regexp on every benchmark iteration.
+void SearchPhone(benchmark::State& state, ParseImpl* search) {
+  std::string text = RandomText(state.range(0));
+  text += "(650) 253-0001";  // appended target for the pattern below
+  search(state, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", text);
+  state.SetBytesProcessed(state.iterations() * state.range(0));  // prefix only
+}
+
+void SearchPhone_CachedPCRE(benchmark::State& state) {
+  SearchPhone(state, SearchParse2CachedPCRE);
+}
+
+void SearchPhone_CachedRE2(benchmark::State& state) {
+  SearchPhone(state, SearchParse2CachedRE2);
+}
+
+#ifdef USEPCRE  // the PCRE variant is registered only when built with -DUSEPCRE
+BENCHMARK_RANGE(SearchPhone_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
+#endif  // USEPCRE
+BENCHMARK_RANGE(SearchPhone_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
+
+/*
+TODO(rsc): Make this work again.
+void CacheFill(int iters, int n, SearchImpl *srch) {
+  std::string s = DeBruijnString(n+1);
+  std::string t;
+  for (int i = n+1; i < 20; i++) {
+    t = s + s;
+    using std::swap;
+    swap(s, t);
+  }
+  srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s,
+       Prog::kUnanchored, true);
+  SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*s.size());
+}
+
+void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); }
+void CacheFillRE2(int i, int n)  { CacheFill(i, n, SearchCachedRE2); }
+void CacheFillNFA(int i, int n)  { CacheFill(i, n, SearchCachedNFA); }
+void CacheFillDFA(int i, int n)  { CacheFill(i, n, SearchCachedDFA); }
+
+// BENCHMARK_WITH_ARG uses __LINE__ to generate distinct identifiers
+// for the static BenchmarkRegisterer, which makes it unusable inside
+// a macro like DO24 below.  MY_BENCHMARK_WITH_ARG uses the argument a
+// to make the identifiers distinct (only possible when 'a' is a simple
+// expression like 2, not like 1+1).
+#define MY_BENCHMARK_WITH_ARG(n, a) \
+  bool __benchmark_ ## n ## a =     \
+    (new ::testing::Benchmark(#n, NewPermanentCallback(&n)))->ThreadRange(1, NumCPUs());
+
+#define DO24(A, B) \
+  A(B, 1);    A(B, 2);    A(B, 3);    A(B, 4);    A(B, 5);    A(B, 6);  \
+  A(B, 7);    A(B, 8);    A(B, 9);    A(B, 10);   A(B, 11);   A(B, 12); \
+  A(B, 13);   A(B, 14);   A(B, 15);   A(B, 16);   A(B, 17);   A(B, 18); \
+  A(B, 19);   A(B, 20);   A(B, 21);   A(B, 22);   A(B, 23);   A(B, 24);
+
+DO24(MY_BENCHMARK_WITH_ARG, CacheFillPCRE)
+DO24(MY_BENCHMARK_WITH_ARG, CacheFillNFA)
+DO24(MY_BENCHMARK_WITH_ARG, CacheFillRE2)
+DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA)
+
+#undef DO24
+#undef MY_BENCHMARK_WITH_ARG
+*/
+
+////////////////////////////////////////////////////////////////////////
+//
+// Implementation routines.  Sad that there are so many,
+// but all the interfaces are slightly different.
+
+// Runs implementation to search for regexp in text on every iteration.
+// Expect_match says whether the regexp should be found.
+// Anchor says whether to run an anchored search.
+
+void SearchDFA(benchmark::State& state, const char* regexp,
+               const StringPiece& text, Prog::Anchor anchor,
+               bool expect_match) {
+  for (auto _ : state) {  // parse, compile and search on every iteration
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    bool failed = false;  // out-parameter of SearchDFA, checked below
+    CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
+                             NULL, &failed, NULL),
+             expect_match);
+    CHECK(!failed);
+    delete prog;
+    re->Decref();
+  }
+}
+
+void SearchNFA(benchmark::State& state, const char* regexp,
+               const StringPiece& text, Prog::Anchor anchor,
+               bool expect_match) {
+  for (auto _ : state) {  // parse, compile and search on every iteration
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
+                             NULL, 0),  // no submatch array is passed
+             expect_match);
+    delete prog;
+    re->Decref();
+  }
+}
+
+void SearchOnePass(benchmark::State& state, const char* regexp,
+                   const StringPiece& text, Prog::Anchor anchor,
+                   bool expect_match) {
+  for (auto _ : state) {  // parse, compile and search on every iteration
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    CHECK(prog->IsOnePass());  // fails unless the program is one-pass
+    CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
+             expect_match);
+    delete prog;
+    re->Decref();
+  }
+}
+
+void SearchBitState(benchmark::State& state, const char* regexp,
+                    const StringPiece& text, Prog::Anchor anchor,
+                    bool expect_match) {
+  for (auto _ : state) {  // parse, compile and search on every iteration
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    CHECK(prog->CanBitState());  // fails unless BitState can run this program
+    CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
+             expect_match);
+    delete prog;
+    re->Decref();
+  }
+}
+
+void SearchPCRE(benchmark::State& state, const char* regexp,
+                const StringPiece& text, Prog::Anchor anchor,
+                bool expect_match) {
+  for (auto _ : state) {  // a fresh PCRE object is built on every iteration
+    PCRE re(regexp, PCRE::UTF8);
+    CHECK_EQ(re.error(), "");
+    if (anchor == Prog::kAnchored)  // anchored maps to FullMatch
+      CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
+    else  // anything else maps to PartialMatch
+      CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
+  }
+}
+
+void SearchRE2(benchmark::State& state, const char* regexp,
+               const StringPiece& text, Prog::Anchor anchor,
+               bool expect_match) {
+  for (auto _ : state) {  // a fresh RE2 object is built on every iteration
+    RE2 re(regexp);
+    CHECK_EQ(re.error(), "");
+    if (anchor == Prog::kAnchored)  // anchored maps to FullMatch
+      CHECK_EQ(RE2::FullMatch(text, re), expect_match);
+    else  // anything else maps to PartialMatch
+      CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
+  }
+}
+
+// SearchCachedXXX is like SearchXXX but only does the
+// regexp parsing and compiling once.  This lets us measure
+// search time without the per-regexp overhead.
+
+Prog* GetCachedProg(const char* regexp) {
+  static auto& mutex = *new Mutex;
+  MutexLock lock(&mutex);
+  static auto& cache = *new std::unordered_map<std::string, Prog*>;
+  Prog*& prog = cache[regexp];  // a new key yields a null slot to fill in
+  if (prog == NULL) {
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    // Assigning through the slot reference stores the program in the cache.
+    prog = re->CompileToProg(int64_t{1}<<31);  // mostly for the DFA
+    CHECK(prog);
+    re->Decref();
+    // We must call this here - while we have exclusive access.
+    prog->IsOnePass();
+  }
+  return prog;
+}
+
+PCRE* GetCachedPCRE(const char* regexp) {
+  static auto& mutex = *new Mutex;
+  MutexLock lock(&mutex);
+  static auto& cache = *new std::unordered_map<std::string, PCRE*>;
+  PCRE*& re = cache[regexp];  // a new key yields a null slot to fill in
+  if (re == NULL) {
+    // Assigning through the slot reference stores the object in the cache.
+    re = new PCRE(regexp, PCRE::UTF8);
+    CHECK_EQ(re->error(), "");
+  }
+  return re;
+}
+
+RE2* GetCachedRE2(const char* regexp) {
+  static auto& mutex = *new Mutex;
+  MutexLock lock(&mutex);
+  static auto& cache = *new std::unordered_map<std::string, RE2*>;
+  RE2*& re = cache[regexp];  // a new key yields a null slot to fill in
+  if (re == NULL) {
+    // Assigning through the slot reference stores the object in the cache.
+    re = new RE2(regexp);
+    CHECK_EQ(re->error(), "");
+  }
+  return re;
+}
+
+void SearchCachedDFA(benchmark::State& state, const char* regexp,
+                     const StringPiece& text, Prog::Anchor anchor,
+                     bool expect_match) {
+  Prog* prog = GetCachedProg(regexp);  // fetched before the timed loop
+  for (auto _ : state) {
+    bool failed = false;  // out-parameter of SearchDFA, checked below
+    CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
+                             NULL, &failed, NULL),
+             expect_match);
+    CHECK(!failed);
+  }
+}
+
+void SearchCachedNFA(benchmark::State& state, const char* regexp,
+                     const StringPiece& text, Prog::Anchor anchor,
+                     bool expect_match) {
+  Prog* prog = GetCachedProg(regexp);  // fetched before the timed loop
+  for (auto _ : state) {
+    CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
+                             NULL, 0),  // no submatch array is passed
+             expect_match);
+  }
+}
+
+void SearchCachedOnePass(benchmark::State& state, const char* regexp,
+                         const StringPiece& text, Prog::Anchor anchor,
+                         bool expect_match) {
+  Prog* prog = GetCachedProg(regexp);  // fetched before the timed loop
+  CHECK(prog->IsOnePass());  // fails unless the program is one-pass
+  for (auto _ : state) {
+    CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
+             expect_match);
+  }
+}
+
+void SearchCachedBitState(benchmark::State& state, const char* regexp,
+                          const StringPiece& text, Prog::Anchor anchor,
+                          bool expect_match) {
+  Prog* prog = GetCachedProg(regexp);  // fetched before the timed loop
+  CHECK(prog->CanBitState());  // fails unless BitState can run this program
+  for (auto _ : state) {
+    CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
+             expect_match);
+  }
+}
+
+void SearchCachedPCRE(benchmark::State& state, const char* regexp,
+                      const StringPiece& text, Prog::Anchor anchor,
+                      bool expect_match) {
+  PCRE& re = *GetCachedPCRE(regexp);  // fetched before the timed loop
+  for (auto _ : state) {
+    if (anchor == Prog::kAnchored)  // anchored maps to FullMatch
+      CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
+    else  // anything else maps to PartialMatch
+      CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
+  }
+}
+
+void SearchCachedRE2(benchmark::State& state, const char* regexp,
+                     const StringPiece& text, Prog::Anchor anchor,
+                     bool expect_match) {
+  RE2& re = *GetCachedRE2(regexp);  // fetched before the timed loop
+  for (auto _ : state) {
+    if (anchor == Prog::kAnchored)  // anchored maps to FullMatch
+      CHECK_EQ(RE2::FullMatch(text, re), expect_match);
+    else  // anything else maps to PartialMatch
+      CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
+  }
+}
+
+// Runs implementation to full match regexp against text,
+// extracting three submatches.  Expects match always.
+
+void Parse3NFA(benchmark::State& state, const char* regexp,
+               const StringPiece& text) {
+  for (auto _ : state) {  // parse, compile and match on every iteration
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    StringPiece sp[4];  // 4 because sp[0] is whole match.
+    CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
+                          Prog::kFullMatch, sp, 4));
+    delete prog;
+    re->Decref();
+  }
+}
+
+void Parse3OnePass(benchmark::State& state, const char* regexp,
+                   const StringPiece& text) {
+  for (auto _ : state) {  // parse, compile and match on every iteration
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    CHECK(prog->IsOnePass());  // fails unless the program is one-pass
+    StringPiece sp[4];  // 4 because sp[0] is whole match.
+    CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
+    delete prog;
+    re->Decref();
+  }
+}
+
+void Parse3BitState(benchmark::State& state, const char* regexp,
+                    const StringPiece& text) {
+  for (auto _ : state) {  // parse, compile and match on every iteration
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    CHECK(prog->CanBitState());  // fails unless BitState can run this program
+    StringPiece sp[4];  // 4 because sp[0] is whole match.
+    CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
+    delete prog;
+    re->Decref();
+  }
+}
+
+void Parse3Backtrack(benchmark::State& state, const char* regexp,
+                     const StringPiece& text) {
+  for (auto _ : state) {  // parse, compile and match on every iteration
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    StringPiece sp[4];  // 4 because sp[0] is whole match.
+    CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
+    delete prog;
+    re->Decref();
+  }
+}
+
+void Parse3PCRE(benchmark::State& state, const char* regexp,
+                const StringPiece& text) {
+  for (auto _ : state) {  // a fresh PCRE object is built on every iteration
+    PCRE re(regexp, PCRE::UTF8);
+    CHECK_EQ(re.error(), "");
+    StringPiece sp1, sp2, sp3;  // receive the three capture groups
+    CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
+  }
+}
+
+void Parse3RE2(benchmark::State& state, const char* regexp,
+               const StringPiece& text) {
+  for (auto _ : state) {  // a fresh RE2 object is built on every iteration
+    RE2 re(regexp);
+    CHECK_EQ(re.error(), "");
+    StringPiece sp1, sp2, sp3;  // receive the three capture groups
+    CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
+  }
+}
+
+void Parse3CachedNFA(benchmark::State& state, const char* regexp,
+                     const StringPiece& text) {
+  Prog* prog = GetCachedProg(regexp);  // fetched before the timed loop
+  StringPiece sp[4];  // 4 because sp[0] is whole match.
+  for (auto _ : state) {  // only the match itself is inside the loop
+    CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
+                          Prog::kFullMatch, sp, 4));
+  }
+}
+
+void Parse3CachedOnePass(benchmark::State& state, const char* regexp,
+                         const StringPiece& text) {
+  Prog* prog = GetCachedProg(regexp);  // fetched before the timed loop
+  CHECK(prog->IsOnePass());  // fails unless the program is one-pass
+  StringPiece sp[4];  // 4 because sp[0] is whole match.
+  for (auto _ : state) {  // only the match itself is inside the loop
+    CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
+  }
+}
+
+void Parse3CachedBitState(benchmark::State& state, const char* regexp,
+                          const StringPiece& text) {
+  Prog* prog = GetCachedProg(regexp);  // fetched before the timed loop
+  CHECK(prog->CanBitState());  // fails unless BitState can run this program
+  StringPiece sp[4];  // 4 because sp[0] is whole match.
+  for (auto _ : state) {  // only the match itself is inside the loop
+    CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
+  }
+}
+
+void Parse3CachedBacktrack(benchmark::State& state, const char* regexp,
+                           const StringPiece& text) {
+  Prog* prog = GetCachedProg(regexp);  // fetched before the timed loop
+  StringPiece sp[4];  // 4 because sp[0] is whole match.
+  for (auto _ : state) {  // only the match itself is inside the loop
+    CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
+  }
+}
+
+void Parse3CachedPCRE(benchmark::State& state, const char* regexp,
+                      const StringPiece& text) {
+  PCRE& re = *GetCachedPCRE(regexp);  // fetched before the timed loop
+  StringPiece sp1, sp2, sp3;  // receive the three capture groups
+  for (auto _ : state) {  // only the match itself is inside the loop
+    CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
+  }
+}
+
+void Parse3CachedRE2(benchmark::State& state, const char* regexp,
+                     const StringPiece& text) {
+  RE2& re = *GetCachedRE2(regexp);  // fetched before the timed loop
+  StringPiece sp1, sp2, sp3;  // receive the three capture groups
+  for (auto _ : state) {  // only the match itself is inside the loop
+    CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
+  }
+}
+
+// Runs implementation to full match regexp against text,
+// extracting one submatch.  Expects match always.
+
+void Parse1NFA(benchmark::State& state, const char* regexp,
+               const StringPiece& text) {
+  for (auto _ : state) {  // parse, compile and match on every iteration
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    StringPiece sp[2];  // 2 because sp[0] is whole match.
+    CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
+                          Prog::kFullMatch, sp, 2));
+    delete prog;
+    re->Decref();
+  }
+}
+
+void Parse1OnePass(benchmark::State& state, const char* regexp,
+                   const StringPiece& text) {
+  for (auto _ : state) {  // parse, compile and match on every iteration
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    CHECK(prog->IsOnePass());  // fails unless the program is one-pass
+    StringPiece sp[2];  // 2 because sp[0] is whole match.
+    CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
+    delete prog;
+    re->Decref();
+  }
+}
+
+void Parse1BitState(benchmark::State& state, const char* regexp,
+                    const StringPiece& text) {
+  for (auto _ : state) {  // parse, compile and match on every iteration
+    Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
+    CHECK(re);
+    Prog* prog = re->CompileToProg(0);
+    CHECK(prog);
+    CHECK(prog->CanBitState());  // fails unless BitState can run this program
+    StringPiece sp[2];  // 2 because sp[0] is whole match.
+    CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
+    delete prog;
+    re->Decref();
+  }
+}
+
+void Parse1PCRE(benchmark::State& state, const char* regexp,
+                const StringPiece& text) {
+  for (auto _ : state) {  // a fresh PCRE object is built on every iteration
+    PCRE re(regexp, PCRE::UTF8);
+    CHECK_EQ(re.error(), "");
+    StringPiece sp1;  // receives the single capture group
+    CHECK(PCRE::FullMatch(text, re, &sp1));
+  }
+}
+
+void Parse1RE2(benchmark::State& state, const char* regexp,
+               const StringPiece& text) {
+  for (auto _ : state) {  // a fresh RE2 object is built on every iteration
+    RE2 re(regexp);
+    CHECK_EQ(re.error(), "");
+    StringPiece sp1;  // receives the single capture group
+    CHECK(RE2::FullMatch(text, re, &sp1));
+  }
+}
+
+void Parse1CachedNFA(benchmark::State& state, const char* regexp,
+                     const StringPiece& text) {
+  Prog* prog = GetCachedProg(regexp);  // fetched before the timed loop
+  StringPiece sp[2];  // 2 because sp[0] is whole match.
+  for (auto _ : state) {  // only the match itself is inside the loop
+    CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
+                          Prog::kFullMatch, sp, 2));
+  }
+}
+
+void Parse1CachedOnePass(benchmark::State& state, const char* regexp,
+                         const StringPiece& text) {
+  Prog* prog = GetCachedProg(regexp);  // fetched before the timed loop
+  CHECK(prog->IsOnePass());  // fails unless the program is one-pass
+  StringPiece sp[2];  // 2 because sp[0] is whole match.
+  for (auto _ : state) {  // only the match itself is inside the loop
+    CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
+  }
+}
+
+// Benchmark body: obtain the Prog for `regexp` from GetCachedProg() and check
+// CanBitState() once, before the loop, then run one Prog::kAnchored /
+// Prog::kFullMatch SearchBitState over `text` per iteration.
+void Parse1CachedBitState(benchmark::State& state, const char* regexp,
+                          const StringPiece& text) {
+  Prog* prog = GetCachedProg(regexp);
+  CHECK(prog->CanBitState());
+  StringPiece sp[2];  // 2 because sp[0] is whole match.
+  for (auto _ : state) {
+    CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
+  }
+}
+
+// Benchmark body: obtain the Prog for `regexp` from GetCachedProg() once,
+// before the loop, then run one Prog::kAnchored / Prog::kFullMatch
+// UnsafeSearchBacktrack over `text` per iteration.
+void Parse1CachedBacktrack(benchmark::State& state, const char* regexp,
+                           const StringPiece& text) {
+  Prog* prog = GetCachedProg(regexp);
+  StringPiece sp[2];  // 2 because sp[0] is whole match.
+  for (auto _ : state) {
+    CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
+  }
+}
+
+// Benchmark body: obtain the PCRE for `regexp` from GetCachedPCRE() once,
+// before the loop, then run PCRE::FullMatch on `text` with one StringPiece
+// output argument per iteration.
+void Parse1CachedPCRE(benchmark::State& state, const char* regexp,
+                      const StringPiece& text) {
+  PCRE& re = *GetCachedPCRE(regexp);
+  StringPiece sp1;
+  for (auto _ : state) {
+    CHECK(PCRE::FullMatch(text, re, &sp1));
+  }
+}
+
+// Benchmark body: obtain the RE2 for `regexp` from GetCachedRE2() once,
+// before the loop, then run RE2::FullMatch on `text` with one StringPiece
+// output argument per iteration.
+void Parse1CachedRE2(benchmark::State& state, const char* regexp,
+                     const StringPiece& text) {
+  RE2& re = *GetCachedRE2(regexp);
+  StringPiece sp1;
+  for (auto _ : state) {
+    CHECK(RE2::FullMatch(text, re, &sp1));
+  }
+}
+
+// Benchmark body: obtain the PCRE for `regexp` from GetCachedPCRE() once,
+// before the loop, then run PCRE::PartialMatch on `text` with two StringPiece
+// output arguments per iteration.
+void SearchParse2CachedPCRE(benchmark::State& state, const char* regexp,
+                            const StringPiece& text) {
+  PCRE& re = *GetCachedPCRE(regexp);
+  for (auto _ : state) {
+    StringPiece sp1, sp2;
+    CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
+  }
+}
+
+// Benchmark body: obtain the RE2 for `regexp` from GetCachedRE2() once,
+// before the loop, then run RE2::PartialMatch on `text` with two StringPiece
+// output arguments per iteration.
+void SearchParse2CachedRE2(benchmark::State& state, const char* regexp,
+                           const StringPiece& text) {
+  RE2& re = *GetCachedRE2(regexp);
+  for (auto _ : state) {
+    StringPiece sp1, sp2;
+    CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
+  }
+}
+
+// Benchmark body: obtain the PCRE for `regexp` from GetCachedPCRE() once,
+// before the loop, then run PCRE::PartialMatch on `text` with one StringPiece
+// output argument per iteration.
+void SearchParse1CachedPCRE(benchmark::State& state, const char* regexp,
+                            const StringPiece& text) {
+  PCRE& re = *GetCachedPCRE(regexp);
+  for (auto _ : state) {
+    StringPiece sp1;
+    CHECK(PCRE::PartialMatch(text, re, &sp1));
+  }
+}
+
+// Benchmark body: obtain the RE2 for `regexp` from GetCachedRE2() once,
+// before the loop, then run RE2::PartialMatch on `text` with one StringPiece
+// output argument per iteration.
+void SearchParse1CachedRE2(benchmark::State& state, const char* regexp,
+                           const StringPiece& text) {
+  RE2& re = *GetCachedRE2(regexp);
+  for (auto _ : state) {
+    StringPiece sp1;
+    CHECK(RE2::PartialMatch(text, re, &sp1));
+  }
+}
+
+// Benchmark body: PCRE::PartialMatch of the empty pattern against the empty
+// string. The PCRE object is built once, before the loop; the match result
+// is deliberately not checked.
+void EmptyPartialMatchPCRE(benchmark::State& state) {
+  PCRE empty_pattern("");
+  for (auto _ : state)
+    PCRE::PartialMatch("", empty_pattern);
+}
+
+// Benchmark body: RE2::PartialMatch of the empty pattern against the empty
+// string. The RE2 object is built once, before the loop; the match result
+// is deliberately not checked.
+void EmptyPartialMatchRE2(benchmark::State& state) {
+  RE2 empty_pattern("");
+  for (auto _ : state)
+    RE2::PartialMatch("", empty_pattern);
+}
+#ifdef USEPCRE
+BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs());
+
+// Benchmark body: PCRE::PartialMatch of the literal pattern "abcdefg" against
+// the identical string. The PCRE object is built once, before the loop.
+void SimplePartialMatchPCRE(benchmark::State& state) {
+  PCRE literal("abcdefg");
+  for (auto _ : state)
+    PCRE::PartialMatch("abcdefg", literal);
+}
+
+// Benchmark body: RE2::PartialMatch of the literal pattern "abcdefg" against
+// the identical string. The RE2 object is built once, before the loop.
+void SimplePartialMatchRE2(benchmark::State& state) {
+  RE2 literal("abcdefg");
+  for (auto _ : state)
+    RE2::PartialMatch("abcdefg", literal);
+}
+#ifdef USEPCRE
+BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs());
+
+static std::string http_text =
+  "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf"
+  "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1";
+
+// Benchmark body: PCRE::PartialMatch of a request-line pattern against
+// http_text, writing the single capture group into a StringPiece.
+// The PCRE object is built once, before the loop.
+void HTTPPartialMatchPCRE(benchmark::State& state) {
+  PCRE request_line("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
+  StringPiece captured;
+  for (auto _ : state)
+    PCRE::PartialMatch(http_text, request_line, &captured);
+}
+
+// Benchmark body: RE2::PartialMatch of a request-line pattern against
+// http_text, writing the single capture group into a StringPiece.
+// The RE2 object is built once, before the loop.
+void HTTPPartialMatchRE2(benchmark::State& state) {
+  RE2 request_line("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
+  StringPiece captured;
+  for (auto _ : state)
+    RE2::PartialMatch(http_text, request_line, &captured);
+}
+
+#ifdef USEPCRE
+BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
+
+static std::string smallhttp_text =
+  "GET /abc HTTP/1.1";
+
+// Benchmark body: same request-line pattern as HTTPPartialMatchPCRE, matched
+// with PCRE::PartialMatch against the shorter smallhttp_text.
+// The PCRE object is built once, before the loop.
+void SmallHTTPPartialMatchPCRE(benchmark::State& state) {
+  PCRE request_line("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
+  StringPiece captured;
+  for (auto _ : state)
+    PCRE::PartialMatch(smallhttp_text, request_line, &captured);
+}
+
+// Benchmark body: same request-line pattern as HTTPPartialMatchRE2, matched
+// with RE2::PartialMatch against the shorter smallhttp_text.
+// The RE2 object is built once, before the loop.
+void SmallHTTPPartialMatchRE2(benchmark::State& state) {
+  RE2 request_line("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
+  StringPiece captured;
+  for (auto _ : state)
+    RE2::PartialMatch(smallhttp_text, request_line, &captured);
+}
+
+#ifdef USEPCRE
+BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
+
+// Benchmark body: PCRE::PartialMatch of "(?-s)^(.+)" against http_text,
+// writing the capture group into a StringPiece.
+// The PCRE object is built once, before the loop.
+void DotMatchPCRE(benchmark::State& state) {
+  PCRE dot_plus("(?-s)^(.+)");
+  StringPiece captured;
+  for (auto _ : state)
+    PCRE::PartialMatch(http_text, dot_plus, &captured);
+}
+
+// Benchmark body: RE2::PartialMatch of "(?-s)^(.+)" against http_text,
+// writing the capture group into a StringPiece.
+// The RE2 object is built once, before the loop.
+void DotMatchRE2(benchmark::State& state) {
+  RE2 dot_plus("(?-s)^(.+)");
+  StringPiece captured;
+  for (auto _ : state)
+    RE2::PartialMatch(http_text, dot_plus, &captured);
+}
+
+#ifdef USEPCRE
+BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs());
+
+// Benchmark body: PCRE::PartialMatch of the character-class pattern
+// "(?-s)^([ -~]+)" against http_text, writing the capture group into a
+// StringPiece. The PCRE object is built once, before the loop.
+void ASCIIMatchPCRE(benchmark::State& state) {
+  PCRE printable_run("(?-s)^([ -~]+)");
+  StringPiece captured;
+  for (auto _ : state)
+    PCRE::PartialMatch(http_text, printable_run, &captured);
+}
+
+// Benchmark body: RE2::PartialMatch of the character-class pattern
+// "(?-s)^([ -~]+)" against http_text, writing the capture group into a
+// StringPiece. The RE2 object is built once, before the loop.
+void ASCIIMatchRE2(benchmark::State& state) {
+  RE2 printable_run("(?-s)^([ -~]+)");
+  StringPiece captured;
+  for (auto _ : state)
+    RE2::PartialMatch(http_text, printable_run, &captured);
+}
+
+#ifdef USEPCRE
+BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs());
+#endif
+BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs());
+
+// Shared body for the FullMatch_*_CachedPCRE benchmarks. Builds the subject
+// as RandomText(state.range(0)) followed by "ABCDEFGHIJ", constructs the PCRE
+// once before the loop, and runs PCRE::FullMatch per iteration.
+void FullMatchPCRE(benchmark::State& state, const char *regexp) {
+  std::string s = RandomText(state.range(0));
+  s += "ABCDEFGHIJ";
+  PCRE re(regexp);
+  for (auto _ : state) {
+    CHECK(PCRE::FullMatch(s, re));
+  }
+  // Throughput is reported from state.range(0) per iteration; the ten
+  // characters appended above are not included in that figure.
+  state.SetBytesProcessed(state.iterations() * state.range(0));
+}
+
+// Shared body for the FullMatch_*_CachedRE2 benchmarks. Builds the subject
+// as RandomText(state.range(0)) followed by "ABCDEFGHIJ", constructs the RE2
+// with the RE2::Latin1 option once before the loop, and runs RE2::FullMatch
+// per iteration.
+void FullMatchRE2(benchmark::State& state, const char *regexp) {
+  std::string s = RandomText(state.range(0));
+  s += "ABCDEFGHIJ";
+  RE2 re(regexp, RE2::Latin1);
+  for (auto _ : state) {
+    CHECK(RE2::FullMatch(s, re));
+  }
+  // Throughput is reported from state.range(0) per iteration; the ten
+  // characters appended above are not included in that figure.
+  state.SetBytesProcessed(state.iterations() * state.range(0));
+}
+
+// Thin wrappers that bind one pattern to FullMatchPCRE / FullMatchRE2.
+// Each pattern starts with "(?s)".
+
+// Pattern: "(?s).*"
+void FullMatch_DotStar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*"); }
+void FullMatch_DotStar_CachedRE2(benchmark::State& state)  { FullMatchRE2(state, "(?s).*"); }
+
+// Pattern: "(?s).*$" -- the same with an explicit end-of-text assertion.
+void FullMatch_DotStarDollar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*$"); }
+void FullMatch_DotStarDollar_CachedRE2(benchmark::State& state)  { FullMatchRE2(state, "(?s).*$"); }
+
+// Pattern: "(?s)((.*)()()($))" -- the same shape wrapped in five capture groups.
+void FullMatch_DotStarCapture_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s)((.*)()()($))"); }
+void FullMatch_DotStarCapture_CachedRE2(benchmark::State& state)  { FullMatchRE2(state, "(?s)((.*)()()($))"); }
+
+#ifdef USEPCRE
+BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20);
+#endif
+BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2,  8, 2<<20);
+
+#ifdef USEPCRE
+BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20);
+#endif
+BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2,  8, 2<<20);
+
+#ifdef USEPCRE
+BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20);
+#endif
+BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2,  8, 2<<20);
+
+// Shared body for the PossibleMatchRange_* benchmarks. Constructs an RE2 from
+// `regexp` once before the loop, then calls PossibleMatchRange(&min, &max, 16)
+// per iteration and CHECKs that it returns true. The same two output strings
+// are reused across iterations.
+void PossibleMatchRangeCommon(benchmark::State& state, const char* regexp) {
+  RE2 re(regexp);
+  std::string min;
+  std::string max;
+  const int kMaxLen = 16;
+  for (auto _ : state) {
+    CHECK(re.PossibleMatchRange(&min, &max, kMaxLen));
+  }
+}
+
+// Thin wrappers that bind one pattern to PossibleMatchRangeCommon.
+
+// Pattern: ".*"
+void PossibleMatchRange_Trivial(benchmark::State& state) {
+  PossibleMatchRangeCommon(state, ".*");
+}
+// Pattern: anchored literal followed by an optional class, a counted class
+// and a trailing ".*".
+void PossibleMatchRange_Complex(benchmark::State& state) {
+  PossibleMatchRangeCommon(state, "^abc[def]?[gh]{1,2}.*");
+}
+// Pattern: anchored literal prefix followed by ".*".
+void PossibleMatchRange_Prefix(benchmark::State& state) {
+  PossibleMatchRangeCommon(state, "^some_random_prefix.*");
+}
+// Pattern: literal anchored at both ends.
+// NOTE(review): "NoProg" presumably means this case is answered without a
+// compiled program -- confirm against RE2::PossibleMatchRange.
+void PossibleMatchRange_NoProg(benchmark::State& state) {
+  PossibleMatchRangeCommon(state, "^some_random_string$");
+}
+
+BENCHMARK(PossibleMatchRange_Trivial);
+BENCHMARK(PossibleMatchRange_Complex);
+BENCHMARK(PossibleMatchRange_Prefix);
+BENCHMARK(PossibleMatchRange_NoProg);
+
+}  // namespace re2
diff --git a/re2/testing/set_test.cc b/re2/testing/set_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..5a760c4b5e27d986ec582bf512f7968e6906194c
--- /dev/null
+++ b/re2/testing/set_test.cc
@@ -0,0 +1,230 @@
+// Copyright 2010 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+#include <string>
+#include <vector>
+#include <utility>
+
+#include "util/test.h"
+#include "util/logging.h"
+#include "re2/re2.h"
+#include "re2/set.h"
+
+namespace re2 {
+
+// RE2::UNANCHORED set holding "foo" and "bar".
+// Expectations encoded below:
+//  - Add() of the malformed pattern "(" returns -1, and the next successful
+//    Add() still gets index 1 (the failure does not consume an index);
+//  - Match() succeeds when either pattern occurs anywhere in the text;
+//  - with an output vector, Match() reports the indices of all patterns that
+//    matched (0 then 1 for "foobar").
+TEST(Set, Unanchored) {
+  RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
+
+  ASSERT_EQ(s.Add("foo", NULL), 0);
+  ASSERT_EQ(s.Add("(", NULL), -1);
+  ASSERT_EQ(s.Add("bar", NULL), 1);
+  ASSERT_EQ(s.Compile(), true);
+
+  ASSERT_EQ(s.Match("foobar", NULL), true);
+  ASSERT_EQ(s.Match("fooba", NULL), true);
+  ASSERT_EQ(s.Match("oobar", NULL), true);
+
+  std::vector<int> v;
+  ASSERT_EQ(s.Match("foobar", &v), true);
+  ASSERT_EQ(v.size(), 2);
+  ASSERT_EQ(v[0], 0);
+  ASSERT_EQ(v[1], 1);
+
+  ASSERT_EQ(s.Match("fooba", &v), true);
+  ASSERT_EQ(v.size(), 1);
+  ASSERT_EQ(v[0], 0);
+
+  ASSERT_EQ(s.Match("oobar", &v), true);
+  ASSERT_EQ(v.size(), 1);
+  ASSERT_EQ(v[0], 1);
+}
+
+// RE2::UNANCHORED set holding "foo" and "foobar", two patterns that share a
+// common prefix.
+// Expectations encoded below:
+//  - both indices are reported when the text contains "foobar", including
+//    when it is embedded in a longer string;
+//  - only index 0 is reported for "fooba";
+//  - a failed Match() leaves the output vector empty.
+TEST(Set, UnanchoredFactored) {
+  RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
+
+  ASSERT_EQ(s.Add("foo", NULL), 0);
+  ASSERT_EQ(s.Add("(", NULL), -1);
+  ASSERT_EQ(s.Add("foobar", NULL), 1);
+  ASSERT_EQ(s.Compile(), true);
+
+  ASSERT_EQ(s.Match("foobar", NULL), true);
+  ASSERT_EQ(s.Match("obarfoobaroo", NULL), true);
+  ASSERT_EQ(s.Match("fooba", NULL), true);
+  ASSERT_EQ(s.Match("oobar", NULL), false);
+
+  std::vector<int> v;
+  ASSERT_EQ(s.Match("foobar", &v), true);
+  ASSERT_EQ(v.size(), 2);
+  ASSERT_EQ(v[0], 0);
+  ASSERT_EQ(v[1], 1);
+
+  ASSERT_EQ(s.Match("obarfoobaroo", &v), true);
+  ASSERT_EQ(v.size(), 2);
+  ASSERT_EQ(v[0], 0);
+  ASSERT_EQ(v[1], 1);
+
+  ASSERT_EQ(s.Match("fooba", &v), true);
+  ASSERT_EQ(v.size(), 1);
+  ASSERT_EQ(v[0], 0);
+
+  ASSERT_EQ(s.Match("oobar", &v), false);
+  ASSERT_EQ(v.size(), 0);
+}
+
+// RE2::UNANCHORED set holding the single pattern "foo$".
+// Expectations encoded below: "foo" matches, "foobar" does not, and a failed
+// Match() leaves the output vector empty.
+TEST(Set, UnanchoredDollar) {
+  RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
+
+  ASSERT_EQ(s.Add("foo$", NULL), 0);
+  ASSERT_EQ(s.Compile(), true);
+
+  ASSERT_EQ(s.Match("foo", NULL), true);
+  ASSERT_EQ(s.Match("foobar", NULL), false);
+
+  std::vector<int> v;
+  ASSERT_EQ(s.Match("foo", &v), true);
+  ASSERT_EQ(v.size(), 1);
+  ASSERT_EQ(v[0], 0);
+
+  ASSERT_EQ(s.Match("foobar", &v), false);
+  ASSERT_EQ(v.size(), 0);
+}
+
+// RE2::UNANCHORED set holding the single pattern "foo\b".
+// Expectations encoded below: "foo" and "foo bar" match, "foobar" does not,
+// and a failed Match() leaves the output vector empty.
+TEST(Set, UnanchoredWordBoundary) {
+  RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
+
+  ASSERT_EQ(s.Add("foo\\b", NULL), 0);
+  ASSERT_EQ(s.Compile(), true);
+
+  ASSERT_EQ(s.Match("foo", NULL), true);
+  ASSERT_EQ(s.Match("foobar", NULL), false);
+  ASSERT_EQ(s.Match("foo bar", NULL), true);
+
+  std::vector<int> v;
+  ASSERT_EQ(s.Match("foo", &v), true);
+  ASSERT_EQ(v.size(), 1);
+  ASSERT_EQ(v[0], 0);
+
+  ASSERT_EQ(s.Match("foobar", &v), false);
+  ASSERT_EQ(v.size(), 0);
+
+  ASSERT_EQ(s.Match("foo bar", &v), true);
+  ASSERT_EQ(v.size(), 1);
+  ASSERT_EQ(v[0], 0);
+}
+
+// RE2::ANCHOR_BOTH set holding "foo" and "bar".
+// Expectations encoded below:
+//  - only texts that are exactly "foo" or exactly "bar" match; texts that
+//    merely contain one of them do not;
+//  - a failed Match() leaves the output vector empty;
+//  - a successful Match() reports the single matching index.
+TEST(Set, Anchored) {
+  RE2::Set s(RE2::DefaultOptions, RE2::ANCHOR_BOTH);
+
+  ASSERT_EQ(s.Add("foo", NULL), 0);
+  ASSERT_EQ(s.Add("(", NULL), -1);
+  ASSERT_EQ(s.Add("bar", NULL), 1);
+  ASSERT_EQ(s.Compile(), true);
+
+  ASSERT_EQ(s.Match("foobar", NULL), false);
+  ASSERT_EQ(s.Match("fooba", NULL), false);
+  ASSERT_EQ(s.Match("oobar", NULL), false);
+  ASSERT_EQ(s.Match("foo", NULL), true);
+  ASSERT_EQ(s.Match("bar", NULL), true);
+
+  std::vector<int> v;
+  ASSERT_EQ(s.Match("foobar", &v), false);
+  ASSERT_EQ(v.size(), 0);
+
+  ASSERT_EQ(s.Match("fooba", &v), false);
+  ASSERT_EQ(v.size(), 0);
+
+  ASSERT_EQ(s.Match("oobar", &v), false);
+  ASSERT_EQ(v.size(), 0);
+
+  ASSERT_EQ(s.Match("foo", &v), true);
+  ASSERT_EQ(v.size(), 1);
+  ASSERT_EQ(v[0], 0);
+
+  ASSERT_EQ(s.Match("bar", &v), true);
+  ASSERT_EQ(v.size(), 1);
+  ASSERT_EQ(v[0], 1);
+}
+
+// RE2::UNANCHORED set with no patterns added.
+// Expectations encoded below: Compile() still succeeds, and Match() returns
+// false for both the empty string and a non-empty string, leaving the output
+// vector empty.
+TEST(Set, EmptyUnanchored) {
+  RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED);
+
+  ASSERT_EQ(s.Compile(), true);
+
+  ASSERT_EQ(s.Match("", NULL), false);
+  ASSERT_EQ(s.Match("foobar", NULL), false);
+
+  std::vector<int> v;
+  ASSERT_EQ(s.Match("", &v), false);
+  ASSERT_EQ(v.size(), 0);
+
+  ASSERT_EQ(s.Match("foobar", &v), false);
+  ASSERT_EQ(v.size(), 0);
+}
+
+// RE2::ANCHOR_BOTH set with no patterns added.
+// Expectations encoded below: Compile() still succeeds, and Match() returns
+// false for both the empty string and a non-empty string, leaving the output
+// vector empty.
+TEST(Set, EmptyAnchored) {
+  RE2::Set s(RE2::DefaultOptions, RE2::ANCHOR_BOTH);
+
+  ASSERT_EQ(s.Compile(), true);
+
+  ASSERT_EQ(s.Match("", NULL), false);
+  ASSERT_EQ(s.Match("foobar", NULL), false);
+
+  std::vector<int> v;
+  ASSERT_EQ(s.Match("", &v), false);
+  ASSERT_EQ(v.size(), 0);
+
+  ASSERT_EQ(s.Match("foobar", &v), false);
+  ASSERT_EQ(v.size(), 0);
+}
+
+// RE2::ANCHOR_BOTH set holding the single pattern "/prefix/\d*", i.e. a
+// literal prefix followed by an optional run of digits.
+// Expectations encoded below: "/prefix" (no trailing slash) does not match,
+// while "/prefix/" and "/prefix/42" both match and report index 0.
+TEST(Set, Prefix) {
+  RE2::Set s(RE2::DefaultOptions, RE2::ANCHOR_BOTH);
+
+  ASSERT_EQ(s.Add("/prefix/\\d*", NULL), 0);
+  ASSERT_EQ(s.Compile(), true);
+
+  ASSERT_EQ(s.Match("/prefix", NULL), false);
+  ASSERT_EQ(s.Match("/prefix/", NULL), true);
+  ASSERT_EQ(s.Match("/prefix/42", NULL), true);
+
+  std::vector<int> v;
+  ASSERT_EQ(s.Match("/prefix", &v), false);
+  ASSERT_EQ(v.size(), 0);
+
+  ASSERT_EQ(s.Match("/prefix/", &v), true);
+  ASSERT_EQ(v.size(), 1);
+  ASSERT_EQ(v[0], 0);
+
+  ASSERT_EQ(s.Match("/prefix/42", &v), true);
+  ASSERT_EQ(v.size(), 1);
+  ASSERT_EQ(v[0], 0);
+}
+
+// Exercises move construction and move assignment of RE2::Set in three steps:
+// move-construct s2 from a compiled s1, reuse the moved-from s1 with a
+// different pattern, then move-assign s2 back over s1.
+TEST(Set, MoveSemantics) {
+  RE2::Set s1(RE2::DefaultOptions, RE2::UNANCHORED);
+  ASSERT_EQ(s1.Add("foo\\d+", NULL), 0);
+  ASSERT_EQ(s1.Compile(), true);
+  ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), true);
+  ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), false);
+
+  // The moved-to object should do what the moved-from object did.
+  RE2::Set s2 = std::move(s1);
+  ASSERT_EQ(s2.Match("abc foo1 xyz", NULL), true);
+  ASSERT_EQ(s2.Match("abc bar2 xyz", NULL), false);
+
+  // The moved-from object should have been reset and be reusable.
+  // (Add() returning 0 again shows that index numbering restarted.)
+  ASSERT_EQ(s1.Add("bar\\d+", NULL), 0);
+  ASSERT_EQ(s1.Compile(), true);
+  ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), false);
+  ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), true);
+
+  // Verify that "overwriting" works and also doesn't leak memory.
+  // (The latter will need a leak detector such as LeakSanitizer.)
+  s1 = std::move(s2);
+  ASSERT_EQ(s1.Match("abc foo1 xyz", NULL), true);
+  ASSERT_EQ(s1.Match("abc bar2 xyz", NULL), false);
+}
+
+}  // namespace re2
diff --git a/runtests b/runtests
new file mode 100644
index 0000000000000000000000000000000000000000..94584a660df481773179052b250579aa3ef2c2b9
--- /dev/null
+++ b/runtests
@@ -0,0 +1,33 @@
+#!/usr/bin/env sh
+
+# Usage: runtests [-shared-library-path DIR] TEST...
+#
+# Runs every TEST, sending its stdout and stderr to TEST.log, and prints one
+# PASS/FAIL line per test. Exits 0 only if every test exited successfully.
+
+# System Integrity Protection on Darwin complicated these matters somewhat.
+# See https://github.com/google/re2/issues/175 for details.
+if [ "x$1" = "x-shared-library-path" ]; then
+	if [ "x$(uname)" = "xDarwin" ]; then
+		DYLD_LIBRARY_PATH="$2:$DYLD_LIBRARY_PATH"
+		export DYLD_LIBRARY_PATH
+	else
+		LD_LIBRARY_PATH="$2:$LD_LIBRARY_PATH"
+		export LD_LIBRARY_PATH
+	fi
+	shift 2
+fi
+
+success=true
+for i; do
+	# Every expansion of $i is quoted so that a test path containing spaces
+	# or glob characters is treated as one word.
+	printf "%-40s" "$i"
+	# The test runs inside a command substitution whose output is entirely
+	# redirected to the log, so the substitution expands to nothing and the
+	# `if` sees the test's own exit status. The outer 2>/dev/null discards
+	# anything the shell itself writes to stderr at this point.
+	if $("$i" >"$i.log" 2>&1) 2>/dev/null; then
+		echo PASS
+	else
+		echo FAIL';' output in "$i.log"
+		success=false
+	fi
+done
+
+if $success; then
+	echo 'ALL TESTS PASSED.'
+	exit 0
+else
+	echo 'TESTS FAILED.'
+	exit 1
+fi
diff --git a/testinstall.cc b/testinstall.cc
new file mode 100644
index 0000000000000000000000000000000000000000..19cc9003bf8decc18a7be4dacfb0bb4f181a39b0
--- /dev/null
+++ b/testinstall.cc
@@ -0,0 +1,27 @@
+// Copyright 2008 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+#include <re2/filtered_re2.h>
+#include <re2/re2.h>
+
+// Smoke test for an installed RE2: touches the FilteredRE2 API and the two
+// basic RE2 matching entry points, then prints PASS (exit 0) or FAIL (exit 2).
+int main() {
+  // Exercise FilteredRE2; the results of these calls are not inspected.
+  re2::FilteredRE2 filtered;
+  int pattern_id;
+  filtered.Add("a.*b.*c", RE2::DefaultOptions, &pattern_id);
+  std::vector<std::string> atoms;
+  filtered.Compile(&atoms);
+  std::vector<int> matched_atoms;
+  filtered.FirstMatch("abbccc", matched_atoms);
+
+  // `number` is only read after PartialMatch has succeeded, thanks to the
+  // left-to-right short-circuit evaluation of &&.
+  int number;
+  const bool ok = RE2::FullMatch("axbyc", "a.*b.*c") &&
+                  RE2::PartialMatch("foo123bar", "(\\d+)", &number) &&
+                  number == 123;
+  if (!ok) {
+    printf("FAIL\n");
+    return 2;
+  }
+
+  printf("PASS\n");
+  return 0;
+}
diff --git a/util/benchmark.cc b/util/benchmark.cc
new file mode 100644
index 0000000000000000000000000000000000000000..e39c3349abd3818694e3ecb7cbcca40dfbb22734
--- /dev/null
+++ b/util/benchmark.cc
@@ -0,0 +1,131 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <algorithm>
+#include <chrono>
+#include <limits>
+
+#include "util/benchmark.h"
+#include "util/flags.h"
+#include "re2/re2.h"
+
+#ifdef _WIN32
+#define snprintf _snprintf
+#endif
+
+using ::testing::Benchmark;
+
+static Benchmark* benchmarks[10000];
+static int nbenchmarks;
+
+// Normalizes the argument range and appends this benchmark to the global
+// registry. Called from the Benchmark constructors.
+void Benchmark::Register() {
+  // The smallest argument is 1, and hi is never below lo.
+  lo_ = std::max(1, lo_);
+  hi_ = std::max(lo_, hi_);
+
+  // The registry is a fixed-size array: fail loudly rather than write past
+  // its end if more benchmarks are registered than it can hold.
+  const int capacity =
+      static_cast<int>(sizeof benchmarks / sizeof benchmarks[0]);
+  if (nbenchmarks >= capacity) {
+    fprintf(stderr, "too many benchmarks registered (limit %d)\n", capacity);
+    abort();
+  }
+  benchmarks[nbenchmarks++] = this;
+}
+
+// Returns the current std::chrono::steady_clock reading, expressed as a
+// nanosecond count since that clock's epoch.
+static int64_t nsec() {
+  const std::chrono::steady_clock::duration since_epoch =
+      std::chrono::steady_clock::now().time_since_epoch();
+  const std::chrono::nanoseconds as_ns =
+      std::chrono::duration_cast<std::chrono::nanoseconds>(since_epoch);
+  return as_ns.count();
+}
+
+static int64_t t0;
+static int64_t ns;
+static int64_t bytes;
+static int64_t items;
+
+// Starts the benchmark clock. Has no effect if the clock is already running
+// (t0 holds the start time while running and 0 while stopped).
+void StartBenchmarkTiming() {
+  if (t0 != 0) return;
+  t0 = nsec();
+}
+
+// Stops the benchmark clock and adds the elapsed interval to the running
+// total `ns`. Has no effect if the clock is already stopped (t0 == 0).
+void StopBenchmarkTiming() {
+  if (t0 == 0) return;
+  ns += nsec() - t0;
+  t0 = 0;
+}
+
+// Records the byte count for the current run; RunBench() uses it to print a
+// MB/s figure.
+void SetBenchmarkBytesProcessed(int64_t b) {
+  bytes = b;
+}
+
+// Records the item count for the current run.
+void SetBenchmarkItemsProcessed(int64_t i) {
+  items = i;
+}
+
+// Runs benchmark `b` once for `iters` iterations with argument `arg`,
+// leaving the elapsed time in `ns` and any reported counts in `bytes` and
+// `items`.
+static void RunFunc(Benchmark* b, int iters, int arg) {
+  // The clock is started here, before the benchmark function is entered.
+  // StartBenchmarkTiming() does nothing while t0 is non-zero, so work the
+  // function does before its first StopBenchmarkTiming() call is timed too.
+  t0 = nsec();
+  ns = 0;
+  bytes = 0;
+  items = 0;
+  b->func()(iters, arg);
+  // Does nothing if the benchmark already stopped the clock itself.
+  StopBenchmarkTiming();
+}
+
+// Rounds n up to a "nice" iteration count of the form 2, 5 or 10 times a
+// power of ten. Values below 2 (including zero and negatives) yield 2.
+//
+// The arithmetic is done in 64 bits: RunBench() can ask for counts above
+// 1e9, where multiplying the power of ten by 10 in `int` would overflow.
+// A result that does not fit in `int` is clamped to the largest `int`.
+static int round(int n) {
+  int64_t base = 1;
+  while (base * 10 < n) base *= 10;
+
+  int64_t rounded;
+  if (n < 2 * base) {
+    rounded = 2 * base;
+  } else if (n < 5 * base) {
+    rounded = 5 * base;
+  } else {
+    rounded = 10 * base;
+  }
+
+  const int64_t kIntMax = std::numeric_limits<int>::max();
+  return static_cast<int>(rounded < kIntMax ? rounded : kIntMax);
+}
+
+// Runs benchmark `b` with argument `arg`, growing the iteration count until
+// one run takes at least a second (or the count reaches 1e9), then prints a
+// result line: name, iteration count, ns/op and, if the benchmark reported a
+// byte count, MB/s.
+static void RunBench(Benchmark* b, int arg) {
+  // Ceiling on the iteration count; also the loop's second exit condition.
+  const int64_t kMaxIters = 1000000000;
+  int iters, last;
+
+  // Run once just in case it's expensive.
+  iters = 1;
+  RunFunc(b, iters, arg);
+  while (ns < (int)1e9 && iters < (int)1e9) {
+    last = iters;
+    // Estimate how many iterations fit in one second at the observed rate.
+    if (ns / iters == 0) {
+      iters = (int)1e9;
+    } else {
+      iters = (int)1e9 / static_cast<int>(ns / iters);
+    }
+    // Pad the estimate by 50%, but grow by at most 100x and by at least one
+    // iteration per round. This is computed in 64 bits because 100 * last
+    // does not fit in an int once last exceeds about 2.1e7. The result is
+    // capped at kMaxIters so the value handed to round() is never above 1e9.
+    int64_t next = std::min(static_cast<int64_t>(iters) + iters / 2,
+                            static_cast<int64_t>(100) * last);
+    next = std::max(static_cast<int64_t>(last) + 1, next);
+    next = std::min(next, kMaxIters);
+    iters = round(static_cast<int>(next));
+    RunFunc(b, iters, arg);
+  }
+
+  char mb[100];
+  char suf[100];
+  mb[0] = '\0';
+  suf[0] = '\0';
+  if (ns > 0 && bytes > 0)
+    snprintf(mb, sizeof mb, "\t%7.2f MB/s",
+             ((double)bytes / 1e6) / ((double)ns / 1e9));
+  if (b->has_arg()) {
+    // Show the argument as a name suffix, abbreviated to M or K when large.
+    if (arg >= (1 << 20)) {
+      snprintf(suf, sizeof suf, "/%dM", arg / (1 << 20));
+    } else if (arg >= (1 << 10)) {
+      snprintf(suf, sizeof suf, "/%dK", arg / (1 << 10));
+    } else {
+      snprintf(suf, sizeof suf, "/%d", arg);
+    }
+  }
+  printf("%s%s\t%8d\t%10lld ns/op%s\n", b->name(), suf, iters,
+         (long long)ns / iters, mb);
+  fflush(stdout);
+}
+
+// Decides whether the benchmark called `name` was selected on the command
+// line. Each argument after argv[0] is used as a pattern for
+// RE2::PartialMatch against the name; one hit is enough.
+static bool WantBench(const char* name, int argc, const char** argv) {
+  // No arguments beyond the program name: every benchmark is selected.
+  if (argc == 1) return true;
+
+  int i = 1;
+  while (i < argc) {
+    if (RE2::PartialMatch(name, argv[i])) return true;
+    ++i;
+  }
+  return false;
+}
+
+// Runs every registered benchmark selected by the command line (see
+// WantBench), once per argument value lo, 2*lo, 4*lo, ... up to hi.
+int main(int argc, const char** argv) {
+  for (int i = 0; i < nbenchmarks; i++) {
+    Benchmark* b = benchmarks[i];
+    if (!WantBench(b->name(), argc, argv))
+      continue;
+    for (int arg = b->lo(); arg <= b->hi(); arg <<= 1) {
+      RunBench(b, arg);
+      // Doubling from here would pass hi, so stop now. When hi is above
+      // INT_MAX / 2 this also keeps the shift from overflowing, which
+      // would otherwise stop the loop from terminating properly.
+      if (arg > b->hi() / 2)
+        break;
+    }
+  }
+  return 0;
+}
diff --git a/util/benchmark.h b/util/benchmark.h
new file mode 100644
index 0000000000000000000000000000000000000000..d97b49e17f4cb091455d06cd8717e081119caf27
--- /dev/null
+++ b/util/benchmark.h
@@ -0,0 +1,156 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_BENCHMARK_H_
+#define UTIL_BENCHMARK_H_
+
+#include <stdint.h>
+#include <functional>
+
+#include "util/logging.h"
+#include "util/util.h"
+
+// Globals for the old benchmark API.
+void StartBenchmarkTiming();
+void StopBenchmarkTiming();
+void SetBenchmarkBytesProcessed(int64_t b);
+void SetBenchmarkItemsProcessed(int64_t i);
+
+namespace benchmark {
+
+// The new benchmark API implemented as a layer over the old benchmark API.
+// (Please refer to https://github.com/google/benchmark for documentation.)
+class State {
+ private:
+  // Iterator behind the range-for loop that benchmark bodies are written
+  // around:
+  //
+  //   for (auto _ : state) {
+  //     // ...
+  //   }
+  //
+  // It counts down from the requested number of iterations; end() is the
+  // iterator holding zero.
+  class Iterator {
+   public:
+    // Type of the loop variable. The attribute is there to try to avoid
+    // compiler warnings about that variable being unused.
+    struct ATTRIBUTE_UNUSED Value {};
+
+    explicit Iterator(int64_t iters) : iters_(iters) {}
+
+    bool operator!=(const Iterator& that) const {
+      if (iters_ == that.iters_) {
+        // The loop is about to finish, so stop timing.
+        StopBenchmarkTiming();
+        return false;
+      }
+      return true;
+    }
+
+    Value operator*() const { return Value(); }
+
+    Iterator& operator++() {
+      iters_ -= 1;
+      return *this;
+    }
+
+   private:
+    int64_t iters_;  // Iterations still to run.
+  };
+
+ public:
+  // State for a benchmark that takes no argument.
+  explicit State(int64_t iters)
+      : iters_(iters), arg_(0), has_arg_(false) {}
+
+  // State for a benchmark that takes an argument.
+  State(int64_t iters, int64_t arg)
+      : iters_(iters), arg_(arg), has_arg_(true) {}
+
+  Iterator begin() {
+    // The loop is about to start, so start timing.
+    StartBenchmarkTiming();
+    return Iterator(iters_);
+  }
+
+  Iterator end() { return Iterator(0); }
+
+  // Number of iterations this State was constructed with.
+  int64_t iterations() const { return iters_; }
+
+  // Pretend to support multiple arguments: `pos` is ignored and the single
+  // argument is returned. CHECK-fails if no argument was supplied.
+  int64_t range(int pos) const { CHECK(has_arg_); return arg_; }
+
+  // Forwarders to the old benchmark API.
+  void SetBytesProcessed(int64_t b) { SetBenchmarkBytesProcessed(b); }
+  void SetItemsProcessed(int64_t i) { SetBenchmarkItemsProcessed(i); }
+
+ private:
+  int64_t iters_;
+  int64_t arg_;
+  bool has_arg_;
+
+  State(const State&) = delete;
+  State& operator=(const State&) = delete;
+};
+
+}  // namespace benchmark
+
+namespace testing {
+
+// One registered benchmark: its name, a callable that adapts the new-style
+// function (taking benchmark::State&) to an (iterations, argument) call, and
+// the range of argument values to run it with. Both constructors finish by
+// calling Register().
+class Benchmark {
+ public:
+  // Benchmark without an argument: the adapter ignores its second parameter.
+  Benchmark(const char* name, void (*func)(benchmark::State&))
+      : name_(name),
+        func_([func](int iterations, int /*arg*/) {
+          benchmark::State st(iterations);
+          func(st);
+        }),
+        lo_(0),
+        hi_(0),
+        has_arg_(false) {
+    Register();
+  }
+
+  // Benchmark with an argument taken from the range lo..hi.
+  Benchmark(const char* name, void (*func)(benchmark::State&), int lo, int hi)
+      : name_(name),
+        func_([func](int iterations, int argument) {
+          benchmark::State st(iterations, argument);
+          func(st);
+        }),
+        lo_(lo),
+        hi_(hi),
+        has_arg_(true) {
+    Register();
+  }
+
+  // Pretend to support multiple threads: both bounds are ignored.
+  Benchmark* ThreadRange(int lo, int hi) { return this; }
+
+  // Accessors.
+  const char* name() const { return name_; }
+  bool has_arg() const { return has_arg_; }
+  int lo() const { return lo_; }
+  int hi() const { return hi_; }
+  const std::function<void(int, int)>& func() const { return func_; }
+
+ private:
+  void Register();
+
+  const char* name_;
+  std::function<void(int, int)> func_;
+  int lo_;
+  int hi_;
+  bool has_arg_;
+
+  Benchmark(const Benchmark&) = delete;
+  Benchmark& operator=(const Benchmark&) = delete;
+};
+
+}  // namespace testing
+
+// Registers benchmark function f. Expands to the definition of a pointer
+// variable named _benchmark_<f>, initialized with a heap-allocated
+// ::testing::Benchmark whose constructor calls Register(). The expansion
+// carries no trailing semicolon, so a call such as ->ThreadRange(...) can be
+// appended at the use site.
+#define BENCHMARK(f)                     \
+  ::testing::Benchmark* _benchmark_##f = \
+      (new ::testing::Benchmark(#f, f))
+
+// As BENCHMARK, but also passes lo and hi to the Benchmark constructor as
+// the argument range.
+#define BENCHMARK_RANGE(f, lo, hi)       \
+  ::testing::Benchmark* _benchmark_##f = \
+      (new ::testing::Benchmark(#f, f, lo, hi))
+
+#endif  // UTIL_BENCHMARK_H_
diff --git a/util/flags.h b/util/flags.h
new file mode 100644
index 0000000000000000000000000000000000000000..3386b729d4319882ed096d2f44dceab8946f88fa
--- /dev/null
+++ b/util/flags.h
@@ -0,0 +1,26 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_FLAGS_H_
+#define UTIL_FLAGS_H_
+
+// Simplified version of Google's command line flags.
+// Does not support parsing the command line.
+// If you want to do that, see
+// https://gflags.github.io/gflags/
+
+// Defines the variable re2::FLAGS_<name> of the given type, initialized to
+// deflt. The desc argument is accepted but not used in the expansion.
+#define DEFINE_FLAG(type, name, deflt, desc) \
+	namespace re2 { type FLAGS_##name = deflt; }
+
+// Declares re2::FLAGS_<name> as extern.
+#define DECLARE_FLAG(type, name) \
+	namespace re2 { extern type FLAGS_##name; }
+
+namespace re2 {
+
+// Returns a copy of the value held by `flag`.
+template <typename T>
+T GetFlag(const T& flag) {
+  T value = flag;
+  return value;
+}
+
+}  // namespace re2
+
+#endif  // UTIL_FLAGS_H_
diff --git a/util/fuzz.cc b/util/fuzz.cc
new file mode 100644
index 0000000000000000000000000000000000000000..9cac1185ac65106e53ba7178d828bce82e8f163d
--- /dev/null
+++ b/util/fuzz.cc
@@ -0,0 +1,21 @@
+// Copyright 2016 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+// Entry point for libFuzzer.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size);
+
+// Minimal stand-in driver for the fuzz target: calls LLVMFuzzerTestOneInput
+// 32 times, each time with a 32-byte buffer refilled from random().
+// The command-line arguments are ignored.
+int main(int argc, char** argv) {
+  const int kRounds = 32;
+  const size_t kSize = 32;
+
+  uint8_t data[kSize];
+  for (int iter = 0; iter < kRounds; ++iter) {
+    // Keep only the low eight bits of each random() result.
+    for (size_t pos = 0; pos < kSize; ++pos)
+      data[pos] = random() & 0xFF;
+    LLVMFuzzerTestOneInput(data, kSize);
+  }
+  return 0;
+}
diff --git a/util/logging.h b/util/logging.h
new file mode 100644
index 0000000000000000000000000000000000000000..5b2217f29ca4c79c3696aa66f6dbdef6be01f95d
--- /dev/null
+++ b/util/logging.h
@@ -0,0 +1,109 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_LOGGING_H_
+#define UTIL_LOGGING_H_
+
+// Simplified version of Google's logging.
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ostream>
+#include <sstream>
+
+#include "util/util.h"
+
+// Debug-only checking.
+#define DCHECK(condition) assert(condition)
+#define DCHECK_EQ(val1, val2) assert((val1) == (val2))
+#define DCHECK_NE(val1, val2) assert((val1) != (val2))
+#define DCHECK_LE(val1, val2) assert((val1) <= (val2))
+#define DCHECK_LT(val1, val2) assert((val1) < (val2))
+#define DCHECK_GE(val1, val2) assert((val1) >= (val2))
+#define DCHECK_GT(val1, val2) assert((val1) > (val2))
+
+// Always-on checking
+#define CHECK(x)	if(x){}else LogMessageFatal(__FILE__, __LINE__).stream() << "Check failed: " #x
+#define CHECK_LT(x, y)	CHECK((x) < (y))
+#define CHECK_GT(x, y)	CHECK((x) > (y))
+#define CHECK_LE(x, y)	CHECK((x) <= (y))
+#define CHECK_GE(x, y)	CHECK((x) >= (y))
+#define CHECK_EQ(x, y)	CHECK((x) == (y))
+#define CHECK_NE(x, y)	CHECK((x) != (y))
+
+#define LOG_INFO LogMessage(__FILE__, __LINE__)
+#define LOG_WARNING LogMessage(__FILE__, __LINE__)
+#define LOG_ERROR LogMessage(__FILE__, __LINE__)
+#define LOG_FATAL LogMessageFatal(__FILE__, __LINE__)
+#define LOG_QFATAL LOG_FATAL
+
+// It seems that one of the Windows header files defines ERROR as 0.
+#ifdef _WIN32
+#define LOG_0 LOG_INFO
+#endif
+
+#ifdef NDEBUG
+#define LOG_DFATAL LOG_ERROR
+#else
+#define LOG_DFATAL LOG_FATAL
+#endif
+
+// LOG(severity) pastes `severity` onto LOG_ to select one of the LOG_*
+// macros above and yields that message object's stream.
+#define LOG(severity) LOG_ ## severity.stream()
+
+// NOTE: as written, VLOG(x) produces output only when x <= 0; for any
+// positive x the streamed expression is not evaluated.
+#define VLOG(x) if((x)>0){}else LOG_INFO.stream()
+
+// Accumulates one log line in memory and writes it to stderr, with a newline
+// appended, either when Flush() is called or, failing that, on destruction.
+// The line starts with "<file>:<line>: ".
+class LogMessage {
+ public:
+  LogMessage(const char* file, int line)
+      : flushed_(false) {
+    str_ << file << ":" << line << ": ";
+  }
+
+  ~LogMessage() {
+    if (flushed_) return;
+    Flush();
+  }
+
+  // Terminates the line and writes everything buffered so far to stderr.
+  void Flush() {
+    str_ << "\n";
+    const std::string text = str_.str();
+    const size_t len = text.size();
+    // The comparison exists only to consume fwrite's result; a short write
+    // is not acted upon.
+    if (fwrite(text.data(), 1, len, stderr) < len) {}  // shut up gcc
+    flushed_ = true;
+  }
+
+  // Stream that callers append the message text to.
+  std::ostream& stream() { return str_; }
+
+ private:
+  bool flushed_;
+  std::ostringstream str_;
+
+  LogMessage(const LogMessage&) = delete;
+  LogMessage& operator=(const LogMessage&) = delete;
+};
+
+// Silence "destructor never returns" warning for ~LogMessageFatal().
+// Since this is a header file, push and then pop to limit the scope.
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4722)
+#endif
+
+// A LogMessage whose destructor flushes the message and then calls abort(),
+// so the destructor never returns. This is what CHECK failures and
+// LOG(FATAL) construct.
+class LogMessageFatal : public LogMessage {
+ public:
+  LogMessageFatal(const char* file, int line)
+      : LogMessage(file, line) {}
+  ATTRIBUTE_NORETURN ~LogMessageFatal() {
+    // Flush explicitly: abort() ends the process, so the base-class
+    // destructor that would otherwise flush never gets to run.
+    Flush();
+    abort();
+  }
+ private:
+  LogMessageFatal(const LogMessageFatal&) = delete;
+  LogMessageFatal& operator=(const LogMessageFatal&) = delete;
+};
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+#endif  // UTIL_LOGGING_H_
diff --git a/util/malloc_counter.h b/util/malloc_counter.h
new file mode 100644
index 0000000000000000000000000000000000000000..81b564ff9868bbbd19c0fa1ef8ae8cd08a88abb0
--- /dev/null
+++ b/util/malloc_counter.h
@@ -0,0 +1,19 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_MALLOC_COUNTER_H_
+#define UTIL_MALLOC_COUNTER_H_
+
+namespace testing {
+
+// Stub heap-usage counter: the constructor argument is ignored, Reset() does
+// nothing, and both growth queries always report zero.
+class MallocCounter {
+ public:
+  static const int THIS_THREAD_ONLY = 0;
+
+  MallocCounter(int x) {}
+
+  void Reset() {}
+
+  long long HeapGrowth() { return 0; }
+  long long PeakHeapGrowth() { return 0; }
+};
+
+}  // namespace testing
+
+#endif  // UTIL_MALLOC_COUNTER_H_
diff --git a/util/mix.h b/util/mix.h
new file mode 100644
index 0000000000000000000000000000000000000000..d85c172ab0e3fb95a67ea2ab315f178596fca869
--- /dev/null
+++ b/util/mix.h
@@ -0,0 +1,41 @@
+// Copyright 2016 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_MIX_H_
+#define UTIL_MIX_H_
+
+#include <stddef.h>
+#include <limits>
+
+namespace re2 {
+
+// Silence "truncation of constant value" warning for kMul in 32-bit mode.
+// Since this is a header file, push and then pop to limit the scope.
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4309)
+#endif
+
+// Incremental hash combiner. Start from the default seed (1) or from a
+// caller-supplied value (offset by 83), fold further values in with Mix(),
+// and read the result with get().
+class HashMix {
+ public:
+  HashMix() : hash_(1) {}
+  explicit HashMix(size_t val) : hash_(val + 83) {}
+
+  // Folds `val` into the hash: multiply by a fixed constant, rotate the
+  // product left by 19 bits, then add `val`.
+  void Mix(size_t val) {
+    static const size_t kMul = static_cast<size_t>(0xdc3eb94af8ab4c93ULL);
+    const int kBits = std::numeric_limits<size_t>::digits;
+    const size_t scaled = hash_ * kMul;
+    const size_t rotated = (scaled << 19) | (scaled >> (kBits - 19));
+    hash_ = rotated + val;
+  }
+
+  size_t get() const { return hash_; }
+
+ private:
+  size_t hash_;
+};
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+}  // namespace re2
+
+#endif  // UTIL_MIX_H_
diff --git a/util/mutex.h b/util/mutex.h
new file mode 100644
index 0000000000000000000000000000000000000000..158046bb5c9f40b7bbcbebe2c5ebf8dae23979d2
--- /dev/null
+++ b/util/mutex.h
@@ -0,0 +1,148 @@
+// Copyright 2007 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_MUTEX_H_
+#define UTIL_MUTEX_H_
+
+/*
+ * A simple mutex wrapper, supporting locks and read-write locks.
+ * You should assume the locks are *not* re-entrant.
+ */
+
+#ifdef _WIN32
+// Requires Windows Vista or Windows Server 2008 at minimum.
+#include <windows.h>
+#if defined(WINVER) && WINVER >= 0x0600
+#define MUTEX_IS_WIN32_SRWLOCK
+#endif
+#else
+#ifndef _POSIX_C_SOURCE
+#define _POSIX_C_SOURCE 200809L
+#endif
+#include <unistd.h>
+#if defined(_POSIX_READER_WRITER_LOCKS) && _POSIX_READER_WRITER_LOCKS > 0
+#define MUTEX_IS_PTHREAD_RWLOCK
+#endif
+#endif
+
+#if defined(MUTEX_IS_WIN32_SRWLOCK)
+typedef SRWLOCK MutexType;
+#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
+#include <pthread.h>
+#include <stdlib.h>
+typedef pthread_rwlock_t MutexType;
+#else
+#include <mutex>
+typedef std::mutex MutexType;
+#endif
+
+namespace re2 {
+
+class Mutex {  // One interface over whichever MutexType was selected above.
+ public:
+  inline Mutex();
+  inline ~Mutex();
+  inline void Lock();    // Block if needed until free then acquire exclusively
+  inline void Unlock();  // Release a lock acquired via Lock()
+  // Note that on systems that don't support read-write locks, these may
+  // be implemented as synonyms to Lock() and Unlock().  So you can use
+  // these for efficiency, but don't use them anyplace where being able
+  // to do shared reads is necessary to avoid deadlock.
+  inline void ReaderLock();   // Block until free or shared then acquire a share
+  inline void ReaderUnlock(); // Release a read share of this Mutex
+  inline void WriterLock() { Lock(); }     // Acquire an exclusive lock
+  inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock()
+
+ private:
+  MutexType mutex_;  // SRWLOCK, pthread_rwlock_t or std::mutex
+
+  // Catch the error of writing Mutex when intending MutexLock.
+  Mutex(Mutex *ignored);
+  // Not copyable or assignable.
+  Mutex(const Mutex&) = delete;
+  Mutex& operator=(const Mutex&) = delete;
+};
+
+#if defined(MUTEX_IS_WIN32_SRWLOCK)
+// Windows slim reader/writer lock; the destructor is empty.
+Mutex::Mutex()             : mutex_(SRWLOCK_INIT) { }
+Mutex::~Mutex()            { }
+void Mutex::Lock()         { AcquireSRWLockExclusive(&mutex_); }
+void Mutex::Unlock()       { ReleaseSRWLockExclusive(&mutex_); }
+void Mutex::ReaderLock()   { AcquireSRWLockShared(&mutex_); }
+void Mutex::ReaderUnlock() { ReleaseSRWLockShared(&mutex_); }
+
+#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
+// POSIX rwlock; any non-zero pthread return value aborts the process.
+#define SAFE_PTHREAD(fncall)    \
+  do {                          \
+    if ((fncall) != 0) abort(); \
+  } while (0)
+
+Mutex::Mutex()             { SAFE_PTHREAD(pthread_rwlock_init(&mutex_, NULL)); }
+Mutex::~Mutex()            { SAFE_PTHREAD(pthread_rwlock_destroy(&mutex_)); }
+void Mutex::Lock()         { SAFE_PTHREAD(pthread_rwlock_wrlock(&mutex_)); }
+void Mutex::Unlock()       { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }
+void Mutex::ReaderLock()   { SAFE_PTHREAD(pthread_rwlock_rdlock(&mutex_)); }
+void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }
+
+#undef SAFE_PTHREAD
+
+#else
+// Fallback on std::mutex; reader locks are ordinary exclusive locks here.
+Mutex::Mutex()             { }
+Mutex::~Mutex()            { }
+void Mutex::Lock()         { mutex_.lock(); }
+void Mutex::Unlock()       { mutex_.unlock(); }
+void Mutex::ReaderLock()   { Lock(); }  // C++11 doesn't have std::shared_mutex.
+void Mutex::ReaderUnlock() { Unlock(); }
+
+#endif
+
+// --------------------------------------------------------------------------
+// Some helper classes
+
+// MutexLock(mu) acquires mu when constructed and releases it when destroyed.
+class MutexLock {  // Holds mu exclusively for the lifetime of this object.
+ public:
+  explicit MutexLock(Mutex *mu) : mu_(mu) { mu->Lock(); }
+  ~MutexLock() { mu_->Unlock(); }
+  MutexLock(const MutexLock&) = delete;
+  MutexLock& operator=(const MutexLock&) = delete;
+
+ private:
+  Mutex * const mu_;
+};
+
+// ReaderMutexLock and WriterMutexLock do the same, for rwlocks
+class ReaderMutexLock {  // Holds a read share of mu for this object's lifetime.
+ public:
+  explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu->ReaderLock(); }
+  ~ReaderMutexLock() { mu_->ReaderUnlock(); }
+  ReaderMutexLock(const ReaderMutexLock&) = delete;
+  ReaderMutexLock& operator=(const ReaderMutexLock&) = delete;
+
+ private:
+  Mutex * const mu_;
+};
+
+class WriterMutexLock {  // WriterLock() on construction, WriterUnlock() on exit.
+ public:
+  explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu->WriterLock(); }
+  ~WriterMutexLock() { mu_->WriterUnlock(); }
+  WriterMutexLock(const WriterMutexLock&) = delete;
+  WriterMutexLock& operator=(const WriterMutexLock&) = delete;
+
+ private:
+  Mutex * const mu_;
+};
+
+// Catch bug where variable name is omitted, e.g. MutexLock (&mu); any such use after this point expands to a failing static_assert.
+#define MutexLock(x) static_assert(false, "MutexLock declaration missing variable name")
+#define ReaderMutexLock(x) static_assert(false, "ReaderMutexLock declaration missing variable name")
+#define WriterMutexLock(x) static_assert(false, "WriterMutexLock declaration missing variable name")
+
+}  // namespace re2
+
+#endif  // UTIL_MUTEX_H_
diff --git a/util/pcre.cc b/util/pcre.cc
new file mode 100644
index 0000000000000000000000000000000000000000..b68985144ff6439182e849c485636b9fe697732b
--- /dev/null
+++ b/util/pcre.cc
@@ -0,0 +1,1025 @@
+// Copyright 2003-2009 Google Inc.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This is a variant of PCRE's pcrecpp.cc, originally written at Google.
+// The main changes are the addition of the HitLimit method and
+// compilation as PCRE in namespace re2.
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits>
+#include <string>
+#include <utility>
+
+#include "util/util.h"
+#include "util/flags.h"
+#include "util/logging.h"
+#include "util/pcre.h"
+#include "util/strutil.h"
+
+// Silence warnings about the wacky formatting in the operator() functions.
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6
+#pragma GCC diagnostic ignored "-Wmisleading-indentation"
+#endif
+
+#define PCREPORT(level) LOG(level)
+
+// Default PCRE limits.
+// Defaults chosen to allow a plausible amount of CPU and
+// not exceed main thread stacks.  Note that other threads
+// often have smaller stacks, and therefore tightening
+// regexp_stack_limit may frequently be necessary.
+DEFINE_FLAG(int, regexp_stack_limit, 256 << 10,  // 256 KiB
+            "default PCRE stack limit (bytes)");
+DEFINE_FLAG(int, regexp_match_limit, 1000000,  // used when match_limit_ <= 0
+            "default PCRE match limit (function calls)");
+
+#ifndef USEPCRE
+
+// Fake just enough of the PCRE API to allow this file to build. :)
+
+struct pcre_extra {  // Just the fields that PCRE::TryMatch() fills in.
+  int flags;
+  int match_limit;
+  int match_limit_recursion;
+};
+// Placeholder values: option and flag bits are 0, error codes are distinct.
+#define PCRE_EXTRA_MATCH_LIMIT 0
+#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0
+#define PCRE_ANCHORED 0
+#define PCRE_NOTEMPTY 0
+#define PCRE_ERROR_NOMATCH 1
+#define PCRE_ERROR_MATCHLIMIT 2
+#define PCRE_ERROR_RECURSIONLIMIT 3
+#define PCRE_INFO_CAPTURECOUNT 0
+
+void pcre_free(void*) {
+}
+// Always returns NULL, so PCRE::Compile() fails for every pattern in this mode.
+pcre* pcre_compile(const char*, int, const char**, int*, const unsigned char*) {
+  return NULL;
+}
+
+int pcre_exec(const pcre*, const pcre_extra*, const char*, int, int, int, int*, int) {
+  return 0;
+}
+
+int pcre_fullinfo(const pcre*, const pcre_extra*, int, void*) {
+  return 0;
+}
+
+#endif
+
+namespace re2 {
+
+// Maximum number of args we can set (the functors below take a0 through a15).
+static const int kMaxArgs = 16;
+static const int kVecSize = (1 + kMaxArgs) * 3;  // results + PCRE workspace
+
+// Approximate size of a recursive invocation of PCRE's
+// internal "match()" frame.  This varies depending on the
+// compiler and architecture, of course, so the constant is
+// just a conservative estimate.  To find the exact number,
+// run regexp_unittest with --regexp_stack_limit=0 under
+// a debugger and look at the frames when it crashes.
+// The exact frame size was 656 in production on 2008/02/03.
+static const int kPCREFrameSize = 700;  // bytes; divides the stack limit in TryMatch()
+
+// Special name for missing C++ arguments; recognized by address, not by value.
+PCRE::Arg PCRE::no_more_args((void*)NULL);
+// The function objects whose operator() overloads are defined below.
+const PCRE::PartialMatchFunctor PCRE::PartialMatch = { };
+const PCRE::FullMatchFunctor PCRE::FullMatch = { } ;
+const PCRE::ConsumeFunctor PCRE::Consume = { };
+const PCRE::FindAndConsumeFunctor PCRE::FindAndConsume = { };
+
+// If a regular expression has no error, its error_ field points here
+static const std::string empty_string;
+
+void PCRE::Init(const char* pattern, Option options, int match_limit,
+              int stack_limit, bool report_errors) {
+  // Configuration; Compile() below reads pattern_ and options_.
+  pattern_ = pattern;
+  options_ = options;
+  match_limit_ = match_limit;
+  stack_limit_ = stack_limit;
+  report_errors_ = report_errors;
+  // Initial state: nothing compiled, no error, no limit hit.
+  re_partial_ = NULL;
+  re_full_ = NULL;
+  error_ = &empty_string;
+  hit_limit_ = false;
+
+  if (options & ~(EnabledCompileOptions | EnabledExecOptions)) {
+    error_ = new std::string("illegal regexp option");
+    PCREPORT(ERROR)
+        << "Error compiling '" << pattern << "': illegal regexp option";
+    return;
+  }
+  re_partial_ = Compile(UNANCHORED);
+  if (re_partial_ != NULL) re_full_ = Compile(ANCHOR_BOTH);
+}
+
+PCRE::PCRE(const char* pattern) {
+  Init(pattern, None, 0, 0, true);  // options, match_limit, stack_limit, report_errors
+}
+PCRE::PCRE(const char* pattern, Option option) {
+  Init(pattern, option, 0, 0, true);  // zero limits: TryMatch() uses the flag values
+}
+PCRE::PCRE(const std::string& pattern) {
+  Init(pattern.c_str(), None, 0, 0, true);
+}
+PCRE::PCRE(const std::string& pattern, Option option) {
+  Init(pattern.c_str(), option, 0, 0, true);
+}
+PCRE::PCRE(const std::string& pattern, const PCRE_Options& re_option) {
+  Init(pattern.c_str(), re_option.option(), re_option.match_limit(),
+       re_option.stack_limit(), re_option.report_errors());  // all from re_option
+}
+
+PCRE::PCRE(const char *pattern, const PCRE_Options& re_option) {
+  Init(pattern, re_option.option(), re_option.match_limit(),
+       re_option.stack_limit(), re_option.report_errors());  // all from re_option
+}
+
+PCRE::~PCRE() {
+  if (re_full_ != NULL)         pcre_free(re_full_);
+  if (re_partial_ != NULL)      pcre_free(re_partial_);
+  if (error_ != &empty_string)  delete error_;
+}
+
+pcre* PCRE::Compile(Anchor anchor) {
+  // Special treatment for anchoring.  At runtime pcre only offers an option
+  // for anchoring at the beginning of a string, so anchoring at the end is
+  // compiled into the pattern instead:
+  //    UNANCHORED      original pattern, used with an unanchored match
+  //    ANCHOR_START    original pattern, used with an anchored match
+  //    ANCHOR_BOTH     original pattern with "\z" tacked on, used with
+  //                    an anchored match
+  //
+  // Returns NULL on failure, after logging the error and storing it in
+  // error_ unless an earlier error is already stored there.
+
+  // Parenthesize before adding '\z' so that it applies to all top-level
+  // alternatives; "(?:" is a non-counting grouping operator.
+  std::string wrapped;
+  const char* source = pattern_.c_str();
+  if (anchor == ANCHOR_BOTH) {
+    wrapped = "(?:";
+    wrapped += pattern_;
+    wrapped += ")\\z";
+    source = wrapped.c_str();
+  }
+
+  const char* error = "";
+  int eoffset;
+  pcre* re = pcre_compile(source,
+                          (options_ & EnabledCompileOptions),
+                          &error, &eoffset, NULL);
+  if (re != NULL)
+    return re;
+
+  if (error_ == &empty_string)
+    error_ = new std::string(error);
+  PCREPORT(ERROR) << "Error compiling '" << pattern_ << "': " << error;
+  return NULL;
+}
+
+/***** Convenience interfaces *****/
+
+bool PCRE::FullMatchFunctor::operator ()(const StringPiece& text,
+                                       const PCRE& re,
+                                       const Arg& a0,
+                                       const Arg& a1,
+                                       const Arg& a2,
+                                       const Arg& a3,
+                                       const Arg& a4,
+                                       const Arg& a5,
+                                       const Arg& a6,
+                                       const Arg& a7,
+                                       const Arg& a8,
+                                       const Arg& a9,
+                                       const Arg& a10,
+                                       const Arg& a11,
+                                       const Arg& a12,
+                                       const Arg& a13,
+                                       const Arg& a14,
+                                       const Arg& a15) const {
+  // Line the arguments up in declaration order so that the leading run of
+  // real arguments can be collected with a loop.
+  const Arg* const supplied[kMaxArgs] = {
+    &a0,  &a1,  &a2,  &a3,
+    &a4,  &a5,  &a6,  &a7,
+    &a8,  &a9,  &a10, &a11,
+    &a12, &a13, &a14, &a15,
+  };
+
+  // Collect pointers up to, but not including, the first one that is the
+  // no_more_args sentinel; anything after it is ignored.
+  const Arg* args[kMaxArgs];
+  int n = 0;
+  while (n < kMaxArgs && supplied[n] != &no_more_args) {
+    args[n] = supplied[n];
+    n++;
+  }
+
+  // Match anchored at both ends against a zero-filled offset vector; the
+  // consumed length is filled in by DoMatchImpl but not used here.
+  size_t consumed;
+  int vec[kVecSize] = {};
+  return re.DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize);
+}
+
+bool PCRE::PartialMatchFunctor::operator ()(const StringPiece& text,
+                                          const PCRE& re,
+                                          const Arg& a0,
+                                          const Arg& a1,
+                                          const Arg& a2,
+                                          const Arg& a3,
+                                          const Arg& a4,
+                                          const Arg& a5,
+                                          const Arg& a6,
+                                          const Arg& a7,
+                                          const Arg& a8,
+                                          const Arg& a9,
+                                          const Arg& a10,
+                                          const Arg& a11,
+                                          const Arg& a12,
+                                          const Arg& a13,
+                                          const Arg& a14,
+                                          const Arg& a15) const {
+  // Line the arguments up in declaration order so that the leading run of
+  // real arguments can be collected with a loop.
+  const Arg* const supplied[kMaxArgs] = {
+    &a0,  &a1,  &a2,  &a3,
+    &a4,  &a5,  &a6,  &a7,
+    &a8,  &a9,  &a10, &a11,
+    &a12, &a13, &a14, &a15,
+  };
+
+  // Collect pointers up to, but not including, the first one that is the
+  // no_more_args sentinel; anything after it is ignored.
+  const Arg* args[kMaxArgs];
+  int n = 0;
+  while (n < kMaxArgs && supplied[n] != &no_more_args) {
+    args[n] = supplied[n];
+    n++;
+  }
+
+  // Unanchored match against a zero-filled offset vector; the consumed
+  // length is filled in by DoMatchImpl but not used here.
+  size_t consumed;
+  int vec[kVecSize] = {};
+  return re.DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize);
+}
+
+bool PCRE::ConsumeFunctor::operator ()(StringPiece* input,
+                                     const PCRE& pattern,
+                                     const Arg& a0,
+                                     const Arg& a1,
+                                     const Arg& a2,
+                                     const Arg& a3,
+                                     const Arg& a4,
+                                     const Arg& a5,
+                                     const Arg& a6,
+                                     const Arg& a7,
+                                     const Arg& a8,
+                                     const Arg& a9,
+                                     const Arg& a10,
+                                     const Arg& a11,
+                                     const Arg& a12,
+                                     const Arg& a13,
+                                     const Arg& a14,
+                                     const Arg& a15) const {
+  // Line the arguments up in declaration order so that the leading run of
+  // real arguments can be collected with a loop.
+  const Arg* const supplied[kMaxArgs] = {
+    &a0,  &a1,  &a2,  &a3,
+    &a4,  &a5,  &a6,  &a7,
+    &a8,  &a9,  &a10, &a11,
+    &a12, &a13, &a14, &a15,
+  };
+
+  // Collect pointers up to, but not including, the first one that is the
+  // no_more_args sentinel; anything after it is ignored.
+  const Arg* args[kMaxArgs];
+  int n = 0;
+  while (n < kMaxArgs && supplied[n] != &no_more_args) {
+    args[n] = supplied[n];
+    n++;
+  }
+
+  // Match anchored at the start of *input.  On success the first `consumed`
+  // bytes are removed from *input; on failure *input is left untouched.
+  size_t consumed;
+  int vec[kVecSize] = {};
+  const bool matched = pattern.DoMatchImpl(*input, ANCHOR_START, &consumed,
+                                           args, n, vec, kVecSize);
+  if (!matched)
+    return false;
+
+  input->remove_prefix(consumed);
+  return true;
+}
+
+bool PCRE::FindAndConsumeFunctor::operator ()(StringPiece* input,
+                                            const PCRE& pattern,
+                                            const Arg& a0,
+                                            const Arg& a1,
+                                            const Arg& a2,
+                                            const Arg& a3,
+                                            const Arg& a4,
+                                            const Arg& a5,
+                                            const Arg& a6,
+                                            const Arg& a7,
+                                            const Arg& a8,
+                                            const Arg& a9,
+                                            const Arg& a10,
+                                            const Arg& a11,
+                                            const Arg& a12,
+                                            const Arg& a13,
+                                            const Arg& a14,
+                                            const Arg& a15) const {
+  // Line the arguments up in declaration order so that the leading run of
+  // real arguments can be collected with a loop.
+  const Arg* const supplied[kMaxArgs] = {
+    &a0,  &a1,  &a2,  &a3,
+    &a4,  &a5,  &a6,  &a7,
+    &a8,  &a9,  &a10, &a11,
+    &a12, &a13, &a14, &a15,
+  };
+
+  // Collect pointers up to, but not including, the first one that is the
+  // no_more_args sentinel; anything after it is ignored.
+  const Arg* args[kMaxArgs];
+  int n = 0;
+  while (n < kMaxArgs && supplied[n] != &no_more_args) {
+    args[n] = supplied[n];
+    n++;
+  }
+
+  // Unanchored search in *input.  On success the first `consumed` bytes are
+  // removed from *input; on failure *input is left untouched.
+  size_t consumed;
+  int vec[kVecSize] = {};
+  const bool matched = pattern.DoMatchImpl(*input, UNANCHORED, &consumed,
+                                           args, n, vec, kVecSize);
+  if (!matched)
+    return false;
+
+  input->remove_prefix(consumed);
+  return true;
+}
+
+bool PCRE::Replace(std::string *str,
+                 const PCRE& pattern,
+                 const StringPiece& rewrite) {
+  // Substitutes the expanded rewrite for the first match in *str.  Returns
+  // false without touching *str if nothing matches or the expansion fails.
+  int offsets[kVecSize] = {};
+  const int groups =
+      pattern.TryMatch(*str, 0, UNANCHORED, true, offsets, kVecSize);
+  if (groups == 0) return false;
+  std::string expansion;
+  if (!pattern.Rewrite(&expansion, rewrite, *str, offsets, groups))
+    return false;
+  assert(offsets[0] >= 0);
+  assert(offsets[1] >= 0);
+  str->replace(offsets[0], offsets[1] - offsets[0], expansion);
+  return true;
+}
+
+int PCRE::GlobalReplace(std::string *str,
+                      const PCRE& pattern,
+                      const StringPiece& rewrite) {
+  int count = 0;  // number of replacements made; also the return value
+  int vec[kVecSize] = {};
+  std::string out;  // rebuilt string, swapped into *str at the end
+  size_t start = 0;  // offset in *str where the next search begins
+  bool last_match_was_empty_string = false;
+
+  while (start <= str->size()) {
+    // If the previous match was for the empty string, we shouldn't
+    // just match again: we'll match in the same way and get an
+    // infinite loop.  Instead, we do the match in a special way:
+    // anchored -- to force another try at the same position --
+    // and with a flag saying that this time, ignore empty matches.
+    // If this special match returns, that means there's a non-empty
+    // match at this position as well, and we can continue.  If not,
+    // we do what perl does, and just advance by one.
+    // Notice that perl prints '@@@' for this;
+    //    perl -le '$_ = "aa"; s/b*|aa/@/g; print'
+    int matches;
+    if (last_match_was_empty_string) {
+      matches = pattern.TryMatch(*str, start, ANCHOR_START, false,
+                                 vec, kVecSize);
+      if (matches <= 0) {
+        if (start < str->size())
+          out.push_back((*str)[start]);
+        start++;
+        last_match_was_empty_string = false;
+        continue;
+      }
+    } else {
+      matches = pattern.TryMatch(*str, start, UNANCHORED, true,
+                                 vec, kVecSize);
+      if (matches <= 0)
+        break;
+    }
+    size_t matchstart = vec[0], matchend = vec[1];
+    assert(matchstart >= start);
+    assert(matchend >= matchstart);
+    // Copy the text between the previous match and this one, then the rewrite.
+    out.append(*str, start, matchstart - start);
+    pattern.Rewrite(&out, rewrite, *str, vec, matches);  // NOTE(review): result ignored
+    start = matchend;
+    count++;
+    last_match_was_empty_string = (matchstart == matchend);
+  }
+
+  if (count == 0)
+    return 0;
+  // At least one replacement: copy the tail after the last match and publish.
+  if (start < str->size())
+    out.append(*str, start, str->size() - start);
+  using std::swap;
+  swap(out, *str);
+  return count;
+}
+
+bool PCRE::Extract(const StringPiece &text,
+                 const PCRE& pattern,
+                 const StringPiece &rewrite,
+                 std::string *out) {
+  // Clears *out and expands rewrite into it, but only if text has a match.
+  int vec[kVecSize] = {};
+  const int found = pattern.TryMatch(text, 0, UNANCHORED, true, vec, kVecSize);
+  if (found != 0)
+    out->clear();
+  return found != 0 && pattern.Rewrite(out, rewrite, text, vec, found);
+}
+
+std::string PCRE::QuoteMeta(const StringPiece& unquoted) {
+  // Escapes any ascii character not in [A-Za-z_0-9].  It is legal to
+  // escape a character even if it has no special meaning in a regular
+  // expression, so no attempt is made to tell the two apart.  This also
+  // makes the result identical to the perl function of the same name,
+  // except for the null-character case (see `perldoc -f quotemeta`).
+  std::string result;
+  result.reserve(unquoted.size() << 1);
+
+  for (size_t ii = 0; ii < unquoted.size(); ++ii) {
+    const char ch = unquoted[ii];
+
+    // A byte with the high bit set is part of a UTF8 or Latin1 character
+    // and is copied without escaping.  Experimentally this is what works
+    // correctly with the regexp library.
+    const bool high_bit = (ch & 128) != 0;
+    // Range checks instead of 'isalnum': using isalnum here raises the
+    // benchmark time from 32ns to 58ns.
+    const bool word_char = (ch >= 'a' && ch <= 'z') ||
+                           (ch >= 'A' && ch <= 'Z') ||
+                           (ch >= '0' && ch <= '9') ||
+                           ch == '_';
+
+    if (ch == '\0') {
+      // Can't use "\\0" since the next character might be a digit.
+      result += "\\x00";
+    } else {
+      if (!word_char && !high_bit)
+        result += '\\';
+      result += ch;
+    }
+  }
+  return result;
+}
+
+/***** Actual matching and rewriting code *****/
+
+bool PCRE::HitLimit() {
+  return hit_limit_ != 0;
+}
+
+void PCRE::ClearHitLimit() {  // Resets the indicator to its Init() state.
+  hit_limit_ = false;
+}
+
+int PCRE::TryMatch(const StringPiece& text,
+                   size_t startpos,
+                   Anchor anchor,
+                   bool empty_ok,
+                   int *vec,
+                   int vecsize) const {  // returns 0 for no match and for every error
+  pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
+  if (re == NULL) {
+    PCREPORT(ERROR) << "Matching against invalid re: " << *error_;
+    return 0;
+  }
+  // A non-positive per-object limit means "use the flag value instead".
+  int match_limit = match_limit_;
+  if (match_limit <= 0) {
+    match_limit = GetFlag(FLAGS_regexp_match_limit);
+  }
+
+  int stack_limit = stack_limit_;
+  if (stack_limit <= 0) {
+    stack_limit = GetFlag(FLAGS_regexp_stack_limit);
+  }
+  // Only positive limits are handed to PCRE.
+  pcre_extra extra = { 0 };
+  if (match_limit > 0) {
+    extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
+    extra.match_limit = match_limit;
+  }
+  if (stack_limit > 0) {
+    extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
+    extra.match_limit_recursion = stack_limit / kPCREFrameSize;  // bytes -> frames
+  }
+
+  int options = 0;
+  if (anchor != UNANCHORED)
+    options |= PCRE_ANCHORED;
+  if (!empty_ok)
+    options |= PCRE_NOTEMPTY;
+
+  int rc = pcre_exec(re,              // The regular expression object
+                     &extra,
+                     (text.data() == NULL) ? "" : text.data(),
+                     static_cast<int>(text.size()),
+                     static_cast<int>(startpos),
+                     options,
+                     vec,
+                     vecsize);
+
+  // Handle errors
+  if (rc == 0) {
+    // pcre_exec() returns 0 as a special case when the number of
+    // capturing subpatterns exceeds the size of the vector.
+    // When this happens, there is a match and the output vector
+    // is filled, but we miss out on the positions of the extra subpatterns.
+    rc = vecsize / 2;
+  } else if (rc < 0) {
+    switch (rc) {
+      case PCRE_ERROR_NOMATCH:
+        return 0;
+      case PCRE_ERROR_MATCHLIMIT:
+        // Writing to hit_limit is not safe if multiple threads
+        // are using the PCRE, but the flag is only intended
+        // for use by unit tests anyway, so we let it go.
+        hit_limit_ = true;
+        PCREPORT(WARNING) << "Exceeded match limit of " << match_limit
+                        << " when matching '" << pattern_ << "'"
+                        << " against text that is " << text.size() << " bytes.";
+        return 0;
+      case PCRE_ERROR_RECURSIONLIMIT:
+        // See comment about hit_limit above.
+        hit_limit_ = true;
+        PCREPORT(WARNING) << "Exceeded stack limit of " << stack_limit
+                        << " when matching '" << pattern_ << "'"
+                        << " against text that is " << text.size() << " bytes.";
+        return 0;
+      default:
+        // There are other return codes from pcre.h :
+        // PCRE_ERROR_NULL           (-2)
+        // PCRE_ERROR_BADOPTION      (-3)
+        // PCRE_ERROR_BADMAGIC       (-4)
+        // PCRE_ERROR_UNKNOWN_NODE   (-5)
+        // PCRE_ERROR_NOMEMORY       (-6)
+        // PCRE_ERROR_NOSUBSTRING    (-7)
+        // ...
+        PCREPORT(ERROR) << "Unexpected return code: " << rc
+                      << " when matching '" << pattern_ << "'"
+                      << ", re=" << re
+                      << ", text=" << text
+                      << ", vec=" << vec
+                      << ", vecsize=" << vecsize;
+        return 0;
+    }
+  }
+
+  return rc;
+}
+
+bool PCRE::DoMatchImpl(const StringPiece& text,
+                       Anchor anchor,
+                       size_t* consumed,
+                       const Arg* const* args,
+                       int n,
+                       int* vec,
+                       int vecsize) const {  // true iff matched and every arg parsed
+  assert((1 + n) * 3 <= vecsize);  // results + PCRE workspace
+  if (NumberOfCapturingGroups() < n) {
+    // RE has fewer capturing groups than number of Arg pointers passed in.
+    return false;
+  }
+  // Always search from offset 0 and allow an empty match.
+  int matches = TryMatch(text, 0, anchor, true, vec, vecsize);
+  assert(matches >= 0);  // TryMatch never returns negatives
+  if (matches == 0)
+    return false;
+
+  *consumed = vec[1];  // written on every match, even when no args are requested
+
+  if (n == 0 || args == NULL) {
+    // We are not interested in results
+    return true;
+  }
+
+  // If we got here, we must have matched the whole pattern.
+  // We do not need (can not do) any more checks on the value of 'matches' here
+  // -- see the comment for TryMatch.
+  for (int i = 0; i < n; i++) {
+    const int start = vec[2*(i+1)];  // vec[0] and vec[1] belong to the whole match
+    const int limit = vec[2*(i+1)+1];
+
+    // Avoid invoking undefined behavior when text.data() happens
+    // to be null and start happens to be -1, the latter being the
+    // case for an unmatched subexpression. Even if text.data() is
+    // not null, pointing one byte before was a longstanding bug.
+    const char* addr = NULL;
+    if (start != -1) {
+      addr = text.data() + start;
+    }
+
+    if (!args[i]->Parse(addr, limit-start)) {
+      // TODO: Should we indicate what the error was?
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool PCRE::DoMatch(const StringPiece& text,
+                   Anchor anchor,
+                   size_t* consumed,
+                   const Arg* const args[],
+                   int n) const {
+  // Runs DoMatchImpl with a heap offset vector sized for n args (n >= 0).
+  assert(n >= 0);
+  const int vecsize = (1 + n) * 3;  // results + PCRE workspace
+  int* vec = new int[vecsize]();    // zero-filled, like the "= {}" stack vectors
+  bool b = DoMatchImpl(text, anchor, consumed, args, n, vec, vecsize);
+  delete[] vec;
+  return b;
+}
+
+bool PCRE::Rewrite(std::string *out, const StringPiece &rewrite,
+                 const StringPiece &text, int *vec, int veclen) const {
+  // Appends rewrite to *out, replacing \N with the text of group N taken from
+  // vec and a doubled backslash with one; false if rewrite is malformed.
+  int number_of_capturing_groups = NumberOfCapturingGroups();
+  for (const char *s = rewrite.data(), *end = s + rewrite.size();
+       s < end; s++) {
+    int c = *s;
+    if (c == '\\') {
+      // At a trailing backslash use NUL rather than reading past the end.
+      c = (++s < end) ? *s : '\0';
+      if (isdigit(static_cast<unsigned char>(c))) {  // negative arg is UB
+        int n = (c - '0');
+        if (n >= veclen) {
+          // Unmatched optional group: empty value, nothing to append.
+          if (n <= number_of_capturing_groups) continue;
+          PCREPORT(ERROR) << "requested group " << n
+                          << " in regexp " << rewrite;
+          return false;
+        }
+        int start = vec[2 * n];
+        if (start >= 0)
+          out->append(text.data() + start, vec[2 * n + 1] - start);
+      } else if (c == '\\') {
+        out->push_back('\\');
+      } else {
+        PCREPORT(ERROR) << "invalid rewrite pattern: " << rewrite;
+        return false;
+      }
+    } else {
+      out->push_back(c);
+    }
+  }
+  return true;
+}
+
+bool PCRE::CheckRewriteString(const StringPiece& rewrite,
+                              std::string* error) const {
+  // Validates rewrite without matching: each backslash must be followed by a
+  // digit or a second backslash, and the largest group number used must not
+  // exceed the number of capturing groups.  On failure *error is set.
+  int max_token = -1;
+  for (const char *s = rewrite.data(), *end = s + rewrite.size();
+       s < end; s++) {
+    if (*s != '\\') continue;
+    if (++s == end) {
+      *error = "Rewrite schema error: '\\' not allowed at end.";
+      return false;
+    }
+    if (*s == '\\') continue;
+    // isdigit() is undefined for negative values, which a plain char with
+    // the high bit set can produce; go through unsigned char.
+    if (!isdigit(static_cast<unsigned char>(*s))) {
+      *error = "Rewrite schema error: "
+               "'\\' must be followed by a digit or '\\'.";
+      return false;
+    }
+    int n = (*s - '0');
+    if (max_token < n) {
+      max_token = n;
+    }
+  }
+
+  const int groups = NumberOfCapturingGroups();  // queried once for both uses
+  if (max_token > groups) {
+    *error = StringPrintf(
+        "Rewrite schema requests %d matches, but the regexp only has %d "
+        "parenthesized subexpressions.",
+        max_token, groups);
+    return false;
+  }
+  return true;
+}
+
+
+// Reports how many capturing subpatterns the regexp has.  Yields -1 when
+// re_partial_ is NULL (the regexp wasn't valid on construction) or when
+// pcre_fullinfo() returns a non-zero code, which is also logged.
+int PCRE::NumberOfCapturingGroups() const {
+  if (re_partial_ != NULL) {
+    int capture_count;
+    const int status = pcre_fullinfo(re_partial_,  // compiled regexp
+                                     NULL,         // pattern was not studied
+                                     PCRE_INFO_CAPTURECOUNT,
+                                     &capture_count);
+    if (status == 0) return capture_count;
+    PCREPORT(ERROR) << "Unexpected return code: " << status;
+  }
+  return -1;
+}
+
+
+/***** Parsers for various types *****/
+
+bool PCRE::Arg::parse_null(const char* str, size_t n, void* dest) {
+  // This parser stores nothing, so the only destination it can honour is
+  // NULL; any real pointer is reported as a failure.
+  const bool has_destination = (dest != NULL);
+  return !has_destination;
+}
+
+bool PCRE::Arg::parse_string(const char* str, size_t n, void* dest) {
+  // Copy the n matched bytes into the caller's std::string; a NULL
+  // destination means there is nothing to store.  Always succeeds.
+  if (dest != NULL) {
+    std::string* target = static_cast<std::string*>(dest);
+    target->assign(str, n);
+  }
+  return true;
+}
+
+bool PCRE::Arg::parse_stringpiece(const char* str, size_t n, void* dest) {
+  // Re-point the caller's StringPiece at the n matched bytes (no copy);
+  // a NULL destination means there is nothing to store.  Always succeeds.
+  if (dest != NULL) {
+    StringPiece* target = static_cast<StringPiece*>(dest);
+    *target = StringPiece(str, n);
+  }
+  return true;
+}
+
+bool PCRE::Arg::parse_char(const char* str, size_t n, void* dest) {
+  // Only a match of exactly one byte fits in a char.
+  const bool single_byte = (n == 1);
+  if (single_byte && dest != NULL) {
+    *static_cast<char*>(dest) = str[0];
+  }
+  return single_byte;
+}
+
+bool PCRE::Arg::parse_schar(const char* str, size_t n, void* dest) {
+  // Only a match of exactly one byte fits in a signed char.
+  const bool single_byte = (n == 1);
+  if (single_byte && dest != NULL) {
+    *static_cast<signed char*>(dest) = str[0];
+  }
+  return single_byte;
+}
+
+bool PCRE::Arg::parse_uchar(const char* str, size_t n, void* dest) {
+  // Only a match of exactly one byte fits in an unsigned char.
+  const bool single_byte = (n == 1);
+  if (single_byte && dest != NULL) {
+    *static_cast<unsigned char*>(dest) = str[0];
+  }
+  return single_byte;
+}
+
+// Largest number spec that we are willing to parse
+static const int kMaxNumberLength = 32;
+
+// REQUIRES "buf" must have length at least kMaxNumberLength+1
+// REQUIRES "n > 0"
+// Produces a string that strtoxxx() can parse without running past the
+// first "n" bytes of "str".
+// Returns one of:
+//      a. "buf", holding a NUL-terminated copy of the input, whenever the
+//         input fits (n <= kMaxNumberLength)
+//      b. "str" if the input is too long to copy and the byte after it
+//         cannot extend a number
+//      c. "" if the input was invalid and has no hope of being parsed
+static const char* TerminateNumber(char* buf, const char* str, size_t n) {
+  // The <ctype.h> classifiers are only defined for unsigned char values,
+  // so convert before calling them.
+  if ((n > 0) && isspace(static_cast<unsigned char>(*str))) {
+    // We are less forgiving than the strtoxxx() routines and do not
+    // allow leading spaces.
+    return "";
+  }
+
+  if (n <= static_cast<size_t>(kMaxNumberLength)) {
+    // Always copy when the input fits.  This avoids peeking at str[n],
+    // which lies outside the input and may not be readable, and it stops
+    // strtoxxx() from consuming trailing text that happens to continue the
+    // number (e.g. "0" followed by "x1f" in base 16).
+    memcpy(buf, str, n);
+    buf[n] = '\0';
+    return buf;
+  }
+
+  // Too long for "buf".  Keep the historical behaviour: refuse the input
+  // if the following byte could extend the number, otherwise let the
+  // caller parse in place.
+  // NOTE(review): this path still reads str[n]; callers must guarantee
+  // that byte is readable for inputs longer than kMaxNumberLength.
+  const unsigned char next = static_cast<unsigned char>(str[n]);
+  if (isdigit(next) ||
+      ((next >= 'a') && (next <= 'f')) ||
+      ((next >= 'A') && (next <= 'F'))) {
+    return "";  // Input too big to be a valid number
+  }
+  return str;
+}
+
+// Parses the n bytes at "str" as a long in base "radix" using strtol().
+// Fails on empty input, on text strtol() does not consume entirely, or when
+// strtol() sets errno.  The value is stored only if "dest" is non-NULL.
+bool PCRE::Arg::parse_long_radix(const char* str,
+                                 size_t n,
+                                 void* dest,
+                                 int radix) {
+  if (n == 0) return false;
+
+  char scratch[kMaxNumberLength+1];
+  const char* digits = TerminateNumber(scratch, str, n);
+
+  char* stop = NULL;
+  errno = 0;
+  const long value = strtol(digits, &stop, radix);
+
+  // Reject leftover junk after the number as well as range errors.
+  if (stop != digits + n || errno != 0) return false;
+
+  if (dest != NULL) *static_cast<long*>(dest) = value;
+  return true;
+}
+
+// Parses the n bytes at "str" as an unsigned long in base "radix" using
+// strtoul().  Fails on empty input, on a leading '-', on text strtoul()
+// does not consume entirely, or when strtoul() sets errno.  The value is
+// stored only if "dest" is non-NULL.
+bool PCRE::Arg::parse_ulong_radix(const char* str,
+                                  size_t n,
+                                  void* dest,
+                                  int radix) {
+  if (n == 0) return false;
+
+  char scratch[kMaxNumberLength+1];
+  const char* digits = TerminateNumber(scratch, str, n);
+
+  // strtoul() will silently accept negative numbers and parse them; this
+  // module is stricter and treats them as errors.
+  if (*digits == '-') return false;
+
+  char* stop = NULL;
+  errno = 0;
+  const unsigned long value = strtoul(digits, &stop, radix);
+
+  // Reject leftover junk after the number as well as range errors.
+  if (stop != digits + n || errno != 0) return false;
+
+  if (dest != NULL) *static_cast<unsigned long*>(dest) = value;
+  return true;
+}
+
+// Parses via parse_long_radix() and then narrows to short, failing if the
+// value does not survive the round trip.  Stores only if "dest" is non-NULL.
+bool PCRE::Arg::parse_short_radix(const char* str,
+                                  size_t n,
+                                  void* dest,
+                                  int radix) {
+  long wide;
+  if (!parse_long_radix(str, n, &wide, radix)) return false;  // Could not parse
+
+  const short narrow = static_cast<short>(wide);
+  if (narrow != wide) return false;                           // Out of range
+
+  if (dest != NULL) *static_cast<short*>(dest) = narrow;
+  return true;
+}
+
+// Parses via parse_ulong_radix() and then narrows to unsigned short,
+// failing if the value does not survive the round trip.  Stores only if
+// "dest" is non-NULL.
+bool PCRE::Arg::parse_ushort_radix(const char* str,
+                                   size_t n,
+                                   void* dest,
+                                   int radix) {
+  unsigned long wide;
+  if (!parse_ulong_radix(str, n, &wide, radix)) return false;  // Could not parse
+
+  const unsigned short narrow = static_cast<unsigned short>(wide);
+  if (narrow != wide) return false;                            // Out of range
+
+  if (dest != NULL) *static_cast<unsigned short*>(dest) = narrow;
+  return true;
+}
+
+// Parses via parse_long_radix() and then narrows to int, failing if the
+// value does not survive the round trip.  Stores only if "dest" is non-NULL.
+bool PCRE::Arg::parse_int_radix(const char* str,
+                                size_t n,
+                                void* dest,
+                                int radix) {
+  long wide;
+  if (!parse_long_radix(str, n, &wide, radix)) return false;  // Could not parse
+
+  const int narrow = static_cast<int>(wide);
+  if (narrow != wide) return false;                           // Out of range
+
+  if (dest != NULL) *static_cast<int*>(dest) = narrow;
+  return true;
+}
+
+// Parses via parse_ulong_radix() and then narrows to unsigned int, failing
+// if the value does not survive the round trip.  Stores only if "dest" is
+// non-NULL.
+bool PCRE::Arg::parse_uint_radix(const char* str,
+                                 size_t n,
+                                 void* dest,
+                                 int radix) {
+  unsigned long wide;
+  if (!parse_ulong_radix(str, n, &wide, radix)) return false;  // Could not parse
+
+  const unsigned int narrow = static_cast<unsigned int>(wide);
+  if (narrow != wide) return false;                            // Out of range
+
+  if (dest != NULL) *static_cast<unsigned int*>(dest) = narrow;
+  return true;
+}
+
+// Parses the n bytes at "str" as a long long in base "radix" using
+// strtoll().  Fails on empty input, on text strtoll() does not consume
+// entirely, or when strtoll() sets errno.  The value is stored only if
+// "dest" is non-NULL.
+bool PCRE::Arg::parse_longlong_radix(const char* str,
+                                     size_t n,
+                                     void* dest,
+                                     int radix) {
+  if (n == 0) return false;
+
+  char scratch[kMaxNumberLength+1];
+  const char* digits = TerminateNumber(scratch, str, n);
+
+  char* stop = NULL;
+  errno = 0;
+  const long long value = strtoll(digits, &stop, radix);
+
+  // Reject leftover junk after the number as well as range errors.
+  if (stop != digits + n || errno != 0) return false;
+
+  if (dest != NULL) *static_cast<long long*>(dest) = value;
+  return true;
+}
+
+// Parses the n bytes at "str" as an unsigned long long in base "radix"
+// using strtoull().  Fails on empty input, on a leading '-', on text
+// strtoull() does not consume entirely, or when strtoull() sets errno.
+// The value is stored only if "dest" is non-NULL.
+bool PCRE::Arg::parse_ulonglong_radix(const char* str,
+                                      size_t n,
+                                      void* dest,
+                                      int radix) {
+  if (n == 0) return false;
+
+  char scratch[kMaxNumberLength+1];
+  const char* digits = TerminateNumber(scratch, str, n);
+
+  // strtoull() will silently accept negative numbers and parse them; this
+  // module is stricter and treats them as errors.
+  if (*digits == '-') return false;
+
+  char* stop = NULL;
+  errno = 0;
+  const unsigned long long value = strtoull(digits, &stop, radix);
+
+  // Reject leftover junk after the number as well as range errors.
+  if (stop != digits + n || errno != 0) return false;
+
+  if (dest != NULL) *static_cast<unsigned long long*>(dest) = value;
+  return true;
+}
+
+// Shared worker for parse_double() and parse_float().
+//
+// Copies the n bytes at "str" into a NUL-terminated local buffer and parses
+// them with strtof() (when "isfloat" is true) or strtod().  Fails on empty
+// input, on input of kMaxLength bytes or more, on text the parser does not
+// consume entirely, or when the parser sets errno.  On success the value
+// is written to "dest" as float or double respectively, unless "dest" is
+// NULL.
+static bool parse_double_float(const char* str, size_t n, bool isfloat,
+                               void* dest) {
+  static const int kMaxLength = 200;
+  if (n == 0 || n >= kMaxLength) return false;
+
+  char text[kMaxLength];
+  memcpy(text, str, n);
+  text[n] = '\0';
+
+  char* stop = NULL;
+  errno = 0;
+  const double value = isfloat ? strtof(text, &stop) : strtod(text, &stop);
+
+  // Reject leftover junk after the number as well as range errors.
+  if (stop != text + n || errno != 0) return false;
+
+  if (dest != NULL) {
+    if (isfloat) {
+      *static_cast<float*>(dest) = static_cast<float>(value);
+    } else {
+      *static_cast<double*>(dest) = value;
+    }
+  }
+  return true;
+}
+
+bool PCRE::Arg::parse_double(const char* str, size_t n, void* dest) {
+  // Double-precision entry point of the shared floating-point parser.
+  const bool as_float = false;
+  return parse_double_float(str, n, as_float, dest);
+}
+
+bool PCRE::Arg::parse_float(const char* str, size_t n, void* dest) {
+  // Single-precision entry point of the shared floating-point parser.
+  const bool as_float = true;
+  return parse_double_float(str, n, as_float, dest);
+}
+
+// Defines one PCRE::Arg member "wrapper" that forwards to "impl" with a
+// fixed radix.
+#define DEFINE_RADIX_WRAPPER(wrapper, impl, radix)                            \
+  bool PCRE::Arg::wrapper(const char* str, size_t n, void* dest) {            \
+    return impl(str, n, dest, radix);                                         \
+  }
+
+// For an integer type "name", defines parse_<name> (base 10),
+// parse_<name>_hex (base 16), parse_<name>_octal (base 8) and
+// parse_<name>_cradix (radix 0), each forwarding to parse_<name>_radix.
+#define DEFINE_INTEGER_PARSER(name)                                           \
+  DEFINE_RADIX_WRAPPER(parse_##name, parse_##name##_radix, 10)                \
+  DEFINE_RADIX_WRAPPER(parse_##name##_hex, parse_##name##_radix, 16)          \
+  DEFINE_RADIX_WRAPPER(parse_##name##_octal, parse_##name##_radix, 8)         \
+  DEFINE_RADIX_WRAPPER(parse_##name##_cradix, parse_##name##_radix, 0)
+
+DEFINE_INTEGER_PARSER(short);
+DEFINE_INTEGER_PARSER(ushort);
+DEFINE_INTEGER_PARSER(int);
+DEFINE_INTEGER_PARSER(uint);
+DEFINE_INTEGER_PARSER(long);
+DEFINE_INTEGER_PARSER(ulong);
+DEFINE_INTEGER_PARSER(longlong);
+DEFINE_INTEGER_PARSER(ulonglong);
+
+#undef DEFINE_INTEGER_PARSER
+#undef DEFINE_RADIX_WRAPPER
+
+}  // namespace re2
diff --git a/util/pcre.h b/util/pcre.h
new file mode 100644
index 0000000000000000000000000000000000000000..896b0bdf8935a8e901fc74a2be0fdf0786e0bbc4
--- /dev/null
+++ b/util/pcre.h
@@ -0,0 +1,681 @@
+// Copyright 2003-2010 Google Inc.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_PCRE_H_
+#define UTIL_PCRE_H_
+
+// This is a variant of PCRE's pcrecpp.h, originally written at Google.
+// The main changes are the addition of the HitLimit method and
+// compilation as PCRE in namespace re2.
+
+// C++ interface to the pcre regular-expression library.  PCRE supports
+// Perl-style regular expressions (with extensions like \d, \w, \s,
+// ...).
+//
+// -----------------------------------------------------------------------
+// REGEXP SYNTAX:
+//
+// This module uses the pcre library and hence supports its syntax
+// for regular expressions:
+//
+//      http://www.google.com/search?q=pcre
+//
+// The syntax is pretty similar to Perl's.  For those not familiar
+// with Perl's regular expressions, here are some examples of the most
+// commonly used extensions:
+//
+//   "hello (\\w+) world"  -- \w matches a "word" character
+//   "version (\\d+)"      -- \d matches a digit
+//   "hello\\s+world"      -- \s matches any whitespace character
+//   "\\b(\\w+)\\b"        -- \b matches empty string at a word boundary
+//   "(?i)hello"           -- (?i) turns on case-insensitive matching
+//   "/\\*(.*?)\\*/"       -- .*? matches . minimum no. of times possible
+//
+// -----------------------------------------------------------------------
+// MATCHING INTERFACE:
+//
+// The "FullMatch" operation checks that supplied text matches a
+// supplied pattern exactly.
+//
+// Example: successful match
+//    CHECK(PCRE::FullMatch("hello", "h.*o"));
+//
+// Example: unsuccessful match (requires full match):
+//    CHECK(!PCRE::FullMatch("hello", "e"));
+//
+// -----------------------------------------------------------------------
+// UTF-8 AND THE MATCHING INTERFACE:
+//
+// By default, pattern and text are plain text, one byte per character.
+// The UTF8 flag, passed to the constructor, causes both pattern
+// and string to be treated as UTF-8 text, still a byte stream but
+// potentially multiple bytes per character. In practice, the text
+// is likelier to be UTF-8 than the pattern, but the match returned
+// may depend on the UTF8 flag, so always use it when matching
+// UTF8 text.  E.g., "." will match one byte normally but with UTF8
+// set may match up to three bytes of a multi-byte character.
+//
+// Example:
+//    PCRE re(utf8_pattern, PCRE::UTF8);
+//    CHECK(PCRE::FullMatch(utf8_string, re));
+//
+// -----------------------------------------------------------------------
+// MATCHING WITH SUBSTRING EXTRACTION:
+//
+// You can supply extra pointer arguments to extract matched substrings.
+//
+// Example: extracts "ruby" into "s" and 1234 into "i"
+//    int i;
+//    std::string s;
+//    CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
+//
+// Example: fails because string cannot be stored in integer
+//    CHECK(!PCRE::FullMatch("ruby", "(.*)", &i));
+//
+// Example: fails because there aren't enough sub-patterns:
+//    CHECK(!PCRE::FullMatch("ruby:1234", "\\w+:\\d+", &s));
+//
+// Example: does not try to extract any extra sub-patterns
+//    CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
+//
+// Example: does not try to extract into NULL
+//    CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
+//
+// Example: integer overflow causes failure
+//    CHECK(!PCRE::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
+//
+// -----------------------------------------------------------------------
+// PARTIAL MATCHES
+//
+// You can use the "PartialMatch" operation when you want the pattern
+// to match any substring of the text.
+//
+// Example: simple search for a string:
+//      CHECK(PCRE::PartialMatch("hello", "ell"));
+//
+// Example: find first number in a string
+//      int number;
+//      CHECK(PCRE::PartialMatch("x*100 + 20", "(\\d+)", &number));
+//      CHECK_EQ(number, 100);
+//
+// -----------------------------------------------------------------------
+// PRE-COMPILED REGULAR EXPRESSIONS
+//
+// PCRE makes it easy to use any string as a regular expression, without
+// requiring a separate compilation step.
+//
+// If speed is of the essence, you can create a pre-compiled "PCRE"
+// object from the pattern and use it multiple times.  If you do so,
+// you can typically parse text faster than with sscanf.
+//
+// Example: precompile pattern for faster matching:
+//    PCRE pattern("h.*o");
+//    while (ReadLine(&str)) {
+//      if (PCRE::FullMatch(str, pattern)) ...;
+//    }
+//
+// -----------------------------------------------------------------------
+// SCANNING TEXT INCREMENTALLY
+//
+// The "Consume" operation may be useful if you want to repeatedly
+// match regular expressions at the front of a string and skip over
+// them as they match.  This requires use of the "StringPiece" type,
+// which represents a sub-range of a real string.
+//
+// Example: read lines of the form "var = value" from a string.
+//      std::string contents = ...;     // Fill string somehow
+//      StringPiece input(contents);    // Wrap a StringPiece around it
+//
+//      std::string var;
+//      int value;
+//      while (PCRE::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) {
+//        ...;
+//      }
+//
+// Each successful call to "Consume" will set "var/value", and also
+// advance "input" so it points past the matched text.  Note that if the
+// regular expression matches an empty string, input will advance
+// by 0 bytes.  If the regular expression being used might match
+// an empty string, the loop body must check for this case and either
+// advance the string or break out of the loop.
+//
+// The "FindAndConsume" operation is similar to "Consume" but does not
+// anchor your match at the beginning of the string.  For example, you
+// could extract all words from a string by repeatedly calling
+//     PCRE::FindAndConsume(&input, "(\\w+)", &word)
+//
+// -----------------------------------------------------------------------
+// PARSING HEX/OCTAL/C-RADIX NUMBERS
+//
+// By default, if you pass a pointer to a numeric value, the
+// corresponding text is interpreted as a base-10 number.  You can
+// instead wrap the pointer with a call to one of the operators Hex(),
+// Octal(), or CRadix() to interpret the text in another base.  The
+// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
+// prefixes, but defaults to base-10.
+//
+// Example:
+//   int a, b, c, d;
+//   CHECK(PCRE::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
+//         Octal(&a), Hex(&b), CRadix(&c), CRadix(&d)));
+// will leave 64 in a, b, c, and d.
+
+#include "util/util.h"
+#include "re2/stringpiece.h"
+
+// With USEPCRE the real <pcre.h> declarations are used; without it "pcre"
+// is only forward-declared as an opaque type.
+#ifdef USEPCRE
+#include <pcre.h>
+#else
+struct pcre;  // opaque
+#endif
+
+namespace re2 {
+// True iff USEPCRE was defined for this build.
+#ifdef USEPCRE
+const bool UsingPCRE = true;
+#else
+const bool UsingPCRE = false;
+#endif
+}  // namespace re2
+
+namespace re2 {
+
+class PCRE_Options;
+
+// Interface for regular expression matching.  Also corresponds to a
+// pre-compiled regular expression.  A "PCRE" object is safe for
+// concurrent use by multiple threads.
+class PCRE {
+ public:
+  // We convert user-passed pointers into special Arg objects
+  class Arg;
+
+  // Marks end of arg list.
+  // ONLY USE IN OPTIONAL ARG DEFAULTS.
+  // DO NOT PASS EXPLICITLY.
+  static Arg no_more_args;
+
+  // Options are same value as those in pcre.  We provide them here
+  // to avoid users needing to include pcre.h and also to isolate
+  // users from pcre should we change the underlying library.
+  // Only those needed by Google programs are exposed here to
+  // avoid collision with options employed internally by regexp.cc
+  // Note that some options have equivalents that can be specified in
+  // the regexp itself.  For example, prefixing your regexp with
+  // "(?s)" has the same effect as the PCRE_DOTALL option.
+  enum Option {
+    None = 0x0000,
+    UTF8 = 0x0800,  // == PCRE_UTF8
+    EnabledCompileOptions = UTF8,
+    EnabledExecOptions = 0x0000,  // TODO: use to replace anchor flag
+  };
+
+  // We provide implicit conversions from strings so that users can
+  // pass in a string or a "const char*" wherever a "PCRE" is expected.
+  PCRE(const char* pattern);
+  PCRE(const char* pattern, Option option);
+  PCRE(const std::string& pattern);
+  PCRE(const std::string& pattern, Option option);
+  PCRE(const char *pattern, const PCRE_Options& re_option);
+  PCRE(const std::string& pattern, const PCRE_Options& re_option);
+
+  ~PCRE();
+
+  // The string specification for this PCRE.  E.g.
+  //   PCRE re("ab*c?d+");
+  //   re.pattern();    // "ab*c?d+"
+  const std::string& pattern() const { return pattern_; }
+
+  // If PCRE could not be created properly, returns an error string.
+  // Else returns the empty string.
+  const std::string& error() const { return *error_; }
+
+  // Whether the PCRE has hit a match limit during execution.
+  // Not thread safe.  Intended only for testing.
+  // If hitting match limits is a problem,
+  // you should be using RE2 (re2/re2.h)
+  // instead of checking this flag.
+  bool HitLimit();
+  void ClearHitLimit();
+
+  /***** The useful part: the matching interface *****/
+
+  // Matches "text" against "pattern".  If pointer arguments are
+  // supplied, copies matched sub-patterns into them.
+  //
+  // You can pass in a "const char*" or a "std::string" for "text".
+  // You can pass in a "const char*" or a "std::string" or a "PCRE" for "pattern".
+  //
+  // The provided pointer arguments can be pointers to any scalar numeric
+  // type, or one of:
+  //    std::string     (matched piece is copied to string)
+  //    StringPiece     (StringPiece is mutated to point to matched piece)
+  //    T               (where "bool T::ParseFrom(const char*, size_t)" exists)
+  //    (void*)NULL     (the corresponding matched sub-pattern is not copied)
+  //
+  // Returns true iff all of the following conditions are satisfied:
+  //   a. "text" matches "pattern" exactly
+  //   b. The number of matched sub-patterns is >= number of supplied pointers
+  //   c. The "i"th argument has a suitable type for holding the
+  //      string captured as the "i"th sub-pattern.  If you pass in
+  //      NULL for the "i"th argument, or pass fewer arguments than
+  //      number of sub-patterns, "i"th captured sub-pattern is
+  //      ignored.
+  //
+  // CAVEAT: An optional sub-pattern that does not exist in the
+  // matched string is assigned the empty string.  Therefore, the
+  // following will return false (because the empty string is not a
+  // valid number):
+  //    int number;
+  //    PCRE::FullMatch("abc", "[a-z]+(\\d+)?", &number);
+  struct FullMatchFunctor {
+    bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args
+                     const Arg& ptr1 = no_more_args,
+                     const Arg& ptr2 = no_more_args,
+                     const Arg& ptr3 = no_more_args,
+                     const Arg& ptr4 = no_more_args,
+                     const Arg& ptr5 = no_more_args,
+                     const Arg& ptr6 = no_more_args,
+                     const Arg& ptr7 = no_more_args,
+                     const Arg& ptr8 = no_more_args,
+                     const Arg& ptr9 = no_more_args,
+                     const Arg& ptr10 = no_more_args,
+                     const Arg& ptr11 = no_more_args,
+                     const Arg& ptr12 = no_more_args,
+                     const Arg& ptr13 = no_more_args,
+                     const Arg& ptr14 = no_more_args,
+                     const Arg& ptr15 = no_more_args,
+                     const Arg& ptr16 = no_more_args) const;
+  };
+
+  static const FullMatchFunctor FullMatch;
+
+  // Exactly like FullMatch(), except that "pattern" is allowed to match
+  // a substring of "text".
+  struct PartialMatchFunctor {
+    bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args
+                     const Arg& ptr1 = no_more_args,
+                     const Arg& ptr2 = no_more_args,
+                     const Arg& ptr3 = no_more_args,
+                     const Arg& ptr4 = no_more_args,
+                     const Arg& ptr5 = no_more_args,
+                     const Arg& ptr6 = no_more_args,
+                     const Arg& ptr7 = no_more_args,
+                     const Arg& ptr8 = no_more_args,
+                     const Arg& ptr9 = no_more_args,
+                     const Arg& ptr10 = no_more_args,
+                     const Arg& ptr11 = no_more_args,
+                     const Arg& ptr12 = no_more_args,
+                     const Arg& ptr13 = no_more_args,
+                     const Arg& ptr14 = no_more_args,
+                     const Arg& ptr15 = no_more_args,
+                     const Arg& ptr16 = no_more_args) const;
+  };
+
+  static const PartialMatchFunctor PartialMatch;
+
+  // Like FullMatch() and PartialMatch(), except that pattern has to
+  // match a prefix of "text", and "input" is advanced past the matched
+  // text.  Note: "input" is modified iff this routine returns true.
+  struct ConsumeFunctor {
+    bool operator ()(StringPiece* input, const PCRE& pattern, // 3..16 args
+                     const Arg& ptr1 = no_more_args,
+                     const Arg& ptr2 = no_more_args,
+                     const Arg& ptr3 = no_more_args,
+                     const Arg& ptr4 = no_more_args,
+                     const Arg& ptr5 = no_more_args,
+                     const Arg& ptr6 = no_more_args,
+                     const Arg& ptr7 = no_more_args,
+                     const Arg& ptr8 = no_more_args,
+                     const Arg& ptr9 = no_more_args,
+                     const Arg& ptr10 = no_more_args,
+                     const Arg& ptr11 = no_more_args,
+                     const Arg& ptr12 = no_more_args,
+                     const Arg& ptr13 = no_more_args,
+                     const Arg& ptr14 = no_more_args,
+                     const Arg& ptr15 = no_more_args,
+                     const Arg& ptr16 = no_more_args) const;
+  };
+
+  static const ConsumeFunctor Consume;
+
+  // Like Consume(..), but does not anchor the match at the beginning of the
+  // string.  That is, "pattern" need not start its match at the beginning of
+  // "input".  For example, "FindAndConsume(s, "(\\w+)", &word)" finds the next
+  // word in "s" and stores it in "word".
+  struct FindAndConsumeFunctor {
+    bool operator ()(StringPiece* input, const PCRE& pattern,
+                     const Arg& ptr1 = no_more_args,
+                     const Arg& ptr2 = no_more_args,
+                     const Arg& ptr3 = no_more_args,
+                     const Arg& ptr4 = no_more_args,
+                     const Arg& ptr5 = no_more_args,
+                     const Arg& ptr6 = no_more_args,
+                     const Arg& ptr7 = no_more_args,
+                     const Arg& ptr8 = no_more_args,
+                     const Arg& ptr9 = no_more_args,
+                     const Arg& ptr10 = no_more_args,
+                     const Arg& ptr11 = no_more_args,
+                     const Arg& ptr12 = no_more_args,
+                     const Arg& ptr13 = no_more_args,
+                     const Arg& ptr14 = no_more_args,
+                     const Arg& ptr15 = no_more_args,
+                     const Arg& ptr16 = no_more_args) const;
+  };
+
+  static const FindAndConsumeFunctor FindAndConsume;
+
+  // Replace the first match of "pattern" in "str" with "rewrite".
+  // Within "rewrite", backslash-escaped digits (\1 to \9) can be
+  // used to insert text matching corresponding parenthesized group
+  // from the pattern.  \0 in "rewrite" refers to the entire matching
+  // text.  E.g.,
+  //
+  //   std::string s = "yabba dabba doo";
+  //   CHECK(PCRE::Replace(&s, "b+", "d"));
+  //
+  // will leave "s" containing "yada dabba doo"
+  //
+  // Returns true if the pattern matches and a replacement occurs,
+  // false otherwise.
+  static bool Replace(std::string *str,
+                      const PCRE& pattern,
+                      const StringPiece& rewrite);
+
+  // Like Replace(), except replaces all occurrences of the pattern in
+  // the string with the rewrite.  Replacements are not subject to
+  // re-matching.  E.g.,
+  //
+  //   std::string s = "yabba dabba doo";
+  //   CHECK(PCRE::GlobalReplace(&s, "b+", "d"));
+  //
+  // will leave "s" containing "yada dada doo"
+  //
+  // Returns the number of replacements made.
+  static int GlobalReplace(std::string *str,
+                           const PCRE& pattern,
+                           const StringPiece& rewrite);
+
+  // Like Replace, except that if the pattern matches, "rewrite"
+  // is copied into "out" with substitutions.  The non-matching
+  // portions of "text" are ignored.
+  //
+  // Returns true iff a match occurred and the extraction happened
+  // successfully;  if no match occurs, the string is left unaffected.
+  static bool Extract(const StringPiece &text,
+                      const PCRE& pattern,
+                      const StringPiece &rewrite,
+                      std::string *out);
+
+  // Check that the given @p rewrite string is suitable for use with
+  // this PCRE.  It checks that:
+  //   * The PCRE has enough parenthesized subexpressions to satisfy all
+  //       of the \N tokens in @p rewrite, and
+  //   * The @p rewrite string doesn't have any syntax errors
+  //       ('\' followed by anything besides [0-9] and '\').
+  // Making this test will guarantee that "replace" and "extract"
+  // operations won't LOG(ERROR) or fail because of a bad rewrite
+  // string.
+  // @param rewrite The proposed rewrite string.
+  // @param error An error message is recorded here, iff we return false.
+  //              Otherwise, it is unchanged.
+  // @return true, iff @p rewrite is suitable for use with the PCRE.
+  bool CheckRewriteString(const StringPiece& rewrite,
+                          std::string* error) const;
+
+  // Returns a copy of 'unquoted' with all potentially meaningful
+  // regexp characters backslash-escaped.  The returned string, used
+  // as a regular expression, will exactly match the original string.
+  // For example,
+  //           1.5-2.0?
+  //  becomes:
+  //           1\.5\-2\.0\?
+  static std::string QuoteMeta(const StringPiece& unquoted);
+
+  /***** Generic matching interface (not so nice to use) *****/
+
+  // Type of match (TODO: Should be restructured as an Option)
+  enum Anchor {
+    UNANCHORED,         // No anchoring
+    ANCHOR_START,       // Anchor at start only
+    ANCHOR_BOTH,        // Anchor at start and end
+  };
+
+  // General matching routine.  Stores the length of the match in
+  // "*consumed" if successful.
+  bool DoMatch(const StringPiece& text,
+               Anchor anchor,
+               size_t* consumed,
+               const Arg* const* args, int n) const;
+
+  // Return the number of capturing subpatterns, or -1 if the
+  // regexp wasn't valid on construction.
+  int NumberOfCapturingGroups() const;
+
+ private:
+  void Init(const char* pattern, Option option, int match_limit,
+            int stack_limit, bool report_errors);
+
+  // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with
+  // pairs of integers for the beginning and end positions of matched
+  // text.  The first pair corresponds to the entire matched text;
+  // subsequent pairs correspond, in order, to parentheses-captured
+  // matches.  Returns the number of pairs (one more than the number of
+  // the last subpattern with a match) if matching was successful
+  // and zero if the match failed.
+  // I.e. for PCRE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching
+  // against "foo", "bar", and "baz" respectively.
+  // When matching PCRE("(foo)|hello") against "hello", it will return 1.
+  // But the values for all subpattern are filled in into "vec".
+  int TryMatch(const StringPiece& text,
+               size_t startpos,
+               Anchor anchor,
+               bool empty_ok,
+               int *vec,
+               int vecsize) const;
+
+  // Append the "rewrite" string, with backslash substitutions from "text"
+  // and "vec", to string "out".
+  bool Rewrite(std::string *out,
+               const StringPiece &rewrite,
+               const StringPiece &text,
+               int *vec,
+               int veclen) const;
+
+  // internal implementation for DoMatch
+  bool DoMatchImpl(const StringPiece& text,
+                   Anchor anchor,
+                   size_t* consumed,
+                   const Arg* const args[],
+                   int n,
+                   int* vec,
+                   int vecsize) const;
+
+  // Compile the regexp for the specified anchoring mode
+  pcre* Compile(Anchor anchor);
+
+  std::string         pattern_;
+  Option              options_;
+  pcre*               re_full_;        // For full matches
+  pcre*               re_partial_;     // For partial matches
+  const std::string*  error_;          // Error indicator (or empty string)
+  bool                report_errors_;  // Silences error logging if false
+  int                 match_limit_;    // Limit on execution resources
+  int                 stack_limit_;    // Limit on stack resources (bytes)
+  mutable int32_t     hit_limit_;      // Hit limit during execution (bool)
+
+  PCRE(const PCRE&) = delete;
+  PCRE& operator=(const PCRE&) = delete;
+};
+
+// PCRE_Options allow you to set the PCRE::Options, plus any pcre
+// "extra" options.  The only extras are match_limit, which limits
+// the CPU time of a match, and stack_limit, which limits the
+// stack usage.  Setting a limit to <= 0 lets PCRE pick a sensible default
+// that should not cause too many problems in production code.
+// If PCRE hits a limit during a match, it may return a false negative,
+// but (hopefully) it won't crash.
+//
+// NOTE: If you are handling regular expressions specified by
+// (external or internal) users, rather than hard-coded ones,
+// you should be using RE2, which uses an alternate implementation
+// that avoids these issues.  See http://go/re2quick.
+class PCRE_Options {
+ public:
+  // Defaults: PCRE::None, both limits 0, error reporting enabled.
+  PCRE_Options()
+      : option_(PCRE::None), match_limit_(0), stack_limit_(0),
+        report_errors_(true) {}
+
+  // The PCRE::Option setting.
+  PCRE::Option option() const { return option_; }
+  void set_option(PCRE::Option option) { option_ = option; }
+
+  // Match limit (the default of 0 leaves the choice to PCRE).
+  int match_limit() const { return match_limit_; }
+  void set_match_limit(int match_limit) { match_limit_ = match_limit; }
+
+  // Stack limit (the default of 0 leaves the choice to PCRE).
+  int stack_limit() const { return stack_limit_; }
+  void set_stack_limit(int stack_limit) { stack_limit_ = stack_limit; }
+
+  // If the regular expression is malformed, an error message will be printed
+  // iff report_errors() is true.  Default: true.
+  bool report_errors() const { return report_errors_; }
+  void set_report_errors(bool report_errors) { report_errors_ = report_errors; }
+
+ private:
+  PCRE::Option option_;
+  int match_limit_;
+  int stack_limit_;
+  bool report_errors_;
+};
+
+
+/***** Implementation details *****/
+
+// Hex/Octal/Binary?
+
+// Adapter for destination objects that define a ParseFrom() method:
+// Parse() forwards (str, n) to T::ParseFrom().
+template <typename T>
+class _PCRE_MatchObject {
+ public:
+  // A NULL dest is accepted and reported as success without parsing.
+  static inline bool Parse(const char* str, size_t n, void* dest) {
+    return dest == NULL || reinterpret_cast<T*>(dest)->ParseFrom(str, n);
+  }
+};
+
+class PCRE::Arg {
+ public:
+  // Default constructor so we can declare arrays of PCRE::Arg (NULL dest)
+  Arg();
+
+  // Constructor for untyped/NULL arguments; installs the parse_null parser
+  Arg(void*);
+
+  typedef bool (*Parser)(const char* str, size_t n, void* dest);
+
+// Type-specific constructors: built-in parser, or a caller-supplied one
+#define MAKE_PARSER(type, name)            \
+  Arg(type* p) : arg_(p), parser_(name) {} \
+  Arg(type* p, Parser parser) : arg_(p), parser_(parser) {}
+
+  MAKE_PARSER(char,               parse_char);
+  MAKE_PARSER(signed char,        parse_schar);
+  MAKE_PARSER(unsigned char,      parse_uchar);
+  MAKE_PARSER(float,              parse_float);
+  MAKE_PARSER(double,             parse_double);
+  MAKE_PARSER(std::string,        parse_string);
+  MAKE_PARSER(StringPiece,        parse_stringpiece);
+
+  MAKE_PARSER(short,              parse_short);
+  MAKE_PARSER(unsigned short,     parse_ushort);
+  MAKE_PARSER(int,                parse_int);
+  MAKE_PARSER(unsigned int,       parse_uint);
+  MAKE_PARSER(long,               parse_long);
+  MAKE_PARSER(unsigned long,      parse_ulong);
+  MAKE_PARSER(long long,          parse_longlong);
+  MAKE_PARSER(unsigned long long, parse_ulonglong);
+
+#undef MAKE_PARSER
+
+  // Generic constructor taking an explicit parser (only declared here)
+  template <typename T> Arg(T*, Parser parser);
+  // Generic fallback: any other T* is parsed through T::ParseFrom()
+  template <typename T> Arg(T* p)
+    : arg_(p), parser_(_PCRE_MatchObject<T>::Parse) {
+  }
+
+  // Invokes the stored parser with (str, n) and the stored destination
+  bool Parse(const char* str, size_t n) const;
+
+ private:
+  void*         arg_;     // destination pointer handed to parser_
+  Parser        parser_;  // conversion routine chosen at construction
+
+  static bool parse_null          (const char* str, size_t n, void* dest);
+  static bool parse_char          (const char* str, size_t n, void* dest);
+  static bool parse_schar         (const char* str, size_t n, void* dest);
+  static bool parse_uchar         (const char* str, size_t n, void* dest);
+  static bool parse_float         (const char* str, size_t n, void* dest);
+  static bool parse_double        (const char* str, size_t n, void* dest);
+  static bool parse_string        (const char* str, size_t n, void* dest);
+  static bool parse_stringpiece   (const char* str, size_t n, void* dest);
+
+#define DECLARE_INTEGER_PARSER(name)                                       \
+ private:                                                                  \
+  static bool parse_##name(const char* str, size_t n, void* dest);         \
+  static bool parse_##name##_radix(const char* str, size_t n, void* dest,  \
+                                   int radix);                             \
+                                                                           \
+ public:                                                                   \
+  static bool parse_##name##_hex(const char* str, size_t n, void* dest);   \
+  static bool parse_##name##_octal(const char* str, size_t n, void* dest); \
+  static bool parse_##name##_cradix(const char* str, size_t n, void* dest)
+
+  DECLARE_INTEGER_PARSER(short);
+  DECLARE_INTEGER_PARSER(ushort);
+  DECLARE_INTEGER_PARSER(int);
+  DECLARE_INTEGER_PARSER(uint);
+  DECLARE_INTEGER_PARSER(long);
+  DECLARE_INTEGER_PARSER(ulong);
+  DECLARE_INTEGER_PARSER(longlong);
+  DECLARE_INTEGER_PARSER(ulonglong);
+
+#undef DECLARE_INTEGER_PARSER
+
+};
+
+// The default and void* constructors both install parse_null.
+inline PCRE::Arg::Arg() : arg_(NULL), parser_(parse_null) {}
+inline PCRE::Arg::Arg(void* p) : arg_(p), parser_(parse_null) {}
+inline bool PCRE::Arg::Parse(const char* str, size_t n) const {
+  return parser_(str, n, arg_);  // hand the stored destination to the parser
+}
+
+// Hex()/Octal()/CRadix(): build a PCRE::Arg bound to a base-specific parser
+#define MAKE_INTEGER_PARSER(type, name)                      \
+  inline PCRE::Arg Hex(type* ptr) {                          \
+    return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_hex);    \
+  }                                                          \
+  inline PCRE::Arg Octal(type* ptr) {                        \
+    return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_octal);  \
+  }                                                          \
+  inline PCRE::Arg CRadix(type* ptr) {                       \
+    return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_cradix); \
+  }
+
+MAKE_INTEGER_PARSER(short,              short);
+MAKE_INTEGER_PARSER(unsigned short,     ushort);
+MAKE_INTEGER_PARSER(int,                int);
+MAKE_INTEGER_PARSER(unsigned int,       uint);
+MAKE_INTEGER_PARSER(long,               long);
+MAKE_INTEGER_PARSER(unsigned long,      ulong);
+MAKE_INTEGER_PARSER(long long,          longlong);
+MAKE_INTEGER_PARSER(unsigned long long, ulonglong);
+
+#undef MAKE_INTEGER_PARSER
+
+}  // namespace re2
+
+#endif  // UTIL_PCRE_H_
diff --git a/util/rune.cc b/util/rune.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4f625ea380f4c77e1c8f66f2caf4d4a0c67d6f7b
--- /dev/null
+++ b/util/rune.cc
@@ -0,0 +1,260 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ *              Copyright (c) 2002 by Lucent Technologies.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */
+
+#include <stdarg.h>
+#include <string.h>
+
+#include "util/utf.h"
+
+namespace re2 {
+
+enum
+{
+	Bit1	= 7,	/* payload bits in a one-byte sequence */
+	Bitx	= 6,	/* payload bits in each continuation byte */
+	Bit2	= 5,	/* payload bits in the lead byte of a 2-byte sequence */
+	Bit3	= 4,	/* payload bits in the lead byte of a 3-byte sequence */
+	Bit4	= 3,	/* payload bits in the lead byte of a 4-byte sequence */
+	Bit5	= 2,	/* used only to derive T5, the bound on 4-byte lead bytes */
+
+	T1	= ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
+	Tx	= ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
+	T2	= ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
+	T3	= ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
+	T4	= ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
+	T5	= ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */
+
+	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0111 1111 */
+	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0111 1111 1111 */
+	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 1111 1111 1111 1111 */
+	Rune4	= (1<<(Bit4+3*Bitx))-1,
+                                        /* 0001 1111 1111 1111 1111 1111 */
+
+	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */
+	Testx	= Maskx ^ 0xFF,			/* 1100 0000 */
+
+	Bad	= Runeerror,	/* rune stored when decoding fails */
+};
+
+int
+chartorune(Rune *rune, const char *str)
+{
+	int c, c1, c2, c3;
+	long l;
+	/*
+	 * Decodes one UTF-8 sequence at str into *rune and returns the
+	 * number of bytes consumed; malformed input yields Bad, length 1.
+	 * One-byte sequence: 00-7F.
+	 */
+	c = *(unsigned char*)str;
+	if(c < Tx) {
+		*rune = c;
+		return 1;
+	}
+
+	/*
+	 * two character sequence: 0080-07FF => T2 Tx
+	 * (a NUL continuation byte fails the Testx check, ending the scan)
+	 */
+	c1 = *(unsigned char*)(str+1) ^ Tx;
+	if(c1 & Testx)
+		goto bad;
+	if(c < T3) {
+		if(c < T2)
+			goto bad;
+		l = ((c << Bitx) | c1) & Rune2;
+		if(l <= Rune1)
+			goto bad;
+		*rune = l;
+		return 2;
+	}
+
+	/*
+	 * three character sequence
+	 *	0800-FFFF => T3 Tx Tx
+	 */
+	c2 = *(unsigned char*)(str+2) ^ Tx;
+	if(c2 & Testx)
+		goto bad;
+	if(c < T4) {
+		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
+		if(l <= Rune2)
+			goto bad;
+		*rune = l;
+		return 3;
+	}
+
+	/*
+	 * four character sequence, capped at Runemax
+	 *	10000-10FFFF => T4 Tx Tx Tx
+	 */
+	c3 = *(unsigned char*)(str+3) ^ Tx;
+	if (c3 & Testx)
+		goto bad;
+	if (c < T5) {
+		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+		if (l <= Rune3 || l > Runemax)
+			goto bad;
+		*rune = l;
+		return 4;
+	}
+
+	/*
+	 * Lead bytes >= T5 (5-byte or longer forms) are not supported
+	 * and fall through to bad.
+	 */
+
+	/*
+	 * bad decoding: report the error rune and consume one byte
+	 */
+bad:
+	*rune = Bad;
+	return 1;
+}
+
+int
+runetochar(char *str, const Rune *rune)
+{
+	/* Writes the UTF-8 form of *rune to str (no NUL); returns its length. */
+	unsigned long c;	/* unsigned, so negative Runes fail the range check */
+
+	/*
+	 * one character sequence
+	 *	00000-0007F => 00-7F
+	 */
+	c = *rune;
+	if(c <= Rune1) {
+		str[0] = static_cast<char>(c);
+		return 1;
+	}
+
+	/*
+	 * two character sequence
+	 *	0080-07FF => T2 Tx
+	 */
+	if(c <= Rune2) {
+		str[0] = T2 | static_cast<char>(c >> 1*Bitx);
+		str[1] = Tx | (c & Maskx);
+		return 2;
+	}
+
+	/*
+	 * If the Rune is out of range, convert it to the error rune.
+	 * Do this test here because the error rune encodes to three bytes.
+	 * Doing it earlier would duplicate work, since an out of range
+	 * Rune wouldn't have fit in one or two bytes.
+	 */
+	if (c > Runemax)
+		c = Runeerror;
+
+	/*
+	 * three character sequence
+	 *	0800-FFFF => T3 Tx Tx
+	 */
+	if (c <= Rune3) {
+		str[0] = T3 | static_cast<char>(c >> 2*Bitx);
+		str[1] = Tx | ((c >> 1*Bitx) & Maskx);
+		str[2] = Tx | (c & Maskx);
+		return 3;
+	}
+
+	/*
+	 * four character sequence (larger values were replaced above)
+	 *     10000-10FFFF => T4 Tx Tx Tx
+	 */
+	str[0] = T4 | static_cast<char>(c >> 3*Bitx);
+	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
+	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
+	str[3] = Tx | (c & Maskx);
+	return 4;
+}
+
+int
+runelen(Rune rune)
+{
+	/* Encode into a scratch buffer purely to learn the byte count. */
+	char scratch[10];
+	return runetochar(scratch, &rune);
+}
+
+/*
+ * Reports (1 or 0) whether the first n bytes of str are enough to hold
+ * the whole sequence announced by the lead byte str[0].
+ */
+int
+fullrune(const char *str, int n)
+{
+	int lead, need;
+
+	if (n <= 0)
+		return 0;
+	lead = *(unsigned char*)str;
+	/* Bytes required: 1 below Tx, 2 below T3, 3 below T4, otherwise 4. */
+	need = lead < Tx ? 1
+	     : lead < T3 ? 2
+	     : lead < T4 ? 3 : 4;
+	return n >= need;
+}
+
+
+/*
+ * Returns the number of runes in the NUL-terminated UTF-8 string s.
+ * A byte that does not decode still counts as one rune, because
+ * chartorune advances by one byte on a decoding error.
+ */
+int
+utflen(const char *s)
+{
+	long count = 0;
+	Rune scratch;
+	int b;
+
+	while ((b = *(unsigned char*)s) != 0) {
+		/* ASCII advances one byte; anything else is decoded. */
+		s += (b < Runeself) ? 1 : chartorune(&scratch, s);
+		count++;
+	}
+	/* count is a long; it is narrowed to int on return. */
+	return count;
+}
+
+/*
+ * Returns a pointer to the first occurrence of rune c in the
+ * NUL-terminated UTF-8 string s, or 0 if the scan reaches the NUL.
+ */
+char*
+utfrune(const char *s, Rune c)
+{
+	Rune decoded;
+	long b;
+	/* Below Runesync a rune is a single byte: defer to strchr. */
+	if (c < Runesync)
+		return strchr((char*)s, c);
+
+	while ((b = *(unsigned char*)s) != 0) {
+		if (b < Runeself) {
+			if (b == c)
+				return (char*)s;
+			s++;
+		} else {
+			int width = chartorune(&decoded, s);
+			if (decoded == c)
+				return (char*)s;
+			s += width;
+		}
+	}
+	return 0;
+}
+
+}  // namespace re2
diff --git a/util/strutil.cc b/util/strutil.cc
new file mode 100644
index 0000000000000000000000000000000000000000..fb7e6b1b0c776f86338845aa92cde1db2dc5c6cb
--- /dev/null
+++ b/util/strutil.cc
@@ -0,0 +1,149 @@
+// Copyright 1999-2005 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "util/strutil.h"
+
+#ifdef _WIN32
+#define snprintf _snprintf
+#define vsnprintf _vsnprintf
+#endif
+
+namespace re2 {
+
+// ----------------------------------------------------------------------
+// CEscapeString()
+//    Copies 'src' to 'dest', escaping dangerous characters using
+//    C-style escape sequences.  'src' and 'dest' should not overlap.
+//    Returns the number of bytes written to 'dest' (not including the \0)
+//    or (size_t)-1 if there was insufficient space.
+// ----------------------------------------------------------------------
+static size_t CEscapeString(const char* src, size_t src_len,
+                            char* dest, size_t dest_len) {
+  const size_t kFail = (size_t)-1;  // "insufficient space" sentinel
+  size_t out = 0;                   // bytes written to dest so far
+  for (size_t i = 0; i < src_len; i++) {
+    // Every character needs room for at least a two-character escape.
+    if (dest_len - out < 2)
+      return kFail;
+    const unsigned char ch = src[i];
+    char mnemonic = 0;  // second character of a two-character escape
+    switch (ch) {
+      case '\n': mnemonic = 'n';  break;
+      case '\r': mnemonic = 'r';  break;
+      case '\t': mnemonic = 't';  break;
+      case '\"': mnemonic = '\"'; break;
+      case '\'': mnemonic = '\''; break;
+      case '\\': mnemonic = '\\'; break;
+      default:   break;
+    }
+    if (mnemonic != 0) {
+      dest[out++] = '\\';
+      dest[out++] = mnemonic;
+    } else if (ch >= ' ' && ch <= '~') {
+      dest[out++] = ch;  // printable ASCII is copied through unchanged
+    } else {
+      // Fixed-width three-digit octal escape; snprintf adds a NUL, hence 5.
+      if (dest_len - out < 5)
+        return kFail;
+      snprintf(dest + out, 5, "\\%03o", ch);
+      out += 4;
+    }
+  }
+
+  if (dest_len - out < 1)  // the terminator must fit too
+    return kFail;
+  dest[out] = '\0';  // terminator is not counted in the return value
+  return out;
+}
+
+// ----------------------------------------------------------------------
+// CEscape()
+//    Returns a copy of 'src' with dangerous characters replaced by
+//    C-style escape sequences (see CEscapeString).
+// ----------------------------------------------------------------------
+std::string CEscape(const StringPiece& src) {
+  // Worst case: every byte becomes a four-character octal escape, plus NUL.
+  // A std::string owns the scratch space, so nothing leaks if we throw.
+  std::string dest(src.size() * 4 + 1, '\0');
+  const size_t used = CEscapeString(src.data(), src.size(),
+                                    &dest[0], dest.size());
+  dest.resize(used);  // cannot fail to fit: the buffer is worst-case sized
+  return dest;
+}
+
+void PrefixSuccessor(std::string* prefix) {
+  // A trailing run of 0xff bytes cannot be incremented, so it is erased;
+  // the byte just before that run is then incremented and we are done.
+  // If the string is empty or consists entirely of 0xff bytes, the
+  // result is the empty string.
+  //
+  // Searching for the char literal '\xff' avoids signed/unsigned issues.
+  const std::string::size_type last = prefix->find_last_not_of('\xff');
+  if (last == std::string::npos) {
+    prefix->clear();
+    return;
+  }
+
+  prefix->erase(last + 1);  // drop the 0xff run (a no-op when there is none)
+  ++(*prefix)[last];
+}
+
+static void StringAppendV(std::string* dst, const char* format, va_list ap) {
+  // Appends the formatted text to *dst; first try a small fixed size buffer
+  char space[1024];
+
+  // It's possible for methods that use a va_list to invalidate
+  // the data in it upon use.  The fix is to make a copy
+  // of the structure before using it and use that copy instead.
+  va_list backup_ap;
+  va_copy(backup_ap, ap);
+  int result = vsnprintf(space, sizeof(space), format, backup_ap);
+  va_end(backup_ap);
+
+  if ((result >= 0) && (static_cast<size_t>(result) < sizeof(space))) {
+    // It fit: append exactly the characters vsnprintf reported
+    dst->append(space, result);
+    return;
+  }
+
+  // Repeatedly increase buffer size until it fits
+  int length = sizeof(space);
+  while (true) {
+    if (result < 0) {
+      // Negative result (older behavior): just try doubling the buffer size
+      length *= 2;
+    } else {
+      // We need exactly "result+1" characters
+      length = result+1;
+    }
+    char* buf = new char[length];
+
+    // Restore the va_list before we use it again
+    va_copy(backup_ap, ap);
+    result = vsnprintf(buf, length, format, backup_ap);
+    va_end(backup_ap);
+
+    if ((result >= 0) && (result < length)) {
+      // It fit
+      dst->append(buf, result);
+      delete[] buf;
+      return;
+    }
+    delete[] buf;  // still too small: release it and go around again
+  }
+}
+
+std::string StringPrintf(const char* format, ...) {
+  std::string formatted;  // receives the printf-style expansion of format
+  va_list args;
+  va_start(args, format);
+  StringAppendV(&formatted, format, args);
+  va_end(args);
+  return formatted;
+}
+
+}  // namespace re2
diff --git a/util/strutil.h b/util/strutil.h
new file mode 100644
index 0000000000000000000000000000000000000000..a69908a0dd94108de70d0b8a8401262ae070645a
--- /dev/null
+++ b/util/strutil.h
@@ -0,0 +1,21 @@
+// Copyright 2016 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_STRUTIL_H_
+#define UTIL_STRUTIL_H_
+
+#include <string>
+
+#include "re2/stringpiece.h"
+#include "util/util.h"
+
+namespace re2 {
+
+std::string CEscape(const StringPiece& src);  // C-style escaped copy of src
+void PrefixSuccessor(std::string* prefix);  // drops trailing 0xff, bumps last byte
+std::string StringPrintf(const char* format, ...);  // printf into a std::string
+
+}  // namespace re2
+
+#endif  // UTIL_STRUTIL_H_
diff --git a/util/test.cc b/util/test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..028616b359ac4d803d1a018420e1e2b43f49dbc9
--- /dev/null
+++ b/util/test.cc
@@ -0,0 +1,34 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+#include <string>
+
+#include "util/test.h"
+
+namespace testing {
+std::string TempDir() { return "/tmp/"; }  // hard-coded path, trailing slash included
+}  // namespace testing
+
+struct Test {
+  void (*fn)(void);   // test body to invoke
+  const char *name;   // label printed by main() before the test runs
+};
+
+static Test tests[10000];  // registry filled by RegisterTest()
+static int ntests;         // number of entries of tests[] in use
+
+void RegisterTest(void (*fn)(void), const char *name) {
+  tests[ntests].fn = fn;  // NOTE(review): no capacity check against tests[]
+  tests[ntests++].name = name;
+}
+
+int main(int argc, char** argv) {
+  for (const Test* t = tests; t != tests + ntests; ++t) {  // registration order
+    printf("%s\n", t->name);  // announce the test before running it
+    t->fn();
+  }
+  printf("PASS\n");
+  return 0;
+}
diff --git a/util/test.h b/util/test.h
new file mode 100644
index 0000000000000000000000000000000000000000..54e6f8fbbbc845ede96eaacd59f27d400fe3b971
--- /dev/null
+++ b/util/test.h
@@ -0,0 +1,50 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_TEST_H_
+#define UTIL_TEST_H_
+
+#include "util/util.h"
+#include "util/logging.h"
+
+namespace testing {
+std::string TempDir();
+}  // namespace testing
+
+#define TEST(x, y) \
+	void x##y(void); \
+	TestRegisterer r##x##y(x##y, # x "." # y); \
+	void x##y(void)  /* x##y is registered as "x.y"; its body follows the macro */
+
+void RegisterTest(void (*)(void), const char*);  // defined in util/test.cc
+
+class TestRegisterer {  // registers a test as a side effect of construction
+ public:
+  TestRegisterer(void (*fn)(void), const char *s) {
+    RegisterTest(fn, s);  // fn is recorded under the name s
+  }
+};
+
+// fatal assertions (aliases for CHECK*, presumably from util/logging.h)
+#define ASSERT_TRUE CHECK
+#define ASSERT_FALSE(x) CHECK(!(x))
+#define ASSERT_EQ CHECK_EQ
+#define ASSERT_NE CHECK_NE
+#define ASSERT_LT CHECK_LT
+#define ASSERT_LE CHECK_LE
+#define ASSERT_GT CHECK_GT
+#define ASSERT_GE CHECK_GE
+
+// nonfatal assertions -- for now they alias the same CHECK* macros as above
+// TODO(rsc): Do a better job?
+#define EXPECT_TRUE CHECK
+#define EXPECT_FALSE(x) CHECK(!(x))
+#define EXPECT_EQ CHECK_EQ
+#define EXPECT_NE CHECK_NE
+#define EXPECT_LT CHECK_LT
+#define EXPECT_LE CHECK_LE
+#define EXPECT_GT CHECK_GT
+#define EXPECT_GE CHECK_GE
+
+#endif  // UTIL_TEST_H_
diff --git a/util/utf.h b/util/utf.h
new file mode 100644
index 0000000000000000000000000000000000000000..85b42972390159f8ef9286417485f0d252ce7c74
--- /dev/null
+++ b/util/utf.h
@@ -0,0 +1,44 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ *              Copyright (c) 2002 by Lucent Technologies.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ *
+ * This file and rune.cc have been converted to compile as C++ code
+ * in name space re2.
+ */
+
+#ifndef UTIL_UTF_H_
+#define UTIL_UTF_H_
+
+#include <stdint.h>
+
+namespace re2 {
+
+typedef signed int Rune;	/* Code-point values in Unicode 4.0 are 21 bits wide.*/
+
+enum
+{
+  UTFmax	= 4,		/* maximum bytes per rune */
+  Runesync	= 0x80,		/* cannot represent part of a UTF sequence (<) */
+  Runeself	= 0x80,		/* rune and UTF sequences are the same (<) */
+  Runeerror	= 0xFFFD,	/* decoding error in UTF */
+  Runemax	= 0x10FFFF,	/* maximum rune value */
+};
+
+int runetochar(char* s, const Rune* r);  /* encode *r at s; returns bytes written */
+int chartorune(Rune* r, const char* s);  /* decode one rune from s; returns bytes consumed */
+int fullrune(const char* s, int n);  /* 1 if n bytes suffice for the rune starting at s */
+int utflen(const char* s);  /* rune count of NUL-terminated s */
+char* utfrune(const char*, Rune);  /* pointer to first occurrence of the rune, else 0 */
+
+}  // namespace re2
+
+#endif  // UTIL_UTF_H_
diff --git a/util/util.h b/util/util.h
new file mode 100644
index 0000000000000000000000000000000000000000..56e46c1a3385bbd7e21fcf321281f4f42ec81dd9
--- /dev/null
+++ b/util/util.h
@@ -0,0 +1,42 @@
+// Copyright 2009 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef UTIL_UTIL_H_
+#define UTIL_UTIL_H_
+
+#define arraysize(array) (sizeof(array)/sizeof((array)[0]))  // element count; true arrays only
+
+#ifndef ATTRIBUTE_NORETURN  // function attribute: never returns (empty if unsupported)
+#if defined(__GNUC__)
+#define ATTRIBUTE_NORETURN __attribute__((noreturn))
+#elif defined(_MSC_VER)
+#define ATTRIBUTE_NORETURN __declspec(noreturn)
+#else
+#define ATTRIBUTE_NORETURN
+#endif
+#endif
+
+#ifndef ATTRIBUTE_UNUSED  // suppresses unused warnings where __GNUC__ is defined
+#if defined(__GNUC__)
+#define ATTRIBUTE_UNUSED __attribute__((unused))
+#else
+#define ATTRIBUTE_UNUSED
+#endif
+#endif
+
+#ifndef FALLTHROUGH_INTENDED  // marks a deliberate switch-case fallthrough
+#if defined(__clang__)
+#define FALLTHROUGH_INTENDED [[clang::fallthrough]]
+#elif defined(__GNUC__) && __GNUC__ >= 7
+#define FALLTHROUGH_INTENDED [[gnu::fallthrough]]
+#else
+#define FALLTHROUGH_INTENDED do {} while (0)
+#endif
+#endif
+
+#ifndef NO_THREAD_SAFETY_ANALYSIS  // defaults to an empty expansion
+#define NO_THREAD_SAFETY_ANALYSIS
+#endif
+
+#endif  // UTIL_UTIL_H_