diff --git a/Makefile b/Makefile index fa636df82f9f0a22f7604ebd7905cb1194bf477c..46d637e1e2ac3fe7ccd363941204e7dc1c68872e 100644 --- a/Makefile +++ b/Makefile @@ -78,17 +78,11 @@ INSTALL_HFILES=\ re2/stringpiece.h\ HFILES=\ - util/benchmark.h\ - util/flags.h\ - util/logging.h\ - util/malloc_counter.h\ - util/mix.h\ - util/mutex.h\ - util/pcre.h\ - util/strutil.h\ - util/test.h\ - util/utf.h\ - util/util.h\ + re2/testing/util/benchmark.h\ + re2/testing/util/logging.h\ + re2/testing/util/test.h\ + re2/testing/util/strutil.h\ + re2/testing/util/util.h\ re2/filtered_re2.h\ re2/re2.h\ re2/set.h\ @@ -98,6 +92,12 @@ HFILES=\ # re2/testing/regexp_generator.h\ # re2/testing/string_generator.h\ # re2/testing/tester.h\ + # util/pcre.h\ + # util/flags.h\ + # util/malloc_counter.h\ + # util/mix.h\ + # util/mutex.h\ + # util/utf.h\ # 仅保留接口stub OFILES=obj/re2/re2.o\ @@ -130,16 +130,16 @@ OFILES=obj/re2/re2.o\ # obj/re2/unicode_groups.o\ TESTOFILES=\ - obj/util/pcre.o\ - obj/util/strutil.o\ + obj/re2/testing/util/strutil.o\ - #obj/re2/testing/string_generator.o\ + # obj/re2/testing/string_generator.o\ # obj/re2/testing/backtrack.o\ # obj/re2/testing/dump.o\ # obj/re2/testing/exhaustive_tester.o\ # obj/re2/testing/null_walker.o\ # obj/re2/testing/regexp_generator.o\ # obj/re2/testing/tester.o\ + # obj/util/pcre.o\ TESTS=\ obj/test/set_test\ @@ -212,34 +212,35 @@ obj/so/libre2.$(SOEXT): $(SOFILES) libre2.symbols libre2.symbols.darwin ln -sf libre2.$(SOEXTVER) $@ .PRECIOUS: obj/dbg/test/% -obj/dbg/test/%: obj/dbg/libre2.a obj/dbg/re2/testing/%.o $(DTESTOFILES) obj/dbg/util/test.o +obj/dbg/test/%: obj/dbg/libre2.a obj/dbg/re2/testing/%.o $(DTESTOFILES) obj/dbg/re2/testing/util/test.o @mkdir -p obj/dbg/test - $(CXX) -o $@ obj/dbg/re2/testing/$*.o $(DTESTOFILES) obj/dbg/util/test.o obj/dbg/libre2.a $(RE2_LDFLAGS) $(LDFLAGS) + $(CXX) -o $@ obj/dbg/re2/testing/$*.o $(DTESTOFILES) obj/dbg/re2/testing/util/test.o obj/dbg/libre2.a $(RE2_LDFLAGS) $(LDFLAGS) .PRECIOUS: obj/test/% -obj/test/%: obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o +obj/test/%: obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/re2/testing/util/test.o @mkdir -p obj/test - $(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS) + $(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/re2/testing/util/test.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS) # Test the shared lib, falling back to the static lib for private symbols .PRECIOUS: obj/so/test/% -obj/so/test/%: obj/so/libre2.$(SOEXT) obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o +obj/so/test/%: obj/so/libre2.$(SOEXT) obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/re2/testing/util/test.o @mkdir -p obj/so/test - $(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o -Lobj/so -lre2 obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS) + $(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/re2/testing/util/test.o -Lobj/so -lre2 obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS) # Filter out dump.o because testing::TempDir() isn't available for it. -obj/test/regexp_benchmark: obj/libre2.a obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o +obj/test/regexp_benchmark: obj/libre2.a obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/re2/testing/util/benchmark.o @mkdir -p obj/test - $(CXX) -o $@ obj/re2/testing/regexp_benchmark.o $(filter-out obj/re2/testing/dump.o, $(TESTOFILES)) obj/util/benchmark.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS) + $(CXX) -o $@ obj/re2/testing/regexp_benchmark.o $(filter-out obj/re2/testing/dump.o, $(TESTOFILES)) obj/re2/testing/util/benchmark.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS) # re2_fuzzer is a target for fuzzers like libFuzzer and AFL. This fake fuzzing # is simply a way to check that the target builds and then to run it against a # fixed set of inputs. To perform real fuzzing, refer to the documentation for # libFuzzer (llvm.org/docs/LibFuzzer.html) and AFL (lcamtuf.coredump.cx/afl/). -obj/test/re2_fuzzer: CXXFLAGS:=-I./re2/fuzzing/compiler-rt/include $(CXXFLAGS) -obj/test/re2_fuzzer: obj/libre2.a obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o - @mkdir -p obj/test - $(CXX) -o $@ obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS) + +# obj/test/re2_fuzzer: CXXFLAGS:=-I./re2/fuzzing/compiler-rt/include $(CXXFLAGS) +# obj/test/re2_fuzzer: obj/libre2.a obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o +# @mkdir -p obj/test +# $(CXX) -o $@ obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS) ifdef REBUILD_TABLES .PRECIOUS: re2/perl_groups.cc @@ -293,8 +294,8 @@ shared-bigtest: $(STESTS) $(SBIGTESTS) .PHONY: benchmark benchmark: obj/test/regexp_benchmark -.PHONY: fuzz -fuzz: obj/test/re2_fuzzer +# .PHONY: fuzz +# fuzz: obj/test/re2_fuzzer .PHONY: install install: static-install shared-install diff --git a/re2/filtered_re2.cc b/re2/filtered_re2.cc index 0f0f35e6a4bbe2deded5447497419f56989c6dc8..e37b3c1600a628a12241fea7a478a782f17cd4e0 100644 --- a/re2/filtered_re2.cc +++ b/re2/filtered_re2.cc @@ -20,8 +20,8 @@ #include #include -#include "util/util.h" -#include "util/logging.h" +#include "re2/testing/util/util.h" +#include "re2/testing/util/logging.h" // #include "re2/prefilter.h" extern "C" { diff --git a/re2/re2.cc b/re2/re2.cc index 4734a3cf93addc9041a9668fd34ab5f26c805709..d71eff17527a9b2bf4cad7ee801b713c1442b85f 100644 --- a/re2/re2.cc +++ b/re2/re2.cc @@ -32,10 +32,10 @@ #include #include -#include "util/util.h" -#include "util/logging.h" -#include "util/strutil.h" -#include "util/utf.h" +#include "re2/testing/util/util.h" +#include "re2/testing/util/logging.h" +// #include "util/strutil.h" +// #include "util/utf.h" #include "regex_internal.h" using namespace std; diff --git a/re2/set.cc b/re2/set.cc index 96ad1fc4a5d2403dbedd499f1616316f97acac70..c0e9d8f6732d396369336aac8501e3b621e1617a 100644 --- a/re2/set.cc +++ b/re2/set.cc @@ -22,8 +22,8 @@ #include #include -#include "util/util.h" -#include "util/logging.h" +#include "re2/testing/util/util.h" +#include "re2/testing/util/logging.h" #include "re2/re2.h" #include "regex_internal.h" #include "re2/stringpiece.h" diff --git a/re2/stringpiece.cc b/re2/stringpiece.cc index ef2e2874ead91d1324d08bf6a0e7c2c528e6e2d8..ea822d27de1a13425f70bc12e09833382b9a5147 100644 --- a/re2/stringpiece.cc +++ b/re2/stringpiece.cc @@ -6,7 +6,7 @@ #include -#include "util/util.h" +#include "re2/testing/util/util.h" namespace re2 { diff --git a/re2/testing/filtered_re2_test.cc b/re2/testing/filtered_re2_test.cc index 41c081e67398b674df4fc13f2dbed06f54dae38e..f4594132f1c6c69d2e5ebe72b6cf7f78b47ec22c 100644 --- a/re2/testing/filtered_re2_test.cc +++ b/re2/testing/filtered_re2_test.cc @@ -9,8 +9,8 @@ #include #include -#include "util/test.h" -#include "util/logging.h" +#include "re2/testing/util/test.h" +#include "re2/testing/util/logging.h" #include "re2/filtered_re2.h" #include "re2/re2.h" diff --git a/re2/testing/re2_arg_test.cc b/re2/testing/re2_arg_test.cc index f62e17cf4772c688a661cc262007c462ae373dd2..a5d2b82c5020b85f459de7c688b97b54efea9537 100644 --- a/re2/testing/re2_arg_test.cc +++ b/re2/testing/re2_arg_test.cc @@ -10,8 +10,8 @@ #include #include -#include "util/test.h" -#include "util/logging.h" +#include "re2/testing/util/test.h" +#include "re2/testing/util/logging.h" #include "re2/re2.h" namespace re2 { diff --git a/re2/testing/re2_test.cc b/re2/testing/re2_test.cc index 88551090788514690097fa51eac00f1c3f3e2663..80aa5b691c0b23cc2b772a5011fca226295e2fa5 100644 --- a/re2/testing/re2_test.cc +++ b/re2/testing/re2_test.cc @@ -18,9 +18,9 @@ #include /* for sysconf */ #endif -#include "util/test.h" -#include "util/logging.h" -#include "util/strutil.h" +#include "re2/testing/util/test.h" +#include "re2/testing/util/logging.h" +#include "re2/testing/util/strutil.h" #include "re2/re2.h" /*被注释掉的 #include "re2/regexp.h" diff --git a/re2/testing/regexp_benchmark.cc b/re2/testing/regexp_benchmark.cc index eb0e36ae8acc0d18430a28f54430869c70dd916c..bb4fa14a54e2e67d4cca900fbedd0ab3c8940a3f 100644 --- a/re2/testing/regexp_benchmark.cc +++ b/re2/testing/regexp_benchmark.cc @@ -16,18 +16,18 @@ #include #include -#include "util/benchmark.h" -#include "util/test.h" -#include "util/flags.h" -#include "util/logging.h" -#include "util/malloc_counter.h" -#include "util/strutil.h" +#include "re2/testing/util/benchmark.h" +#include "re2/testing/util/test.h" +// #include "util/flags.h" +#include "re2/testing/util/logging.h" +// #include "util/malloc_counter.h" +// #include "util/strutil.h" // #include "re2/prog.h" #include "re2/re2.h" #include "re2/set.h" // #include "re2/regexp.h" -#include "util/mutex.h" -#include "util/pcre.h" +// #include "util/mutex.h" +// #include "util/pcre.h" extern "C" { @@ -39,7 +39,7 @@ void Test(); void MemoryUsage(); } // namespace re2 -typedef testing::MallocCounter MallocCounter; +// typedef testing::MallocCounter MallocCounter; namespace re2 { @@ -87,13 +87,6 @@ void FindAndConsume(benchmark::State& state) { // BENCHMARK_RANGE(FindAndConsume, 8, 16)->ThreadRange(1, NumCPUs()); -void EmptyPartialMatchPCRE(benchmark::State& state) { - PCRE re(""); - for (auto _ : state) { - PCRE::PartialMatch("", re); - } -} - void EmptyPartialMatchRE2(benchmark::State& state) { RE2 re(""); for (auto _ : state) { @@ -113,42 +106,21 @@ void EmptyPartialMatchRE2_text_re2_1KB(benchmark::State& state) { state.SetBytesProcessed(state.iterations() * state.range(0)); } -#ifdef USEPCRE -BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs()); -#endif BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs()); BENCHMARK_RANGE(EmptyPartialMatchRE2_text_re2_1KB, 2 << 6, 2 << 9); -void SimplePartialMatchPCRE(benchmark::State& state) { - PCRE re("abcdefg"); - for (auto _ : state) { - PCRE::PartialMatch("abcdefg", re); - } -} - void SimplePartialMatchRE2(benchmark::State& state) { RE2 re("abcdefg"); for (auto _ : state) { RE2::PartialMatch("abcdefg", re); } } -#ifdef USEPCRE -BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs()); -#endif BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs()); static std::string http_text = "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf" "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1"; -void HTTPPartialMatchPCRE(benchmark::State& state) { - StringPiece a; - PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP"); - for (auto _ : state) { - PCRE::PartialMatch(http_text, re, &a); - } -} - void HTTPPartialMatchRE2(benchmark::State& state) { StringPiece a; RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP"); @@ -157,22 +129,11 @@ void HTTPPartialMatchRE2(benchmark::State& state) { } } -#ifdef USEPCRE -BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs()); -#endif BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs()); static std::string smallhttp_text = "GET /abc HTTP/1.1"; -void SmallHTTPPartialMatchPCRE(benchmark::State& state) { - StringPiece a; - PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP"); - for (auto _ : state) { - PCRE::PartialMatch(smallhttp_text, re, &a); - } -} - void SmallHTTPPartialMatchRE2(benchmark::State& state) { StringPiece a; RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP"); @@ -181,19 +142,8 @@ void SmallHTTPPartialMatchRE2(benchmark::State& state) { } } -#ifdef USEPCRE -BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs()); -#endif BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs()); -void DotMatchPCRE(benchmark::State& state) { - StringPiece a; - PCRE re("(?-s)^(.+)"); - for (auto _ : state) { - PCRE::PartialMatch(http_text, re, &a); - } -} - void DotMatchRE2(benchmark::State& state) { StringPiece a; RE2 re("(?-s)^(.+)"); @@ -202,19 +152,8 @@ void DotMatchRE2(benchmark::State& state) { } } -#ifdef USEPCRE -BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs()); -#endif BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs()); -void ASCIIMatchPCRE(benchmark::State& state) { - StringPiece a; - PCRE re("(?-s)^([ -~]+)"); - for (auto _ : state) { - PCRE::PartialMatch(http_text, re, &a); - } -} - void ASCIIMatchRE2(benchmark::State& state) { StringPiece a; RE2 re("(?-s)^([ -~]+)"); @@ -235,21 +174,9 @@ void ASCIIMatchRE2_text_re2_1KB(benchmark::State& state) { state.SetBytesProcessed(state.iterations() * state.range(0)); } -#ifdef USEPCRE -BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs()); -#endif BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs()); BENCHMARK_RANGE(ASCIIMatchRE2_text_re2_1KB, 2 << 6, 2 << 9); -void FullMatchPCRE(benchmark::State& state, const char *regexp) { - std::string s = RandomText(state.range(0)); - s += "ABCDEFGHIJ"; - PCRE re(regexp); - for (auto _ : state) { - CHECK(PCRE::FullMatch(s, re)); - } - state.SetBytesProcessed(state.iterations() * state.range(0)); -} void FullMatchRE2(benchmark::State& state, const char *regexp) { std::string s = RandomText(state.range(0)); @@ -512,29 +439,16 @@ BENCHMARK_RANGE(FullMatch_RE2_DotStar_text_re2_1KB, 2 << 6, 2 << 9); BENCHMARK_RANGE(FullMatch_RE2_DotStarDollar_text_re2_1KB, 2 << 6, 2 << 9); BENCHMARK_RANGE(FullMatch_RE2_DotStarCapture_text_re2_1KB, 2 << 6, 2 << 9); -void FullMatch_DotStar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*"); } void FullMatch_DotStar_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s).*"); } -void FullMatch_DotStarDollar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*$"); } void FullMatch_DotStarDollar_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s).*$"); } -void FullMatch_DotStarCapture_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s)((.*)()()($))"); } void FullMatch_DotStarCapture_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s)((.*)()()($))"); } -#ifdef USEPCRE -BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2 << 20); -#endif BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2, 2 << 19, 2 << 19); -#ifdef USEPCRE -BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2 << 20); -#endif BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2, 2 << 6, 2 << 9); -#ifdef USEPCRE -BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2 << 20); -#endif - BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2, 2 << 6, 2 << 9); } // namespace re2 diff --git a/re2/testing/set_test.cc b/re2/testing/set_test.cc index a7feb4c7bb928596f39fed29ef7fc134b573441f..d19b6574410ed7ef630154673a15fff5cc68d51d 100644 --- a/re2/testing/set_test.cc +++ b/re2/testing/set_test.cc @@ -7,8 +7,8 @@ #include #include -#include "util/test.h" -#include "util/logging.h" +#include "re2/testing/util/test.h" +#include "re2/testing/util/logging.h" #include "re2/re2.h" #include "re2/set.h" diff --git a/util/benchmark.cc b/re2/testing/util/benchmark.cc similarity index 97% rename from util/benchmark.cc rename to re2/testing/util/benchmark.cc index e39c3349abd3818694e3ecb7cbcca40dfbb22734..0545c9bd675b4c89c184b64ccd0e5f6bbc3132de 100644 --- a/util/benchmark.cc +++ b/re2/testing/util/benchmark.cc @@ -8,8 +8,8 @@ #include #include -#include "util/benchmark.h" -#include "util/flags.h" +#include "re2/testing/util/benchmark.h" +// #include "util/flags.h" #include "re2/re2.h" #ifdef _WIN32 diff --git a/util/benchmark.h b/re2/testing/util/benchmark.h similarity index 98% rename from util/benchmark.h rename to re2/testing/util/benchmark.h index 40181a5bdd9c91df34d2125c30cfdb1ba7e8aa2d..a45269af82be3018ad2951a1984be8b5219305d8 100644 --- a/util/benchmark.h +++ b/re2/testing/util/benchmark.h @@ -7,8 +7,8 @@ #include #include -#include "util/logging.h" -#include "util/util.h" +#include "re2/testing/util/logging.h" +#include "re2/testing/util/util.h" // Globals for the old benchmark API. void StartBenchmarkTiming(); diff --git a/util/logging.h b/re2/testing/util/logging.h similarity index 98% rename from util/logging.h rename to re2/testing/util/logging.h index 917b4816a2ba2441be37bda8d254dc590a96d87a..7a6dfcadf53735430b4dd9664677b1e9adbef5fc 100644 --- a/util/logging.h +++ b/re2/testing/util/logging.h @@ -12,7 +12,7 @@ #include #include -#include "util/util.h" +#include "re2/testing/util/util.h" // Debug-only checking. #define DCHECK(condition) assert(condition) diff --git a/util/strutil.cc b/re2/testing/util/strutil.cc similarity index 99% rename from util/strutil.cc rename to re2/testing/util/strutil.cc index fb7e6b1b0c776f86338845aa92cde1db2dc5c6cb..7b86cf2ef0ac673caa15378118e2128d6e72637b 100644 --- a/util/strutil.cc +++ b/re2/testing/util/strutil.cc @@ -5,7 +5,7 @@ #include #include -#include "util/strutil.h" +#include "re2/testing/util/strutil.h" #ifdef _WIN32 #define snprintf _snprintf diff --git a/util/strutil.h b/re2/testing/util/strutil.h similarity index 91% rename from util/strutil.h rename to re2/testing/util/strutil.h index e2a7f0b3cf73f4999fb6351de6c021cec2b9ee06..891780886006eaeccae0bdcae269cd90634120a3 100644 --- a/util/strutil.h +++ b/re2/testing/util/strutil.h @@ -7,7 +7,7 @@ #include #include "re2/stringpiece.h" -#include "util/util.h" +#include "re2/testing/util/util.h" namespace re2 { diff --git a/util/test.cc b/re2/testing/util/test.cc similarity index 95% rename from util/test.cc rename to re2/testing/util/test.cc index 028616b359ac4d803d1a018420e1e2b43f49dbc9..a7e0d2fe89273888a60fd2cd140d772b12c1374a 100644 --- a/util/test.cc +++ b/re2/testing/util/test.cc @@ -5,7 +5,7 @@ #include #include -#include "util/test.h" +#include "re2/testing/util/test.h" namespace testing { std::string TempDir() { return "/tmp/"; } diff --git a/util/test.h b/re2/testing/util/test.h similarity index 93% rename from util/test.h rename to re2/testing/util/test.h index e52d883ed332ca3737dc161219f7bf59a7ec180b..cd72e41b6410af0353bf556deaa44a0011e353e8 100644 --- a/util/test.h +++ b/re2/testing/util/test.h @@ -4,8 +4,8 @@ #pragma once -#include "util/util.h" -#include "util/logging.h" +#include "re2/testing/util/util.h" +#include "re2/testing/util/logging.h" namespace testing { std::string TempDir(); diff --git a/util/util.h b/re2/testing/util/util.h similarity index 100% rename from util/util.h rename to re2/testing/util/util.h diff --git a/util/flags.h b/util/flags.h deleted file mode 100644 index c6d66e34756f40697901a42d8a70b4fe737aba88..0000000000000000000000000000000000000000 --- a/util/flags.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2009 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#pragma once - -// Simplified version of Google's command line flags. -// Does not support parsing the command line. -// If you want to do that, see -// https://gflags.github.io/gflags/ - -#define DEFINE_FLAG(type, name, deflt, desc) \ - namespace re2 { type FLAGS_##name = deflt; } - -#define DECLARE_FLAG(type, name) \ - namespace re2 { extern type FLAGS_##name; } - -namespace re2 { -template -T GetFlag(const T& flag) { - return flag; -} -} // namespace re2 \ No newline at end of file diff --git a/util/fuzz.cc b/util/fuzz.cc deleted file mode 100644 index 9cac1185ac65106e53ba7178d828bce82e8f163d..0000000000000000000000000000000000000000 --- a/util/fuzz.cc +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2016 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include -#include -#include - -// Entry point for libFuzzer. -extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size); - -int main(int argc, char** argv) { - uint8_t data[32]; - for (int i = 0; i < 32; i++) { - for (int j = 0; j < 32; j++) { - data[j] = random() & 0xFF; - } - LLVMFuzzerTestOneInput(data, 32); - } - return 0; -} diff --git a/util/malloc_counter.h b/util/malloc_counter.h deleted file mode 100644 index 9beabef14cf3390b9ec13210869179ae8af94af0..0000000000000000000000000000000000000000 --- a/util/malloc_counter.h +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2009 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#pragma once - -namespace testing { -class MallocCounter { - public: - MallocCounter(int x) {} - static const int THIS_THREAD_ONLY = 0; - long long HeapGrowth() { return 0; } - long long PeakHeapGrowth() { return 0; } - void Reset() {} -}; -} // namespace testing \ No newline at end of file diff --git a/util/mix.h b/util/mix.h deleted file mode 100644 index b8953f03c668060d8b03337bedb84aea6772cfd3..0000000000000000000000000000000000000000 --- a/util/mix.h +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2016 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#pragma once - -#include -#include - -namespace re2 { - -// Silence "truncation of constant value" warning for kMul in 32-bit mode. -// Since this is a header file, push and then pop to limit the scope. -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable: 4309) -#endif - -class HashMix { - public: - HashMix() : hash_(1) {} - explicit HashMix(size_t val) : hash_(val + 83) {} - void Mix(size_t val) { - static const size_t kMul = static_cast(0xdc3eb94af8ab4c93ULL); - hash_ *= kMul; - hash_ = ((hash_ << 19) | - (hash_ >> (std::numeric_limits::digits - 19))) + val; - } - size_t get() const { return hash_; } - private: - size_t hash_; -}; - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -} // namespace re2 \ No newline at end of file diff --git a/util/mutex.h b/util/mutex.h deleted file mode 100644 index a5ebc8a6de211dff7a20aa34bacceb726b4ec0d5..0000000000000000000000000000000000000000 --- a/util/mutex.h +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright 2007 The RE2 Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#pragma once - -/* - * A simple mutex wrapper, supporting locks and read-write locks. - * You should assume the locks are *not* re-entrant. - */ - -#ifdef _WIN32 -// Requires Windows Vista or Windows Server 2008 at minimum. -#include -#if defined(WINVER) && WINVER >= 0x0600 -#define MUTEX_IS_WIN32_SRWLOCK -#endif -#else -#ifndef _POSIX_C_SOURCE -#define _POSIX_C_SOURCE 200809L -#endif -#include -#if defined(_POSIX_READER_WRITER_LOCKS) && _POSIX_READER_WRITER_LOCKS > 0 -#define MUTEX_IS_PTHREAD_RWLOCK -#endif -#endif - -#if defined(MUTEX_IS_WIN32_SRWLOCK) -typedef SRWLOCK MutexType; -#elif defined(MUTEX_IS_PTHREAD_RWLOCK) -#include -#include -typedef pthread_rwlock_t MutexType; -#else -#include -typedef std::mutex MutexType; -#endif - -namespace re2 { - -class Mutex { - public: - inline Mutex(); - inline ~Mutex(); - inline void Lock(); // Block if needed until free then acquire exclusively - inline void Unlock(); // Release a lock acquired via Lock() - // Note that on systems that don't support read-write locks, these may - // be implemented as synonyms to Lock() and Unlock(). So you can use - // these for efficiency, but don't use them anyplace where being able - // to do shared reads is necessary to avoid deadlock. - inline void ReaderLock(); // Block until free or shared then acquire a share - inline void ReaderUnlock(); // Release a read share of this Mutex - inline void WriterLock() { Lock(); } // Acquire an exclusive lock - inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock() - - private: - MutexType mutex_; - - // Catch the error of writing Mutex when intending MutexLock. - Mutex(Mutex *ignored); - - Mutex(const Mutex&) = delete; - Mutex& operator=(const Mutex&) = delete; -}; - -#if defined(MUTEX_IS_WIN32_SRWLOCK) - -Mutex::Mutex() : mutex_(SRWLOCK_INIT) { } -Mutex::~Mutex() { } -void Mutex::Lock() { AcquireSRWLockExclusive(&mutex_); } -void Mutex::Unlock() { ReleaseSRWLockExclusive(&mutex_); } -void Mutex::ReaderLock() { AcquireSRWLockShared(&mutex_); } -void Mutex::ReaderUnlock() { ReleaseSRWLockShared(&mutex_); } - -#elif defined(MUTEX_IS_PTHREAD_RWLOCK) - -#define SAFE_PTHREAD(fncall) \ - do { \ - if ((fncall) != 0) abort(); \ - } while (0) - -Mutex::Mutex() { SAFE_PTHREAD(pthread_rwlock_init(&mutex_, NULL)); } -Mutex::~Mutex() { SAFE_PTHREAD(pthread_rwlock_destroy(&mutex_)); } -void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock(&mutex_)); } -void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); } -void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock(&mutex_)); } -void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); } - -#undef SAFE_PTHREAD - -#else - -Mutex::Mutex() { } -Mutex::~Mutex() { } -void Mutex::Lock() { mutex_.lock(); } -void Mutex::Unlock() { mutex_.unlock(); } -void Mutex::ReaderLock() { Lock(); } // C++11 doesn't have std::shared_mutex. -void Mutex::ReaderUnlock() { Unlock(); } - -#endif - -// -------------------------------------------------------------------------- -// Some helper classes - -// MutexLock(mu) acquires mu when constructed and releases it when destroyed. -class MutexLock { - public: - explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); } - ~MutexLock() { mu_->Unlock(); } - private: - Mutex * const mu_; - - MutexLock(const MutexLock&) = delete; - MutexLock& operator=(const MutexLock&) = delete; -}; - -// ReaderMutexLock and WriterMutexLock do the same, for rwlocks -class ReaderMutexLock { - public: - explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); } - ~ReaderMutexLock() { mu_->ReaderUnlock(); } - private: - Mutex * const mu_; - - ReaderMutexLock(const ReaderMutexLock&) = delete; - ReaderMutexLock& operator=(const ReaderMutexLock&) = delete; -}; - -class WriterMutexLock { - public: - explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); } - ~WriterMutexLock() { mu_->WriterUnlock(); } - private: - Mutex * const mu_; - - WriterMutexLock(const WriterMutexLock&) = delete; - WriterMutexLock& operator=(const WriterMutexLock&) = delete; -}; - -// Catch bug where variable name is omitted, e.g. MutexLock (&mu); -#define MutexLock(x) static_assert(false, "MutexLock declaration missing variable name") -#define ReaderMutexLock(x) static_assert(false, "ReaderMutexLock declaration missing variable name") -#define WriterMutexLock(x) static_assert(false, "WriterMutexLock declaration missing variable name") - -} // namespace re2 \ No newline at end of file diff --git a/util/pcre.cc b/util/pcre.cc deleted file mode 100644 index b68985144ff6439182e849c485636b9fe697732b..0000000000000000000000000000000000000000 --- a/util/pcre.cc +++ /dev/null @@ -1,1025 +0,0 @@ -// Copyright 2003-2009 Google Inc. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// This is a variant of PCRE's pcrecpp.cc, originally written at Google. -// The main changes are the addition of the HitLimit method and -// compilation as PCRE in namespace re2. - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "util/util.h" -#include "util/flags.h" -#include "util/logging.h" -#include "util/pcre.h" -#include "util/strutil.h" - -// Silence warnings about the wacky formatting in the operator() functions. -#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6 -#pragma GCC diagnostic ignored "-Wmisleading-indentation" -#endif - -#define PCREPORT(level) LOG(level) - -// Default PCRE limits. -// Defaults chosen to allow a plausible amount of CPU and -// not exceed main thread stacks. Note that other threads -// often have smaller stacks, and therefore tightening -// regexp_stack_limit may frequently be necessary. -DEFINE_FLAG(int, regexp_stack_limit, 256 << 10, - "default PCRE stack limit (bytes)"); -DEFINE_FLAG(int, regexp_match_limit, 1000000, - "default PCRE match limit (function calls)"); - -#ifndef USEPCRE - -// Fake just enough of the PCRE API to allow this file to build. :) - -struct pcre_extra { - int flags; - int match_limit; - int match_limit_recursion; -}; - -#define PCRE_EXTRA_MATCH_LIMIT 0 -#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0 -#define PCRE_ANCHORED 0 -#define PCRE_NOTEMPTY 0 -#define PCRE_ERROR_NOMATCH 1 -#define PCRE_ERROR_MATCHLIMIT 2 -#define PCRE_ERROR_RECURSIONLIMIT 3 -#define PCRE_INFO_CAPTURECOUNT 0 - -void pcre_free(void*) { -} - -pcre* pcre_compile(const char*, int, const char**, int*, const unsigned char*) { - return NULL; -} - -int pcre_exec(const pcre*, const pcre_extra*, const char*, int, int, int, int*, int) { - return 0; -} - -int pcre_fullinfo(const pcre*, const pcre_extra*, int, void*) { - return 0; -} - -#endif - -namespace re2 { - -// Maximum number of args we can set -static const int kMaxArgs = 16; -static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace - -// Approximate size of a recursive invocation of PCRE's -// internal "match()" frame. This varies depending on the -// compiler and architecture, of course, so the constant is -// just a conservative estimate. To find the exact number, -// run regexp_unittest with --regexp_stack_limit=0 under -// a debugger and look at the frames when it crashes. -// The exact frame size was 656 in production on 2008/02/03. -static const int kPCREFrameSize = 700; - -// Special name for missing C++ arguments. -PCRE::Arg PCRE::no_more_args((void*)NULL); - -const PCRE::PartialMatchFunctor PCRE::PartialMatch = { }; -const PCRE::FullMatchFunctor PCRE::FullMatch = { } ; -const PCRE::ConsumeFunctor PCRE::Consume = { }; -const PCRE::FindAndConsumeFunctor PCRE::FindAndConsume = { }; - -// If a regular expression has no error, its error_ field points here -static const std::string empty_string; - -void PCRE::Init(const char* pattern, Option options, int match_limit, - int stack_limit, bool report_errors) { - pattern_ = pattern; - options_ = options; - match_limit_ = match_limit; - stack_limit_ = stack_limit; - hit_limit_ = false; - error_ = &empty_string; - report_errors_ = report_errors; - re_full_ = NULL; - re_partial_ = NULL; - - if (options & ~(EnabledCompileOptions | EnabledExecOptions)) { - error_ = new std::string("illegal regexp option"); - PCREPORT(ERROR) - << "Error compiling '" << pattern << "': illegal regexp option"; - } else { - re_partial_ = Compile(UNANCHORED); - if (re_partial_ != NULL) { - re_full_ = Compile(ANCHOR_BOTH); - } - } -} - -PCRE::PCRE(const char* pattern) { - Init(pattern, None, 0, 0, true); -} -PCRE::PCRE(const char* pattern, Option option) { - Init(pattern, option, 0, 0, true); -} -PCRE::PCRE(const std::string& pattern) { - Init(pattern.c_str(), None, 0, 0, true); -} -PCRE::PCRE(const std::string& pattern, Option option) { - Init(pattern.c_str(), option, 0, 0, true); -} -PCRE::PCRE(const std::string& pattern, const PCRE_Options& re_option) { - Init(pattern.c_str(), re_option.option(), re_option.match_limit(), - re_option.stack_limit(), re_option.report_errors()); -} - -PCRE::PCRE(const char *pattern, const PCRE_Options& re_option) { - Init(pattern, re_option.option(), re_option.match_limit(), - re_option.stack_limit(), re_option.report_errors()); -} - -PCRE::~PCRE() { - if (re_full_ != NULL) pcre_free(re_full_); - if (re_partial_ != NULL) pcre_free(re_partial_); - if (error_ != &empty_string) delete error_; -} - -pcre* PCRE::Compile(Anchor anchor) { - // Special treatment for anchoring. This is needed because at - // runtime pcre only provides an option for anchoring at the - // beginning of a string. - // - // There are three types of anchoring we want: - // UNANCHORED Compile the original pattern, and use - // a pcre unanchored match. - // ANCHOR_START Compile the original pattern, and use - // a pcre anchored match. - // ANCHOR_BOTH Tack a "\z" to the end of the original pattern - // and use a pcre anchored match. - - const char* error = ""; - int eoffset; - pcre* re; - if (anchor != ANCHOR_BOTH) { - re = pcre_compile(pattern_.c_str(), - (options_ & EnabledCompileOptions), - &error, &eoffset, NULL); - } else { - // Tack a '\z' at the end of PCRE. Parenthesize it first so that - // the '\z' applies to all top-level alternatives in the regexp. - std::string wrapped = "(?:"; // A non-counting grouping operator - wrapped += pattern_; - wrapped += ")\\z"; - re = pcre_compile(wrapped.c_str(), - (options_ & EnabledCompileOptions), - &error, &eoffset, NULL); - } - if (re == NULL) { - if (error_ == &empty_string) error_ = new std::string(error); - PCREPORT(ERROR) << "Error compiling '" << pattern_ << "': " << error; - } - return re; -} - -/***** Convenience interfaces *****/ - -bool PCRE::FullMatchFunctor::operator ()(const StringPiece& text, - const PCRE& re, - const Arg& a0, - const Arg& a1, - const Arg& a2, - const Arg& a3, - const Arg& a4, - const Arg& a5, - const Arg& a6, - const Arg& a7, - const Arg& a8, - const Arg& a9, - const Arg& a10, - const Arg& a11, - const Arg& a12, - const Arg& a13, - const Arg& a14, - const Arg& a15) const { - const Arg* args[kMaxArgs]; - int n = 0; - if (&a0 == &no_more_args) goto done; args[n++] = &a0; - if (&a1 == &no_more_args) goto done; args[n++] = &a1; - if (&a2 == &no_more_args) goto done; args[n++] = &a2; - if (&a3 == &no_more_args) goto done; args[n++] = &a3; - if (&a4 == &no_more_args) goto done; args[n++] = &a4; - if (&a5 == &no_more_args) goto done; args[n++] = &a5; - if (&a6 == &no_more_args) goto done; args[n++] = &a6; - if (&a7 == &no_more_args) goto done; args[n++] = &a7; - if (&a8 == &no_more_args) goto done; args[n++] = &a8; - if (&a9 == &no_more_args) goto done; args[n++] = &a9; - if (&a10 == &no_more_args) goto done; args[n++] = &a10; - if (&a11 == &no_more_args) goto done; args[n++] = &a11; - if (&a12 == &no_more_args) goto done; args[n++] = &a12; - if (&a13 == &no_more_args) goto done; args[n++] = &a13; - if (&a14 == &no_more_args) goto done; args[n++] = &a14; - if (&a15 == &no_more_args) goto done; args[n++] = &a15; -done: - - size_t consumed; - int vec[kVecSize] = {}; - return re.DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize); -} - -bool PCRE::PartialMatchFunctor::operator ()(const StringPiece& text, - const PCRE& re, - const Arg& a0, - const Arg& a1, - const Arg& a2, - const Arg& a3, - const Arg& a4, - const Arg& a5, - const Arg& a6, - const Arg& a7, - const Arg& a8, - const Arg& a9, - const Arg& a10, - const Arg& a11, - const Arg& a12, - const Arg& a13, - const Arg& a14, - const Arg& a15) const { - const Arg* args[kMaxArgs]; - int n = 0; - if (&a0 == &no_more_args) goto done; args[n++] = &a0; - if (&a1 == &no_more_args) goto done; args[n++] = &a1; - if (&a2 == &no_more_args) goto done; args[n++] = &a2; - if (&a3 == &no_more_args) goto done; args[n++] = &a3; - if (&a4 == &no_more_args) goto done; args[n++] = &a4; - if (&a5 == &no_more_args) goto done; args[n++] = &a5; - if (&a6 == &no_more_args) goto done; args[n++] = &a6; - if (&a7 == &no_more_args) goto done; args[n++] = &a7; - if (&a8 == &no_more_args) goto done; args[n++] = &a8; - if (&a9 == &no_more_args) goto done; args[n++] = &a9; - if (&a10 == &no_more_args) goto done; args[n++] = &a10; - if (&a11 == &no_more_args) goto done; args[n++] = &a11; - if (&a12 == &no_more_args) goto done; args[n++] = &a12; - if (&a13 == &no_more_args) goto done; args[n++] = &a13; - if (&a14 == &no_more_args) goto done; args[n++] = &a14; - if (&a15 == &no_more_args) goto done; args[n++] = &a15; -done: - - size_t consumed; - int vec[kVecSize] = {}; - return re.DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize); -} - -bool PCRE::ConsumeFunctor::operator ()(StringPiece* input, - const PCRE& pattern, - const Arg& a0, - const Arg& a1, - const Arg& a2, - const Arg& a3, - const Arg& a4, - const Arg& a5, - const Arg& a6, - const Arg& a7, - const Arg& a8, - const Arg& a9, - const Arg& a10, - const Arg& a11, - const Arg& a12, - const Arg& a13, - const Arg& a14, - const Arg& a15) const { - const Arg* args[kMaxArgs]; - int n = 0; - if (&a0 == &no_more_args) goto done; args[n++] = &a0; - if (&a1 == &no_more_args) goto done; args[n++] = &a1; - if (&a2 == &no_more_args) goto done; args[n++] = &a2; - if (&a3 == &no_more_args) goto done; args[n++] = &a3; - if (&a4 == &no_more_args) goto done; args[n++] = &a4; - if (&a5 == &no_more_args) goto done; args[n++] = &a5; - if (&a6 == &no_more_args) goto done; args[n++] = &a6; - if (&a7 == &no_more_args) goto done; args[n++] = &a7; - if (&a8 == &no_more_args) goto done; args[n++] = &a8; - if (&a9 == &no_more_args) goto done; args[n++] = &a9; - if (&a10 == &no_more_args) goto done; args[n++] = &a10; - if (&a11 == &no_more_args) goto done; args[n++] = &a11; - if (&a12 == &no_more_args) goto done; args[n++] = &a12; - if (&a13 == &no_more_args) goto done; args[n++] = &a13; - if (&a14 == &no_more_args) goto done; args[n++] = &a14; - if (&a15 == &no_more_args) goto done; args[n++] = &a15; -done: - - size_t consumed; - int vec[kVecSize] = {}; - if (pattern.DoMatchImpl(*input, ANCHOR_START, &consumed, - args, n, vec, kVecSize)) { - input->remove_prefix(consumed); - return true; - } else { - return false; - } -} - -bool PCRE::FindAndConsumeFunctor::operator ()(StringPiece* input, - const PCRE& pattern, - const Arg& a0, - const Arg& a1, - const Arg& a2, - const Arg& a3, - const Arg& a4, - const Arg& a5, - const Arg& a6, - const Arg& a7, - const Arg& a8, - const Arg& a9, - const Arg& a10, - const Arg& a11, - const Arg& a12, - const Arg& a13, - const Arg& a14, - const Arg& a15) const { - const Arg* args[kMaxArgs]; - int n = 0; - if (&a0 == &no_more_args) goto done; args[n++] = &a0; - if (&a1 == &no_more_args) goto done; args[n++] = &a1; - if (&a2 == &no_more_args) goto done; args[n++] = &a2; - if (&a3 == &no_more_args) goto done; args[n++] = &a3; - if (&a4 == &no_more_args) goto done; args[n++] = &a4; - if (&a5 == &no_more_args) goto done; args[n++] = &a5; - if (&a6 == &no_more_args) goto done; args[n++] = &a6; - if (&a7 == &no_more_args) goto done; args[n++] = &a7; - if (&a8 == &no_more_args) goto done; args[n++] = &a8; - if (&a9 == &no_more_args) goto done; args[n++] = &a9; - if (&a10 == &no_more_args) goto done; args[n++] = &a10; - if (&a11 == &no_more_args) goto done; args[n++] = &a11; - if (&a12 == &no_more_args) goto done; args[n++] = &a12; - if (&a13 == &no_more_args) goto done; args[n++] = &a13; - if (&a14 == &no_more_args) goto done; args[n++] = &a14; - if (&a15 == &no_more_args) goto done; args[n++] = &a15; -done: - - size_t consumed; - int vec[kVecSize] = {}; - if (pattern.DoMatchImpl(*input, UNANCHORED, &consumed, - args, n, vec, kVecSize)) { - input->remove_prefix(consumed); - return true; - } else { - return false; - } -} - -bool PCRE::Replace(std::string *str, - const PCRE& pattern, - const StringPiece& rewrite) { - int vec[kVecSize] = {}; - int matches = pattern.TryMatch(*str, 0, UNANCHORED, true, vec, kVecSize); - if (matches == 0) - return false; - - std::string s; - if (!pattern.Rewrite(&s, rewrite, *str, vec, matches)) - return false; - - assert(vec[0] >= 0); - assert(vec[1] >= 0); - str->replace(vec[0], vec[1] - vec[0], s); - return true; -} - -int PCRE::GlobalReplace(std::string *str, - const PCRE& pattern, - const StringPiece& rewrite) { - int count = 0; - int vec[kVecSize] = {}; - std::string out; - size_t start = 0; - bool last_match_was_empty_string = false; - - while (start <= str->size()) { - // If the previous match was for the empty string, we shouldn't - // just match again: we'll match in the same way and get an - // infinite loop. Instead, we do the match in a special way: - // anchored -- to force another try at the same position -- - // and with a flag saying that this time, ignore empty matches. - // If this special match returns, that means there's a non-empty - // match at this position as well, and we can continue. If not, - // we do what perl does, and just advance by one. - // Notice that perl prints '@@@' for this; - // perl -le '$_ = "aa"; s/b*|aa/@/g; print' - int matches; - if (last_match_was_empty_string) { - matches = pattern.TryMatch(*str, start, ANCHOR_START, false, - vec, kVecSize); - if (matches <= 0) { - if (start < str->size()) - out.push_back((*str)[start]); - start++; - last_match_was_empty_string = false; - continue; - } - } else { - matches = pattern.TryMatch(*str, start, UNANCHORED, true, - vec, kVecSize); - if (matches <= 0) - break; - } - size_t matchstart = vec[0], matchend = vec[1]; - assert(matchstart >= start); - assert(matchend >= matchstart); - - out.append(*str, start, matchstart - start); - pattern.Rewrite(&out, rewrite, *str, vec, matches); - start = matchend; - count++; - last_match_was_empty_string = (matchstart == matchend); - } - - if (count == 0) - return 0; - - if (start < str->size()) - out.append(*str, start, str->size() - start); - using std::swap; - swap(out, *str); - return count; -} - -bool PCRE::Extract(const StringPiece &text, - const PCRE& pattern, - const StringPiece &rewrite, - std::string *out) { - int vec[kVecSize] = {}; - int matches = pattern.TryMatch(text, 0, UNANCHORED, true, vec, kVecSize); - if (matches == 0) - return false; - out->clear(); - return pattern.Rewrite(out, rewrite, text, vec, matches); -} - -std::string PCRE::QuoteMeta(const StringPiece& unquoted) { - std::string result; - result.reserve(unquoted.size() << 1); - - // Escape any ascii character not in [A-Za-z_0-9]. - // - // Note that it's legal to escape a character even if it has no - // special meaning in a regular expression -- so this function does - // that. (This also makes it identical to the perl function of the - // same name except for the null-character special case; - // see `perldoc -f quotemeta`.) - for (size_t ii = 0; ii < unquoted.size(); ++ii) { - // Note that using 'isalnum' here raises the benchmark time from - // 32ns to 58ns: - if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') && - (unquoted[ii] < 'A' || unquoted[ii] > 'Z') && - (unquoted[ii] < '0' || unquoted[ii] > '9') && - unquoted[ii] != '_' && - // If this is the part of a UTF8 or Latin1 character, we need - // to copy this byte without escaping. Experimentally this is - // what works correctly with the regexp library. - !(unquoted[ii] & 128)) { - if (unquoted[ii] == '\0') { // Special handling for null chars. - // Can't use "\\0" since the next character might be a digit. - result += "\\x00"; - continue; - } - result += '\\'; - } - result += unquoted[ii]; - } - - return result; -} - -/***** Actual matching and rewriting code *****/ - -bool PCRE::HitLimit() { - return hit_limit_ != 0; -} - -void PCRE::ClearHitLimit() { - hit_limit_ = 0; -} - -int PCRE::TryMatch(const StringPiece& text, - size_t startpos, - Anchor anchor, - bool empty_ok, - int *vec, - int vecsize) const { - pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_; - if (re == NULL) { - PCREPORT(ERROR) << "Matching against invalid re: " << *error_; - return 0; - } - - int match_limit = match_limit_; - if (match_limit <= 0) { - match_limit = GetFlag(FLAGS_regexp_match_limit); - } - - int stack_limit = stack_limit_; - if (stack_limit <= 0) { - stack_limit = GetFlag(FLAGS_regexp_stack_limit); - } - - pcre_extra extra = { 0 }; - if (match_limit > 0) { - extra.flags |= PCRE_EXTRA_MATCH_LIMIT; - extra.match_limit = match_limit; - } - if (stack_limit > 0) { - extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; - extra.match_limit_recursion = stack_limit / kPCREFrameSize; - } - - int options = 0; - if (anchor != UNANCHORED) - options |= PCRE_ANCHORED; - if (!empty_ok) - options |= PCRE_NOTEMPTY; - - int rc = pcre_exec(re, // The regular expression object - &extra, - (text.data() == NULL) ? "" : text.data(), - static_cast(text.size()), - static_cast(startpos), - options, - vec, - vecsize); - - // Handle errors - if (rc == 0) { - // pcre_exec() returns 0 as a special case when the number of - // capturing subpatterns exceeds the size of the vector. - // When this happens, there is a match and the output vector - // is filled, but we miss out on the positions of the extra subpatterns. - rc = vecsize / 2; - } else if (rc < 0) { - switch (rc) { - case PCRE_ERROR_NOMATCH: - return 0; - case PCRE_ERROR_MATCHLIMIT: - // Writing to hit_limit is not safe if multiple threads - // are using the PCRE, but the flag is only intended - // for use by unit tests anyway, so we let it go. - hit_limit_ = true; - PCREPORT(WARNING) << "Exceeded match limit of " << match_limit - << " when matching '" << pattern_ << "'" - << " against text that is " << text.size() << " bytes."; - return 0; - case PCRE_ERROR_RECURSIONLIMIT: - // See comment about hit_limit above. - hit_limit_ = true; - PCREPORT(WARNING) << "Exceeded stack limit of " << stack_limit - << " when matching '" << pattern_ << "'" - << " against text that is " << text.size() << " bytes."; - return 0; - default: - // There are other return codes from pcre.h : - // PCRE_ERROR_NULL (-2) - // PCRE_ERROR_BADOPTION (-3) - // PCRE_ERROR_BADMAGIC (-4) - // PCRE_ERROR_UNKNOWN_NODE (-5) - // PCRE_ERROR_NOMEMORY (-6) - // PCRE_ERROR_NOSUBSTRING (-7) - // ... - PCREPORT(ERROR) << "Unexpected return code: " << rc - << " when matching '" << pattern_ << "'" - << ", re=" << re - << ", text=" << text - << ", vec=" << vec - << ", vecsize=" << vecsize; - return 0; - } - } - - return rc; -} - -bool PCRE::DoMatchImpl(const StringPiece& text, - Anchor anchor, - size_t* consumed, - const Arg* const* args, - int n, - int* vec, - int vecsize) const { - assert((1 + n) * 3 <= vecsize); // results + PCRE workspace - if (NumberOfCapturingGroups() < n) { - // RE has fewer capturing groups than number of Arg pointers passed in. - return false; - } - - int matches = TryMatch(text, 0, anchor, true, vec, vecsize); - assert(matches >= 0); // TryMatch never returns negatives - if (matches == 0) - return false; - - *consumed = vec[1]; - - if (n == 0 || args == NULL) { - // We are not interested in results - return true; - } - - // If we got here, we must have matched the whole pattern. - // We do not need (can not do) any more checks on the value of 'matches' here - // -- see the comment for TryMatch. - for (int i = 0; i < n; i++) { - const int start = vec[2*(i+1)]; - const int limit = vec[2*(i+1)+1]; - - // Avoid invoking undefined behavior when text.data() happens - // to be null and start happens to be -1, the latter being the - // case for an unmatched subexpression. Even if text.data() is - // not null, pointing one byte before was a longstanding bug. - const char* addr = NULL; - if (start != -1) { - addr = text.data() + start; - } - - if (!args[i]->Parse(addr, limit-start)) { - // TODO: Should we indicate what the error was? - return false; - } - } - - return true; -} - -bool PCRE::DoMatch(const StringPiece& text, - Anchor anchor, - size_t* consumed, - const Arg* const args[], - int n) const { - assert(n >= 0); - const int vecsize = (1 + n) * 3; // results + PCRE workspace - // (as for kVecSize) - int* vec = new int[vecsize]; - bool b = DoMatchImpl(text, anchor, consumed, args, n, vec, vecsize); - delete[] vec; - return b; -} - -bool PCRE::Rewrite(std::string *out, const StringPiece &rewrite, - const StringPiece &text, int *vec, int veclen) const { - int number_of_capturing_groups = NumberOfCapturingGroups(); - for (const char *s = rewrite.data(), *end = s + rewrite.size(); - s < end; s++) { - int c = *s; - if (c == '\\') { - c = *++s; - if (isdigit(c)) { - int n = (c - '0'); - if (n >= veclen) { - if (n <= number_of_capturing_groups) { - // unmatched optional capturing group. treat - // its value as empty string; i.e., nothing to append. - } else { - PCREPORT(ERROR) << "requested group " << n - << " in regexp " << rewrite.data(); - return false; - } - } - int start = vec[2 * n]; - if (start >= 0) - out->append(text.data() + start, vec[2 * n + 1] - start); - } else if (c == '\\') { - out->push_back('\\'); - } else { - PCREPORT(ERROR) << "invalid rewrite pattern: " << rewrite.data(); - return false; - } - } else { - out->push_back(c); - } - } - return true; -} - -bool PCRE::CheckRewriteString(const StringPiece& rewrite, - std::string* error) const { - int max_token = -1; - for (const char *s = rewrite.data(), *end = s + rewrite.size(); - s < end; s++) { - int c = *s; - if (c != '\\') { - continue; - } - if (++s == end) { - *error = "Rewrite schema error: '\\' not allowed at end."; - return false; - } - c = *s; - if (c == '\\') { - continue; - } - if (!isdigit(c)) { - *error = "Rewrite schema error: " - "'\\' must be followed by a digit or '\\'."; - return false; - } - int n = (c - '0'); - if (max_token < n) { - max_token = n; - } - } - - if (max_token > NumberOfCapturingGroups()) { - *error = StringPrintf( - "Rewrite schema requests %d matches, but the regexp only has %d " - "parenthesized subexpressions.", - max_token, NumberOfCapturingGroups()); - return false; - } - return true; -} - - -// Return the number of capturing subpatterns, or -1 if the -// regexp wasn't valid on construction. -int PCRE::NumberOfCapturingGroups() const { - if (re_partial_ == NULL) return -1; - - int result; - int rc = pcre_fullinfo(re_partial_, // The regular expression object - NULL, // We did not study the pattern - PCRE_INFO_CAPTURECOUNT, - &result); - if (rc != 0) { - PCREPORT(ERROR) << "Unexpected return code: " << rc; - return -1; - } - return result; -} - - -/***** Parsers for various types *****/ - -bool PCRE::Arg::parse_null(const char* str, size_t n, void* dest) { - // We fail if somebody asked us to store into a non-NULL void* pointer - return (dest == NULL); -} - -bool PCRE::Arg::parse_string(const char* str, size_t n, void* dest) { - if (dest == NULL) return true; - reinterpret_cast(dest)->assign(str, n); - return true; -} - -bool PCRE::Arg::parse_stringpiece(const char* str, size_t n, void* dest) { - if (dest == NULL) return true; - *(reinterpret_cast(dest)) = StringPiece(str, n); - return true; -} - -bool PCRE::Arg::parse_char(const char* str, size_t n, void* dest) { - if (n != 1) return false; - if (dest == NULL) return true; - *(reinterpret_cast(dest)) = str[0]; - return true; -} - -bool PCRE::Arg::parse_schar(const char* str, size_t n, void* dest) { - if (n != 1) return false; - if (dest == NULL) return true; - *(reinterpret_cast(dest)) = str[0]; - return true; -} - -bool PCRE::Arg::parse_uchar(const char* str, size_t n, void* dest) { - if (n != 1) return false; - if (dest == NULL) return true; - *(reinterpret_cast(dest)) = str[0]; - return true; -} - -// Largest number spec that we are willing to parse -static const int kMaxNumberLength = 32; - -// PCREQUIPCRES "buf" must have length at least kMaxNumberLength+1 -// PCREQUIPCRES "n > 0" -// Copies "str" into "buf" and null-terminates if necessary. -// Returns one of: -// a. "str" if no termination is needed -// b. "buf" if the string was copied and null-terminated -// c. "" if the input was invalid and has no hope of being parsed -static const char* TerminateNumber(char* buf, const char* str, size_t n) { - if ((n > 0) && isspace(*str)) { - // We are less forgiving than the strtoxxx() routines and do not - // allow leading spaces. - return ""; - } - - // See if the character right after the input text may potentially - // look like a digit. - if (isdigit(str[n]) || - ((str[n] >= 'a') && (str[n] <= 'f')) || - ((str[n] >= 'A') && (str[n] <= 'F'))) { - if (n > kMaxNumberLength) return ""; // Input too big to be a valid number - memcpy(buf, str, n); - buf[n] = '\0'; - return buf; - } else { - // We can parse right out of the supplied string, so return it. - return str; - } -} - -bool PCRE::Arg::parse_long_radix(const char* str, - size_t n, - void* dest, - int radix) { - if (n == 0) return false; - char buf[kMaxNumberLength+1]; - str = TerminateNumber(buf, str, n); - char* end; - errno = 0; - long r = strtol(str, &end, radix); - if (end != str + n) return false; // Leftover junk - if (errno) return false; - if (dest == NULL) return true; - *(reinterpret_cast(dest)) = r; - return true; -} - -bool PCRE::Arg::parse_ulong_radix(const char* str, - size_t n, - void* dest, - int radix) { - if (n == 0) return false; - char buf[kMaxNumberLength+1]; - str = TerminateNumber(buf, str, n); - if (str[0] == '-') { - // strtoul() will silently accept negative numbers and parse - // them. This module is more strict and treats them as errors. - return false; - } - - char* end; - errno = 0; - unsigned long r = strtoul(str, &end, radix); - if (end != str + n) return false; // Leftover junk - if (errno) return false; - if (dest == NULL) return true; - *(reinterpret_cast(dest)) = r; - return true; -} - -bool PCRE::Arg::parse_short_radix(const char* str, - size_t n, - void* dest, - int radix) { - long r; - if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse - if ((short)r != r) return false; // Out of range - if (dest == NULL) return true; - *(reinterpret_cast(dest)) = (short)r; - return true; -} - -bool PCRE::Arg::parse_ushort_radix(const char* str, - size_t n, - void* dest, - int radix) { - unsigned long r; - if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse - if ((unsigned short)r != r) return false; // Out of range - if (dest == NULL) return true; - *(reinterpret_cast(dest)) = (unsigned short)r; - return true; -} - -bool PCRE::Arg::parse_int_radix(const char* str, - size_t n, - void* dest, - int radix) { - long r; - if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse - if ((int)r != r) return false; // Out of range - if (dest == NULL) return true; - *(reinterpret_cast(dest)) = (int)r; - return true; -} - -bool PCRE::Arg::parse_uint_radix(const char* str, - size_t n, - void* dest, - int radix) { - unsigned long r; - if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse - if ((unsigned int)r != r) return false; // Out of range - if (dest == NULL) return true; - *(reinterpret_cast(dest)) = (unsigned int)r; - return true; -} - -bool PCRE::Arg::parse_longlong_radix(const char* str, - size_t n, - void* dest, - int radix) { - if (n == 0) return false; - char buf[kMaxNumberLength+1]; - str = TerminateNumber(buf, str, n); - char* end; - errno = 0; - long long r = strtoll(str, &end, radix); - if (end != str + n) return false; // Leftover junk - if (errno) return false; - if (dest == NULL) return true; - *(reinterpret_cast(dest)) = r; - return true; -} - -bool PCRE::Arg::parse_ulonglong_radix(const char* str, - size_t n, - void* dest, - int radix) { - if (n == 0) return false; - char buf[kMaxNumberLength+1]; - str = TerminateNumber(buf, str, n); - if (str[0] == '-') { - // strtoull() will silently accept negative numbers and parse - // them. This module is more strict and treats them as errors. - return false; - } - char* end; - errno = 0; - unsigned long long r = strtoull(str, &end, radix); - if (end != str + n) return false; // Leftover junk - if (errno) return false; - if (dest == NULL) return true; - *(reinterpret_cast(dest)) = r; - return true; -} - -static bool parse_double_float(const char* str, size_t n, bool isfloat, - void* dest) { - if (n == 0) return false; - static const int kMaxLength = 200; - char buf[kMaxLength]; - if (n >= kMaxLength) return false; - memcpy(buf, str, n); - buf[n] = '\0'; - char* end; - errno = 0; - double r; - if (isfloat) { - r = strtof(buf, &end); - } else { - r = strtod(buf, &end); - } - if (end != buf + n) return false; // Leftover junk - if (errno) return false; - if (dest == NULL) return true; - if (isfloat) { - *(reinterpret_cast(dest)) = (float)r; - } else { - *(reinterpret_cast(dest)) = r; - } - return true; -} - -bool PCRE::Arg::parse_double(const char* str, size_t n, void* dest) { - return parse_double_float(str, n, false, dest); -} - -bool PCRE::Arg::parse_float(const char* str, size_t n, void* dest) { - return parse_double_float(str, n, true, dest); -} - -#define DEFINE_INTEGER_PARSER(name) \ - bool PCRE::Arg::parse_##name(const char* str, size_t n, void* dest) { \ - return parse_##name##_radix(str, n, dest, 10); \ - } \ - bool PCRE::Arg::parse_##name##_hex(const char* str, size_t n, void* dest) { \ - return parse_##name##_radix(str, n, dest, 16); \ - } \ - bool PCRE::Arg::parse_##name##_octal(const char* str, size_t n, \ - void* dest) { \ - return parse_##name##_radix(str, n, dest, 8); \ - } \ - bool PCRE::Arg::parse_##name##_cradix(const char* str, size_t n, \ - void* dest) { \ - return parse_##name##_radix(str, n, dest, 0); \ - } - -DEFINE_INTEGER_PARSER(short); -DEFINE_INTEGER_PARSER(ushort); -DEFINE_INTEGER_PARSER(int); -DEFINE_INTEGER_PARSER(uint); -DEFINE_INTEGER_PARSER(long); -DEFINE_INTEGER_PARSER(ulong); -DEFINE_INTEGER_PARSER(longlong); -DEFINE_INTEGER_PARSER(ulonglong); - -#undef DEFINE_INTEGER_PARSER - -} // namespace re2 diff --git a/util/pcre.h b/util/pcre.h deleted file mode 100644 index 7cf2de49d847db0094e5663cd876145a4d624ac4..0000000000000000000000000000000000000000 --- a/util/pcre.h +++ /dev/null @@ -1,678 +0,0 @@ -// Copyright 2003-2010 Google Inc. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#pragma once - -// This is a variant of PCRE's pcrecpp.h, originally written at Google. -// The main changes are the addition of the HitLimit method and -// compilation as PCRE in namespace re2. - -// C++ interface to the pcre regular-expression library. PCRE supports -// Perl-style regular expressions (with extensions like \d, \w, \s, -// ...). -// -// ----------------------------------------------------------------------- -// REGEXP SYNTAX: -// -// This module uses the pcre library and hence supports its syntax -// for regular expressions: -// -// http://www.google.com/search?q=pcre -// -// The syntax is pretty similar to Perl's. For those not familiar -// with Perl's regular expressions, here are some examples of the most -// commonly used extensions: -// -// "hello (\\w+) world" -- \w matches a "word" character -// "version (\\d+)" -- \d matches a digit -// "hello\\s+world" -- \s matches any whitespace character -// "\\b(\\w+)\\b" -- \b matches empty string at a word boundary -// "(?i)hello" -- (?i) turns on case-insensitive matching -// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible -// -// ----------------------------------------------------------------------- -// MATCHING INTERFACE: -// -// The "FullMatch" operation checks that supplied text matches a -// supplied pattern exactly. -// -// Example: successful match -// CHECK(PCRE::FullMatch("hello", "h.*o")); -// -// Example: unsuccessful match (requires full match): -// CHECK(!PCRE::FullMatch("hello", "e")); -// -// ----------------------------------------------------------------------- -// UTF-8 AND THE MATCHING INTERFACE: -// -// By default, pattern and text are plain text, one byte per character. -// The UTF8 flag, passed to the constructor, causes both pattern -// and string to be treated as UTF-8 text, still a byte stream but -// potentially multiple bytes per character. In practice, the text -// is likelier to be UTF-8 than the pattern, but the match returned -// may depend on the UTF8 flag, so always use it when matching -// UTF8 text. E.g., "." will match one byte normally but with UTF8 -// set may match up to three bytes of a multi-byte character. -// -// Example: -// PCRE re(utf8_pattern, PCRE::UTF8); -// CHECK(PCRE::FullMatch(utf8_string, re)); -// -// ----------------------------------------------------------------------- -// MATCHING WITH SUBSTRING EXTRACTION: -// -// You can supply extra pointer arguments to extract matched substrings. -// -// Example: extracts "ruby" into "s" and 1234 into "i" -// int i; -// std::string s; -// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i)); -// -// Example: fails because string cannot be stored in integer -// CHECK(!PCRE::FullMatch("ruby", "(.*)", &i)); -// -// Example: fails because there aren't enough sub-patterns: -// CHECK(!PCRE::FullMatch("ruby:1234", "\\w+:\\d+", &s)); -// -// Example: does not try to extract any extra sub-patterns -// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s)); -// -// Example: does not try to extract into NULL -// CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i)); -// -// Example: integer overflow causes failure -// CHECK(!PCRE::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i)); -// -// ----------------------------------------------------------------------- -// PARTIAL MATCHES -// -// You can use the "PartialMatch" operation when you want the pattern -// to match any substring of the text. -// -// Example: simple search for a string: -// CHECK(PCRE::PartialMatch("hello", "ell")); -// -// Example: find first number in a string -// int number; -// CHECK(PCRE::PartialMatch("x*100 + 20", "(\\d+)", &number)); -// CHECK_EQ(number, 100); -// -// ----------------------------------------------------------------------- -// PPCRE-COMPILED PCREGULAR EXPPCRESSIONS -// -// PCRE makes it easy to use any string as a regular expression, without -// requiring a separate compilation step. -// -// If speed is of the essence, you can create a pre-compiled "PCRE" -// object from the pattern and use it multiple times. If you do so, -// you can typically parse text faster than with sscanf. -// -// Example: precompile pattern for faster matching: -// PCRE pattern("h.*o"); -// while (ReadLine(&str)) { -// if (PCRE::FullMatch(str, pattern)) ...; -// } -// -// ----------------------------------------------------------------------- -// SCANNING TEXT INCPCREMENTALLY -// -// The "Consume" operation may be useful if you want to repeatedly -// match regular expressions at the front of a string and skip over -// them as they match. This requires use of the "StringPiece" type, -// which represents a sub-range of a real string. -// -// Example: read lines of the form "var = value" from a string. -// std::string contents = ...; // Fill string somehow -// StringPiece input(contents); // Wrap a StringPiece around it -// -// std::string var; -// int value; -// while (PCRE::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) { -// ...; -// } -// -// Each successful call to "Consume" will set "var/value", and also -// advance "input" so it points past the matched text. Note that if the -// regular expression matches an empty string, input will advance -// by 0 bytes. If the regular expression being used might match -// an empty string, the loop body must check for this case and either -// advance the string or break out of the loop. -// -// The "FindAndConsume" operation is similar to "Consume" but does not -// anchor your match at the beginning of the string. For example, you -// could extract all words from a string by repeatedly calling -// PCRE::FindAndConsume(&input, "(\\w+)", &word) -// -// ----------------------------------------------------------------------- -// PARSING HEX/OCTAL/C-RADIX NUMBERS -// -// By default, if you pass a pointer to a numeric value, the -// corresponding text is interpreted as a base-10 number. You can -// instead wrap the pointer with a call to one of the operators Hex(), -// Octal(), or CRadix() to interpret the text in another base. The -// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16) -// prefixes, but defaults to base-10. -// -// Example: -// int a, b, c, d; -// CHECK(PCRE::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)", -// Octal(&a), Hex(&b), CRadix(&c), CRadix(&d)); -// will leave 64 in a, b, c, and d. - -#include "util/util.h" -#include "re2/stringpiece.h" - -#ifdef USEPCRE -#include -namespace re2 { -const bool UsingPCRE = true; -} // namespace re2 -#else -struct pcre; // opaque -namespace re2 { -const bool UsingPCRE = false; -} // namespace re2 -#endif - -namespace re2 { - -class PCRE_Options; - -// Interface for regular expression matching. Also corresponds to a -// pre-compiled regular expression. An "PCRE" object is safe for -// concurrent use by multiple threads. -class PCRE { - public: - // We convert user-passed pointers into special Arg objects - class Arg; - - // Marks end of arg list. - // ONLY USE IN OPTIONAL ARG DEFAULTS. - // DO NOT PASS EXPLICITLY. - static Arg no_more_args; - - // Options are same value as those in pcre. We provide them here - // to avoid users needing to include pcre.h and also to isolate - // users from pcre should we change the underlying library. - // Only those needed by Google programs are exposed here to - // avoid collision with options employed internally by regexp.cc - // Note that some options have equivalents that can be specified in - // the regexp itself. For example, prefixing your regexp with - // "(?s)" has the same effect as the PCRE_DOTALL option. - enum Option { - None = 0x0000, - UTF8 = 0x0800, // == PCRE_UTF8 - EnabledCompileOptions = UTF8, - EnabledExecOptions = 0x0000, // TODO: use to replace anchor flag - }; - - // We provide implicit conversions from strings so that users can - // pass in a string or a "const char*" wherever an "PCRE" is expected. - PCRE(const char* pattern); - PCRE(const char* pattern, Option option); - PCRE(const std::string& pattern); - PCRE(const std::string& pattern, Option option); - PCRE(const char *pattern, const PCRE_Options& re_option); - PCRE(const std::string& pattern, const PCRE_Options& re_option); - - ~PCRE(); - - // The string specification for this PCRE. E.g. - // PCRE re("ab*c?d+"); - // re.pattern(); // "ab*c?d+" - const std::string& pattern() const { return pattern_; } - - // If PCRE could not be created properly, returns an error string. - // Else returns the empty string. - const std::string& error() const { return *error_; } - - // Whether the PCRE has hit a match limit during execution. - // Not thread safe. Intended only for testing. - // If hitting match limits is a problem, - // you should be using PCRE2 (re2/re2.h) - // instead of checking this flag. - bool HitLimit(); - void ClearHitLimit(); - - /***** The useful part: the matching interface *****/ - - // Matches "text" against "pattern". If pointer arguments are - // supplied, copies matched sub-patterns into them. - // - // You can pass in a "const char*" or a "std::string" for "text". - // You can pass in a "const char*" or a "std::string" or a "PCRE" for "pattern". - // - // The provided pointer arguments can be pointers to any scalar numeric - // type, or one of: - // std::string (matched piece is copied to string) - // StringPiece (StringPiece is mutated to point to matched piece) - // T (where "bool T::ParseFrom(const char*, size_t)" exists) - // (void*)NULL (the corresponding matched sub-pattern is not copied) - // - // Returns true iff all of the following conditions are satisfied: - // a. "text" matches "pattern" exactly - // b. The number of matched sub-patterns is >= number of supplied pointers - // c. The "i"th argument has a suitable type for holding the - // string captured as the "i"th sub-pattern. If you pass in - // NULL for the "i"th argument, or pass fewer arguments than - // number of sub-patterns, "i"th captured sub-pattern is - // ignored. - // - // CAVEAT: An optional sub-pattern that does not exist in the - // matched string is assigned the empty string. Therefore, the - // following will return false (because the empty string is not a - // valid number): - // int number; - // PCRE::FullMatch("abc", "[a-z]+(\\d+)?", &number); - struct FullMatchFunctor { - bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args - const Arg& ptr1 = no_more_args, - const Arg& ptr2 = no_more_args, - const Arg& ptr3 = no_more_args, - const Arg& ptr4 = no_more_args, - const Arg& ptr5 = no_more_args, - const Arg& ptr6 = no_more_args, - const Arg& ptr7 = no_more_args, - const Arg& ptr8 = no_more_args, - const Arg& ptr9 = no_more_args, - const Arg& ptr10 = no_more_args, - const Arg& ptr11 = no_more_args, - const Arg& ptr12 = no_more_args, - const Arg& ptr13 = no_more_args, - const Arg& ptr14 = no_more_args, - const Arg& ptr15 = no_more_args, - const Arg& ptr16 = no_more_args) const; - }; - - static const FullMatchFunctor FullMatch; - - // Exactly like FullMatch(), except that "pattern" is allowed to match - // a substring of "text". - struct PartialMatchFunctor { - bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args - const Arg& ptr1 = no_more_args, - const Arg& ptr2 = no_more_args, - const Arg& ptr3 = no_more_args, - const Arg& ptr4 = no_more_args, - const Arg& ptr5 = no_more_args, - const Arg& ptr6 = no_more_args, - const Arg& ptr7 = no_more_args, - const Arg& ptr8 = no_more_args, - const Arg& ptr9 = no_more_args, - const Arg& ptr10 = no_more_args, - const Arg& ptr11 = no_more_args, - const Arg& ptr12 = no_more_args, - const Arg& ptr13 = no_more_args, - const Arg& ptr14 = no_more_args, - const Arg& ptr15 = no_more_args, - const Arg& ptr16 = no_more_args) const; - }; - - static const PartialMatchFunctor PartialMatch; - - // Like FullMatch() and PartialMatch(), except that pattern has to - // match a prefix of "text", and "input" is advanced past the matched - // text. Note: "input" is modified iff this routine returns true. - struct ConsumeFunctor { - bool operator ()(StringPiece* input, const PCRE& pattern, // 3..16 args - const Arg& ptr1 = no_more_args, - const Arg& ptr2 = no_more_args, - const Arg& ptr3 = no_more_args, - const Arg& ptr4 = no_more_args, - const Arg& ptr5 = no_more_args, - const Arg& ptr6 = no_more_args, - const Arg& ptr7 = no_more_args, - const Arg& ptr8 = no_more_args, - const Arg& ptr9 = no_more_args, - const Arg& ptr10 = no_more_args, - const Arg& ptr11 = no_more_args, - const Arg& ptr12 = no_more_args, - const Arg& ptr13 = no_more_args, - const Arg& ptr14 = no_more_args, - const Arg& ptr15 = no_more_args, - const Arg& ptr16 = no_more_args) const; - }; - - static const ConsumeFunctor Consume; - - // Like Consume(..), but does not anchor the match at the beginning of the - // string. That is, "pattern" need not start its match at the beginning of - // "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds the next - // word in "s" and stores it in "word". - struct FindAndConsumeFunctor { - bool operator ()(StringPiece* input, const PCRE& pattern, - const Arg& ptr1 = no_more_args, - const Arg& ptr2 = no_more_args, - const Arg& ptr3 = no_more_args, - const Arg& ptr4 = no_more_args, - const Arg& ptr5 = no_more_args, - const Arg& ptr6 = no_more_args, - const Arg& ptr7 = no_more_args, - const Arg& ptr8 = no_more_args, - const Arg& ptr9 = no_more_args, - const Arg& ptr10 = no_more_args, - const Arg& ptr11 = no_more_args, - const Arg& ptr12 = no_more_args, - const Arg& ptr13 = no_more_args, - const Arg& ptr14 = no_more_args, - const Arg& ptr15 = no_more_args, - const Arg& ptr16 = no_more_args) const; - }; - - static const FindAndConsumeFunctor FindAndConsume; - - // Replace the first match of "pattern" in "str" with "rewrite". - // Within "rewrite", backslash-escaped digits (\1 to \9) can be - // used to insert text matching corresponding parenthesized group - // from the pattern. \0 in "rewrite" refers to the entire matching - // text. E.g., - // - // std::string s = "yabba dabba doo"; - // CHECK(PCRE::Replace(&s, "b+", "d")); - // - // will leave "s" containing "yada dabba doo" - // - // Returns true if the pattern matches and a replacement occurs, - // false otherwise. - static bool Replace(std::string *str, - const PCRE& pattern, - const StringPiece& rewrite); - - // Like Replace(), except replaces all occurrences of the pattern in - // the string with the rewrite. Replacements are not subject to - // re-matching. E.g., - // - // std::string s = "yabba dabba doo"; - // CHECK(PCRE::GlobalReplace(&s, "b+", "d")); - // - // will leave "s" containing "yada dada doo" - // - // Returns the number of replacements made. - static int GlobalReplace(std::string *str, - const PCRE& pattern, - const StringPiece& rewrite); - - // Like Replace, except that if the pattern matches, "rewrite" - // is copied into "out" with substitutions. The non-matching - // portions of "text" are ignored. - // - // Returns true iff a match occurred and the extraction happened - // successfully; if no match occurs, the string is left unaffected. - static bool Extract(const StringPiece &text, - const PCRE& pattern, - const StringPiece &rewrite, - std::string *out); - - // Check that the given @p rewrite string is suitable for use with - // this PCRE. It checks that: - // * The PCRE has enough parenthesized subexpressions to satisfy all - // of the \N tokens in @p rewrite, and - // * The @p rewrite string doesn't have any syntax errors - // ('\' followed by anything besides [0-9] and '\'). - // Making this test will guarantee that "replace" and "extract" - // operations won't LOG(ERROR) or fail because of a bad rewrite - // string. - // @param rewrite The proposed rewrite string. - // @param error An error message is recorded here, iff we return false. - // Otherwise, it is unchanged. - // @return true, iff @p rewrite is suitable for use with the PCRE. - bool CheckRewriteString(const StringPiece& rewrite, - std::string* error) const; - - // Returns a copy of 'unquoted' with all potentially meaningful - // regexp characters backslash-escaped. The returned string, used - // as a regular expression, will exactly match the original string. - // For example, - // 1.5-2.0? - // becomes: - // 1\.5\-2\.0\? - static std::string QuoteMeta(const StringPiece& unquoted); - - /***** Generic matching interface (not so nice to use) *****/ - - // Type of match (TODO: Should be restructured as an Option) - enum Anchor { - UNANCHORED, // No anchoring - ANCHOR_START, // Anchor at start only - ANCHOR_BOTH, // Anchor at start and end - }; - - // General matching routine. Stores the length of the match in - // "*consumed" if successful. - bool DoMatch(const StringPiece& text, - Anchor anchor, - size_t* consumed, - const Arg* const* args, int n) const; - - // Return the number of capturing subpatterns, or -1 if the - // regexp wasn't valid on construction. - int NumberOfCapturingGroups() const; - - private: - void Init(const char* pattern, Option option, int match_limit, - int stack_limit, bool report_errors); - - // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with - // pairs of integers for the beginning and end positions of matched - // text. The first pair corresponds to the entire matched text; - // subsequent pairs correspond, in order, to parentheses-captured - // matches. Returns the number of pairs (one more than the number of - // the last subpattern with a match) if matching was successful - // and zero if the match failed. - // I.e. for PCRE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching - // against "foo", "bar", and "baz" respectively. - // When matching PCRE("(foo)|hello") against "hello", it will return 1. - // But the values for all subpattern are filled in into "vec". - int TryMatch(const StringPiece& text, - size_t startpos, - Anchor anchor, - bool empty_ok, - int *vec, - int vecsize) const; - - // Append the "rewrite" string, with backslash subsitutions from "text" - // and "vec", to string "out". - bool Rewrite(std::string *out, - const StringPiece &rewrite, - const StringPiece &text, - int *vec, - int veclen) const; - - // internal implementation for DoMatch - bool DoMatchImpl(const StringPiece& text, - Anchor anchor, - size_t* consumed, - const Arg* const args[], - int n, - int* vec, - int vecsize) const; - - // Compile the regexp for the specified anchoring mode - pcre* Compile(Anchor anchor); - - std::string pattern_; - Option options_; - pcre* re_full_; // For full matches - pcre* re_partial_; // For partial matches - const std::string* error_; // Error indicator (or empty string) - bool report_errors_; // Silences error logging if false - int match_limit_; // Limit on execution resources - int stack_limit_; // Limit on stack resources (bytes) - mutable int32_t hit_limit_; // Hit limit during execution (bool) - - PCRE(const PCRE&) = delete; - PCRE& operator=(const PCRE&) = delete; -}; - -// PCRE_Options allow you to set the PCRE::Options, plus any pcre -// "extra" options. The only extras are match_limit, which limits -// the CPU time of a match, and stack_limit, which limits the -// stack usage. Setting a limit to <= 0 lets PCRE pick a sensible default -// that should not cause too many problems in production code. -// If PCRE hits a limit during a match, it may return a false negative, -// but (hopefully) it won't crash. -// -// NOTE: If you are handling regular expressions specified by -// (external or internal) users, rather than hard-coded ones, -// you should be using PCRE2, which uses an alternate implementation -// that avoids these issues. See http://go/re2quick. -class PCRE_Options { - public: - // constructor - PCRE_Options() : option_(PCRE::None), match_limit_(0), stack_limit_(0), report_errors_(true) {} - // accessors - PCRE::Option option() const { return option_; } - void set_option(PCRE::Option option) { - option_ = option; - } - int match_limit() const { return match_limit_; } - void set_match_limit(int match_limit) { - match_limit_ = match_limit; - } - int stack_limit() const { return stack_limit_; } - void set_stack_limit(int stack_limit) { - stack_limit_ = stack_limit; - } - - // If the regular expression is malformed, an error message will be printed - // iff report_errors() is true. Default: true. - bool report_errors() const { return report_errors_; } - void set_report_errors(bool report_errors) { - report_errors_ = report_errors; - } - private: - PCRE::Option option_; - int match_limit_; - int stack_limit_; - bool report_errors_; -}; - - -/***** Implementation details *****/ - -// Hex/Octal/Binary? - -// Special class for parsing into objects that define a ParseFrom() method -template -class _PCRE_MatchObject { - public: - static inline bool Parse(const char* str, size_t n, void* dest) { - if (dest == NULL) return true; - T* object = reinterpret_cast(dest); - return object->ParseFrom(str, n); - } -}; - -class PCRE::Arg { - public: - // Empty constructor so we can declare arrays of PCRE::Arg - Arg(); - - // Constructor specially designed for NULL arguments - Arg(void*); - - typedef bool (*Parser)(const char* str, size_t n, void* dest); - -// Type-specific parsers -#define MAKE_PARSER(type, name) \ - Arg(type* p) : arg_(p), parser_(name) {} \ - Arg(type* p, Parser parser) : arg_(p), parser_(parser) {} - - MAKE_PARSER(char, parse_char); - MAKE_PARSER(signed char, parse_schar); - MAKE_PARSER(unsigned char, parse_uchar); - MAKE_PARSER(float, parse_float); - MAKE_PARSER(double, parse_double); - MAKE_PARSER(std::string, parse_string); - MAKE_PARSER(StringPiece, parse_stringpiece); - - MAKE_PARSER(short, parse_short); - MAKE_PARSER(unsigned short, parse_ushort); - MAKE_PARSER(int, parse_int); - MAKE_PARSER(unsigned int, parse_uint); - MAKE_PARSER(long, parse_long); - MAKE_PARSER(unsigned long, parse_ulong); - MAKE_PARSER(long long, parse_longlong); - MAKE_PARSER(unsigned long long, parse_ulonglong); - -#undef MAKE_PARSER - - // Generic constructor - template Arg(T*, Parser parser); - // Generic constructor template - template Arg(T* p) - : arg_(p), parser_(_PCRE_MatchObject::Parse) { - } - - // Parse the data - bool Parse(const char* str, size_t n) const; - - private: - void* arg_; - Parser parser_; - - static bool parse_null (const char* str, size_t n, void* dest); - static bool parse_char (const char* str, size_t n, void* dest); - static bool parse_schar (const char* str, size_t n, void* dest); - static bool parse_uchar (const char* str, size_t n, void* dest); - static bool parse_float (const char* str, size_t n, void* dest); - static bool parse_double (const char* str, size_t n, void* dest); - static bool parse_string (const char* str, size_t n, void* dest); - static bool parse_stringpiece (const char* str, size_t n, void* dest); - -#define DECLARE_INTEGER_PARSER(name) \ - private: \ - static bool parse_##name(const char* str, size_t n, void* dest); \ - static bool parse_##name##_radix(const char* str, size_t n, void* dest, \ - int radix); \ - \ - public: \ - static bool parse_##name##_hex(const char* str, size_t n, void* dest); \ - static bool parse_##name##_octal(const char* str, size_t n, void* dest); \ - static bool parse_##name##_cradix(const char* str, size_t n, void* dest) - - DECLARE_INTEGER_PARSER(short); - DECLARE_INTEGER_PARSER(ushort); - DECLARE_INTEGER_PARSER(int); - DECLARE_INTEGER_PARSER(uint); - DECLARE_INTEGER_PARSER(long); - DECLARE_INTEGER_PARSER(ulong); - DECLARE_INTEGER_PARSER(longlong); - DECLARE_INTEGER_PARSER(ulonglong); - -#undef DECLARE_INTEGER_PARSER - -}; - -inline PCRE::Arg::Arg() : arg_(NULL), parser_(parse_null) { } -inline PCRE::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { } - -inline bool PCRE::Arg::Parse(const char* str, size_t n) const { - return (*parser_)(str, n, arg_); -} - -// This part of the parser, appropriate only for ints, deals with bases -#define MAKE_INTEGER_PARSER(type, name) \ - inline PCRE::Arg Hex(type* ptr) { \ - return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_hex); \ - } \ - inline PCRE::Arg Octal(type* ptr) { \ - return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_octal); \ - } \ - inline PCRE::Arg CRadix(type* ptr) { \ - return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_cradix); \ - } - -MAKE_INTEGER_PARSER(short, short); -MAKE_INTEGER_PARSER(unsigned short, ushort); -MAKE_INTEGER_PARSER(int, int); -MAKE_INTEGER_PARSER(unsigned int, uint); -MAKE_INTEGER_PARSER(long, long); -MAKE_INTEGER_PARSER(unsigned long, ulong); -MAKE_INTEGER_PARSER(long long, longlong); -MAKE_INTEGER_PARSER(unsigned long long, ulonglong); - -#undef MAKE_INTEGER_PARSER - -} // namespace re2 diff --git a/util/rune.cc b/util/rune.cc deleted file mode 100644 index 4f625ea380f4c77e1c8f66f2caf4d4a0c67d6f7b..0000000000000000000000000000000000000000 --- a/util/rune.cc +++ /dev/null @@ -1,260 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY - * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ - -#include -#include - -#include "util/utf.h" - -namespace re2 { - -enum -{ - Bit1 = 7, - Bitx = 6, - Bit2 = 5, - Bit3 = 4, - Bit4 = 3, - Bit5 = 2, - - T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ - Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ - T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ - T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ - T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ - T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ - - Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */ - Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */ - Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */ - Rune4 = (1<<(Bit4+3*Bitx))-1, - /* 0001 1111 1111 1111 1111 1111 */ - - Maskx = (1< T1 - */ - c = *(unsigned char*)str; - if(c < Tx) { - *rune = c; - return 1; - } - - /* - * two character sequence - * 0080-07FF => T2 Tx - */ - c1 = *(unsigned char*)(str+1) ^ Tx; - if(c1 & Testx) - goto bad; - if(c < T3) { - if(c < T2) - goto bad; - l = ((c << Bitx) | c1) & Rune2; - if(l <= Rune1) - goto bad; - *rune = l; - return 2; - } - - /* - * three character sequence - * 0800-FFFF => T3 Tx Tx - */ - c2 = *(unsigned char*)(str+2) ^ Tx; - if(c2 & Testx) - goto bad; - if(c < T4) { - l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; - if(l <= Rune2) - goto bad; - *rune = l; - return 3; - } - - /* - * four character sequence (21-bit value) - * 10000-1FFFFF => T4 Tx Tx Tx - */ - c3 = *(unsigned char*)(str+3) ^ Tx; - if (c3 & Testx) - goto bad; - if (c < T5) { - l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; - if (l <= Rune3) - goto bad; - *rune = l; - return 4; - } - - /* - * Support for 5-byte or longer UTF-8 would go here, but - * since we don't have that, we'll just fall through to bad. - */ - - /* - * bad decoding - */ -bad: - *rune = Bad; - return 1; -} - -int -runetochar(char *str, const Rune *rune) -{ - /* Runes are signed, so convert to unsigned for range check. */ - unsigned long c; - - /* - * one character sequence - * 00000-0007F => 00-7F - */ - c = *rune; - if(c <= Rune1) { - str[0] = static_cast(c); - return 1; - } - - /* - * two character sequence - * 0080-07FF => T2 Tx - */ - if(c <= Rune2) { - str[0] = T2 | static_cast(c >> 1*Bitx); - str[1] = Tx | (c & Maskx); - return 2; - } - - /* - * If the Rune is out of range, convert it to the error rune. - * Do this test here because the error rune encodes to three bytes. - * Doing it earlier would duplicate work, since an out of range - * Rune wouldn't have fit in one or two bytes. - */ - if (c > Runemax) - c = Runeerror; - - /* - * three character sequence - * 0800-FFFF => T3 Tx Tx - */ - if (c <= Rune3) { - str[0] = T3 | static_cast(c >> 2*Bitx); - str[1] = Tx | ((c >> 1*Bitx) & Maskx); - str[2] = Tx | (c & Maskx); - return 3; - } - - /* - * four character sequence (21-bit value) - * 10000-1FFFFF => T4 Tx Tx Tx - */ - str[0] = T4 | static_cast(c >> 3*Bitx); - str[1] = Tx | ((c >> 2*Bitx) & Maskx); - str[2] = Tx | ((c >> 1*Bitx) & Maskx); - str[3] = Tx | (c & Maskx); - return 4; -} - -int -runelen(Rune rune) -{ - char str[10]; - - return runetochar(str, &rune); -} - -int -fullrune(const char *str, int n) -{ - if (n > 0) { - int c = *(unsigned char*)str; - if (c < Tx) - return 1; - if (n > 1) { - if (c < T3) - return 1; - if (n > 2) { - if (c < T4 || n > 3) - return 1; - } - } - } - return 0; -} - - -int -utflen(const char *s) -{ - int c; - long n; - Rune rune; - - n = 0; - for(;;) { - c = *(unsigned char*)s; - if(c < Runeself) { - if(c == 0) - return n; - s++; - } else - s += chartorune(&rune, s); - n++; - } - return 0; -} - -char* -utfrune(const char *s, Rune c) -{ - long c1; - Rune r; - int n; - - if(c < Runesync) /* not part of utf sequence */ - return strchr((char*)s, c); - - for(;;) { - c1 = *(unsigned char*)s; - if(c1 < Runeself) { /* one byte rune */ - if(c1 == 0) - return 0; - if(c1 == c) - return (char*)s; - s++; - continue; - } - n = chartorune(&r, s); - if(r == c) - return (char*)s; - s += n; - } - return 0; -} - -} // namespace re2 diff --git a/util/utf.h b/util/utf.h deleted file mode 100644 index a25f762d95941c558b008221da43b181518370b8..0000000000000000000000000000000000000000 --- a/util/utf.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY - * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - * - * This file and rune.cc have been converted to compile as C++ code - * in name space re2. - */ - -#pragma once - -#include - -namespace re2 { - -typedef signed int Rune; /* Code-point values in Unicode 4.0 are 21 bits wide.*/ - -enum -{ - UTFmax = 4, /* maximum bytes per rune */ - Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ - Runeself = 0x80, /* rune and UTF sequences are the same (<) */ - Runeerror = 0xFFFD, /* decoding error in UTF */ - Runemax = 0x10FFFF, /* maximum rune value */ -}; - -int runetochar(char* s, const Rune* r); -int chartorune(Rune* r, const char* s); -int fullrune(const char* s, int n); -int utflen(const char* s); -char* utfrune(const char*, Rune); - -} // namespace re2 \ No newline at end of file