From 2f24b1a27f8d48d3122306853dd6f2058e1af570 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E5=BF=97=E6=B6=9B?=
Date: Thu, 27 Oct 2022 14:14:37 +0000
Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=BA=86=E5=AF=B9=E5=BE=88?=
 =?UTF-8?q?=E5=A4=9A=E4=B8=AA=E2=80=98\n=E2=80=99=E7=9A=84=E6=B5=8B?=
 =?UTF-8?q?=E8=AF=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 刘志涛
---
Notes (reviewer commentary, not part of the commit message):
  * Subject, decoded and translated: "Added tests for many '\n'".
  * The three FullMatchRE2_text_dotnl_* benchmarks search a fixed string
    literal, so bytes-processed is now reported from the literal's real
    length instead of state.range(0). Their common body lives in one helper.
  * The benchmarks set never_nl(true) and call RE2::PartialMatch; the
    "FullMatch"/"dotnl" names are kept only so registered names stay stable.
  * NOTE(review): this patch also comments out six Rure_is_Match_*
    registrations and pins FullMatch_DotStar_CachedRE2 to the single size
    2 << 19. These look like local experiment settings; confirm they are
    intended before merging.
  * NOTE(review): the post-image blob id on the "index" line below was not
    regenerated after the added lines were revised.

 re2/testing/regexp_benchmark.cc | 59 +++++++++++++++++++++++++++++----
 1 file changed, 52 insertions(+), 7 deletions(-)

diff --git a/re2/testing/regexp_benchmark.cc b/re2/testing/regexp_benchmark.cc
index 00c2ee6..23364d5 100644
--- a/re2/testing/regexp_benchmark.cc
+++ b/re2/testing/regexp_benchmark.cc
@@ -598,6 +598,42 @@ void FullMatchRE2_text_re2_1KB(benchmark::State& state, const char *regexp) {
   state.SetBytesProcessed(state.iterations() * state.range(0));
 }
 
+// Shared driver for the never_nl benchmarks below. Compiles `regexp` once with
+// never_nl=true, then times RE2::PartialMatch over `text`. (Despite the
+// FullMatchRE2_* names of its callers, this is a partial-match search.)
+template <size_t N>
+static void PartialMatchRE2_never_nl(benchmark::State& state,
+                                     const char* regexp,
+                                     const char (&text)[N]) {
+  RE2::Options opt;
+  opt.set_never_nl(true);
+  RE2 re(regexp, opt);
+  for (auto _ : state) {
+    CHECK(RE2::PartialMatch(text, re));
+  }
+  // Report the actual text size (N counts the trailing NUL) instead of
+  // state.range(0), which is unrelated to the length of this fixed text.
+  state.SetBytesProcessed(state.iterations() * static_cast<int64_t>(N - 1));
+}
+
+// Short input: '\n'-terminated three-letter lines, ending with "ppp\n".
+void FullMatchRE2_text_dotnl_10(benchmark::State& state, const char *regexp) {
+  static const char kText[] = "aaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\nppp\n";
+  PartialMatchRE2_never_nl(state, regexp, kText);
+}
+
+// Medium input: the same kind of lines, ending with "jjj\n".
+void FullMatchRE2_text_dotnl_30(benchmark::State& state, const char *regexp) {
+  static const char kText[] = "aaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\njjj\n";
+  PartialMatchRE2_never_nl(state, regexp, kText);
+}
+
+// Long input: the same kind of lines, ending with "qqq\n".
+void FullMatchRE2_text_dotnl_90(benchmark::State& state, const char *regexp) {
+  static const char kText[] = "aaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\neee\naaa\nbbb\nccc\nddd\nqqq\n";
+  PartialMatchRE2_never_nl(state, regexp, kText);
+}
+
 void Rure_Find_RE2(benchmark::State& state, const char *regexp) {
 
   std::ifstream in("re2/testing/text_re2_1KB.txt");
@@ -642,18 +678,27 @@ void Rure_is_Match_RE2_DotStar_text_re2_1KB(benchmark::State& state) { Rure_is_
 void Rure_is_Match_RE2_DotStarDollar_text_re2_1KB(benchmark::State& state) { Rure_is_Match_RE2(state, "(?s).*$"); }
 void Rure_is_Match_RE2_DotStarCapture_text_re2_1KB(benchmark::State& state) { Rure_is_Match_RE2(state, "(?s)((.*)()()($))"); }
 
-BENCHMARK_RANGE(Rure_is_Match_RE2_DotStar_text_re2_1KB, 2 << 6, 2 << 9);
-BENCHMARK_RANGE(Rure_is_Match_RE2_DotStarDollar_text_re2_1KB, 2 << 6, 2 << 9);
-BENCHMARK_RANGE(Rure_is_Match_RE2_DotStarCapture_text_re2_1KB, 2 << 6, 2 << 9);
+// BENCHMARK_RANGE(Rure_is_Match_RE2_DotStar_text_re2_1KB, 2 << 6, 2 << 9);
+// BENCHMARK_RANGE(Rure_is_Match_RE2_DotStarDollar_text_re2_1KB, 2 << 6, 2 << 9);
+// BENCHMARK_RANGE(Rure_is_Match_RE2_DotStarCapture_text_re2_1KB, 2 << 6, 2 << 9);
 
 // 加起止符 ^ 结束符 $ 的正则表达式,也就是FullMatch,通过regex对外接口rure_is_match()函数直接测试
 void Rure_is_Match_RE2_Begin_DotStar_End_text_re2_1KB(benchmark::State& state) { Rure_is_Match_RE2(state, "^(?s).*$"); }
 void Rure_is_Match_RE2_Begin_DotStarDollar_End_text_re2_1KB(benchmark::State& state) { Rure_is_Match_RE2(state, "^(?s).*$$"); }
 void Rure_is_Match_RE2_Begin_DotStarCapture_End_text_re2_1KB(benchmark::State& state) { Rure_is_Match_RE2(state, "^(?s)((.*)()()($))$"); }
 
-BENCHMARK_RANGE(Rure_is_Match_RE2_Begin_DotStar_End_text_re2_1KB, 2 << 6, 2 << 9);
-BENCHMARK_RANGE(Rure_is_Match_RE2_Begin_DotStarDollar_End_text_re2_1KB, 2 << 6, 2 << 9);
-BENCHMARK_RANGE(Rure_is_Match_RE2_Begin_DotStarCapture_End_text_re2_1KB, 2 << 6, 2 << 9);
+// BENCHMARK_RANGE(Rure_is_Match_RE2_Begin_DotStar_End_text_re2_1KB, 2 << 6, 2 << 9);
+// BENCHMARK_RANGE(Rure_is_Match_RE2_Begin_DotStarDollar_End_text_re2_1KB, 2 << 6, 2 << 9);
+// BENCHMARK_RANGE(Rure_is_Match_RE2_Begin_DotStarCapture_End_text_re2_1KB, 2 << 6, 2 << 9);
+
+// Benchmarks over text containing many '\n', searched with never_nl=true.
+void FullMatch_RE2_text_dotnl_10(benchmark::State& state) { FullMatchRE2_text_dotnl_10(state, "pp"); }
+void FullMatch_RE2_text_dotnl_30(benchmark::State& state) { FullMatchRE2_text_dotnl_30(state, "jj"); }
+void FullMatch_RE2_text_dotnl_90(benchmark::State& state) { FullMatchRE2_text_dotnl_90(state, "qq"); }
+
+BENCHMARK_RANGE(FullMatch_RE2_text_dotnl_10, 2 << 9, 2 << 9);
+BENCHMARK_RANGE(FullMatch_RE2_text_dotnl_30, 2 << 9, 2 << 9);
+BENCHMARK_RANGE(FullMatch_RE2_text_dotnl_90, 2 << 9, 2 << 9);
 
 // 加起止符 ^ 结束符 $ 的正则表达式,也就是FullMatch,通过原本RE2项目对外接口FullMatch()函数测试
 void FullMatch_RE2_DotStar_text_re2_1KB(benchmark::State& state) { FullMatchRE2_text_re2_1KB(state, "(?s).*"); }
@@ -676,7 +721,7 @@ void FullMatch_DotStarCapture_CachedRE2(benchmark::State& state) { FullMatchRE2
 #ifdef USEPCRE
 BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2 << 20);
 #endif
-BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2, 2 << 6, 2 << 9);
+BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2, 2 << 19, 2 << 19);
 
 #ifdef USEPCRE
 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2 << 20);
-- 
Gitee