diff --git a/re2/re2.cc b/re2/re2.cc index 64afcd9e23a25e3633bc21339e68673afbec7c34..60ee331b4577985585dda8b1461409831bc159ad 100644 --- a/re2/re2.cc +++ b/re2/re2.cc @@ -210,7 +210,11 @@ namespace re2 if(options_.dot_nl()) flags = RURE_FLAG_DOTNL; if(options_.never_nl()) flags = RURE_DEFAULT_FLAGS; // 空字符串的处理??? + std::string FullMatch_rure_str = rure_str; // anchored copy of the pattern; compiled below and used for ANCHOR_BOTH matching + FullMatch_rure_str.insert(0, "\\A(?:"); // non-capturing group keeps alternations such as a|b anchored on both sides without changing capture numbering + FullMatch_rure_str.append(")\\z"); // \A and \z match only at the very start and end of the haystack, regardless of multi-line mode rure *re = rure_compile((const uint8_t *)rure_str.c_str(), strlen(rure_str.c_str()), flags, NULL, err); + rure *re1 = (re == NULL) ? NULL : rure_compile((const uint8_t *)FullMatch_rure_str.c_str(), FullMatch_rure_str.size(), flags, NULL, err); // skipped when the first compile failed so err still describes the user's pattern; size() keeps embedded NUL bytes //如果编译失败,打印错误信息 if (re == NULL) @@ -246,6 +250,7 @@ namespace re2 else { prog_ = (Prog *)re; + rprog_ = (Prog *)re1; error_ = empty_string; error_code_ = RE2::NoError; } @@ -724,12 +729,36 @@ namespace re2 // } // return true; // } - + // rure_error *err = rure_error_new(); + // rure *re = rure_compile((const uint8_t *) pattern_.c_str(), strlen(pattern_.c_str()), RURE_DEFAULT_FLAGS, NULL, err); const char *haystack = text.data(); rure *re = (rure *)prog_; + rure *re1 = (rure *)rprog_; rure_match match = {0}; - bool matched = rure_find(re, (const uint8_t *)haystack, strlen(haystack), 0, &match); + // bool matched = rure_find(re, (const uint8_t *)haystack, strlen(haystack), 0, &match); + if(re_anchor == UNANCHORED) + { + bool matched = rure_is_match(re, (const uint8_t *)haystack, text.size(), 0); // text is not guaranteed to be NUL-terminated, so pass its own length instead of strlen + if(!matched){ + return false; + } + else if(!nsubmatch){ + return true; + } + } + else if(re_anchor == ANCHOR_BOTH) + { + bool matched = rure_is_match(re1, (const uint8_t *)haystack, text.size(), 0); // same length rule as above; re1 is the anchored program + if(!matched){ + return false; + } + else if(!nsubmatch){ + return true; + } + } + // FIXME(review): ANCHOR_START is not handled above, and a successful match with nsubmatch != 0 also falls through; as far as the visible hunks show, everything below is commented out, so control reaches the end of the function with no return value and submatch[] is never filled + /* switch (re_anchor) { // ANCHOR_BOTH FullMatch @@ -776,7 +805,8 @@ namespace re2 } else { - if (matched && match.end != 0) + // if (matched && match.end != 0) + if (matched) return true; else return false; @@ -799,9 +829,9 @@ 
namespace re2 } } } - + */ // Demo 获取捕获组内容,存储到submatch数组中 - + /* size_t length = strlen(haystack); rure_captures *caps = rure_captures_new(re); @@ -833,6 +863,7 @@ namespace re2 return true; + */ } // std::string_view in MSVC has iterators that aren't just pointers and @@ -869,6 +900,7 @@ namespace re2 return false; } + /* // 判断是否FullMatch, 判空 std::string haystack; if (text.data() == NULL || text[0] == '\0') @@ -890,6 +922,7 @@ namespace re2 rure *re = (rure *)prog_; rure_match match = {0}; bool matched = rure_find(re, (const uint8_t *)haystack.c_str(), strlen(haystack.c_str()), 0, &match); + */ // Count number of capture groups needed. int nvec; @@ -897,6 +930,8 @@ namespace re2 nvec = 0; // 0个捕获组 else nvec = n + 1; + + /* // 0个捕获组的匹配判断 if (nvec == 0) { @@ -963,7 +998,7 @@ namespace re2 } } } - + */ StringPiece *vec; StringPiece stkvec[kVecSize]; StringPiece *heapvec = NULL; @@ -1003,12 +1038,12 @@ namespace re2 // 结下来就是要对正表达式中存在的捕获组进行处理 // 如果不需要捕获组,直接返回true - // if (n == 0 || args == NULL) - // { - // // We are not interested in results - // delete[] heapvec; - // return true; - // } + if (n == 0 || args == NULL) + { + // We are not interested in results + delete[] heapvec; + return true; + } // If we got here, we must have matched the whole pattern. 
for (int i = 0; i < n; i++) diff --git a/re2/testing/regexp_benchmark.cc b/re2/testing/regexp_benchmark.cc index c2e9b504d10ccc1ef234653ba315a4a9f8a25d6c..e8ba4cfa305b9e987b35f6736fca98ac67c7dda7 100644 --- a/re2/testing/regexp_benchmark.cc +++ b/re2/testing/regexp_benchmark.cc @@ -207,7 +207,7 @@ void FindAndConsume(benchmark::State& state) { state.SetBytesProcessed(state.iterations() * state.range(0)); } -BENCHMARK_RANGE(FindAndConsume, 8, 16)->ThreadRange(1, NumCPUs()); +// BENCHMARK_RANGE(FindAndConsume, 8, 16)->ThreadRange(1, NumCPUs()); //////////////////////////////////////////////////////////////////////// // @@ -423,7 +423,7 @@ void EmptyPartialMatchRE2(benchmark::State& state) { } } -void EmptyPartialMatchRE2_LiuZhitao(benchmark::State& state) { +void EmptyPartialMatchRE2_text_re2_1KB(benchmark::State& state) { std::ifstream in("re2/testing/text_re2_1KB.txt"); std::stringstream buffer; buffer << in.rdbuf(); @@ -439,7 +439,7 @@ void EmptyPartialMatchRE2_LiuZhitao(benchmark::State& state) { BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs()); #endif BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs()); -BENCHMARK_RANGE(EmptyPartialMatchRE2_LiuZhitao, 8, 2<<9); +BENCHMARK_RANGE(EmptyPartialMatchRE2_text_re2_1KB, 2<<6, 2<<9); void SimplePartialMatchPCRE(benchmark::State& state) { PCRE re("abcdefg"); @@ -545,7 +545,7 @@ void ASCIIMatchRE2(benchmark::State& state) { } } -void ASCIIMatchRE2_LiuZhitao(benchmark::State& state) { +void ASCIIMatchRE2_text_re2_1KB(benchmark::State& state) { std::ifstream in("re2/testing/text_re2_1KB.txt"); std::stringstream buffer; buffer << in.rdbuf(); @@ -561,7 +561,7 @@ void ASCIIMatchRE2_LiuZhitao(benchmark::State& state) { BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs()); #endif BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs()); -BENCHMARK_RANGE(ASCIIMatchRE2_LiuZhitao, 8, 2<<9); +BENCHMARK_RANGE(ASCIIMatchRE2_text_re2_1KB, 2<<6, 2<<9); void FullMatchPCRE(benchmark::State& state, const char *regexp) { 
std::string s = RandomText(state.range(0)); @@ -578,12 +578,13 @@ void FullMatchRE2(benchmark::State& state, const char *regexp) { s += "ABCDEFGHIJ"; RE2 re(regexp, RE2::Latin1); for (auto _ : state) { + CHECK(RE2::FullMatch(s, re)); } state.SetBytesProcessed(state.iterations() * state.range(0)); } -void FullMatchRE2_LiuZhitao(benchmark::State& state, const char *regexp) { +void FullMatchRE2_text_re2_1KB(benchmark::State& state, const char *regexp) { std::ifstream in("re2/testing/text_re2_1KB.txt"); std::stringstream buffer; @@ -591,6 +592,7 @@ void FullMatchRE2_LiuZhitao(benchmark::State& state, const char *regexp) { std::string s = buffer.str(); RE2 re(regexp, RE2::Latin1); for (auto _ : state) { + CHECK(RE2::FullMatch(s.substr(0, state.range(0)), re)); } state.SetBytesProcessed(state.iterations() * state.range(0)); @@ -602,7 +604,6 @@ void Rure_Find_RE2(benchmark::State& state, const char *regexp) std::stringstream buffer; buffer << in.rdbuf(); std::string s = buffer.str().substr(0, state.range(0)); - RE2 re(regexp); rure_error *err = rure_error_new(); rure *re1 = rure_compile((const uint8_t *)regexp, strlen(regexp), RURE_DEFAULT_FLAGS, NULL, err); rure_match match = {0}; @@ -619,40 +620,49 @@ void Rure_is_Match_RE2(benchmark::State& state, const char *regexp) std::stringstream buffer; buffer << in.rdbuf(); std::string s = buffer.str().substr(0, state.range(0)); - RE2 re(regexp); rure_error *err = rure_error_new(); rure *re1 = rure_compile((const uint8_t *)regexp, strlen(regexp), RURE_DEFAULT_FLAGS, NULL, err); for (auto _ : state) { - bool matched = rure_is_match(re1, (const uint8_t *)s.c_str(), strlen(s.c_str()), 0); + bool matched = rure_is_match(re1, (const uint8_t *)s.c_str(), s.size(), 0); // re1 is already a rure*; s.size() keeps an O(n) strlen scan out of the timed loop CHECK(matched); } state.SetBytesProcessed(state.iterations() * state.range(0)); } -void Rure_Find_RE2_Bench1(benchmark::State& state) { Rure_Find_RE2(state, "(?s).*"); } -void Rure_Find_RE2_Bench2(benchmark::State& state) { Rure_Find_RE2(state, 
"(?s).*$"); } -void Rure_Find_RE2_Bench3(benchmark::State& state) { Rure_Find_RE2(state, "(?s)((.*)()()($))"); } +void Rure_Find_RE2_DotStar_text_re2_1KB(benchmark::State& state) { Rure_Find_RE2(state, "(?s).*"); } +void Rure_Find_RE2_DotStarDollar_text_re2_1KB(benchmark::State& state) { Rure_Find_RE2(state, "(?s).*$"); } +void Rure_Find_RE2_DotStarCapture_text_re2_1KB(benchmark::State& state) { Rure_Find_RE2(state, "(?s)((.*)()()($))"); } -BENCHMARK_RANGE(Rure_Find_RE2_Bench1, 2<<6, 2<<9); -BENCHMARK_RANGE(Rure_Find_RE2_Bench2, 2<<6, 2<<9); -BENCHMARK_RANGE(Rure_Find_RE2_Bench3, 2<<6, 2<<9); +// BENCHMARK_RANGE(Rure_Find_RE2_DotStar_text_re2_1KB, 2<<3, 2<<9); +// BENCHMARK_RANGE(Rure_Find_RE2_DotStarDollar_text_re2_1KB, 2<<3, 2<<9); +// BENCHMARK_RANGE(Rure_Find_RE2_DotStarCapture_text_re2_1KB, 2<<3, 2<<9); -void Rure_is_Match_RE2_Bench1(benchmark::State& state) { Rure_is_Match_RE2(state, "(?s).*"); } -void Rure_is_Match_RE2_Bench2(benchmark::State& state) { Rure_is_Match_RE2(state, "(?s).*$"); } -void Rure_is_Match_RE2_Bench3(benchmark::State& state) { Rure_is_Match_RE2(state, "(?s)((.*)()()($))"); } +// Patterns without the leading ^ and trailing $ anchors (PartialMatch semantics), benchmarked directly through the regex library's public function rure_is_match() +void Rure_is_Match_RE2_DotStar_text_re2_1KB(benchmark::State& state) { Rure_is_Match_RE2(state, "(?s).*"); } +void Rure_is_Match_RE2_DotStarDollar_text_re2_1KB(benchmark::State& state) { Rure_is_Match_RE2(state, "(?s).*$"); } +void Rure_is_Match_RE2_DotStarCapture_text_re2_1KB(benchmark::State& state) { Rure_is_Match_RE2(state, "(?s)((.*)()()($))"); } -BENCHMARK_RANGE(Rure_is_Match_RE2_Bench1, 2<<6, 2<<9); -BENCHMARK_RANGE(Rure_is_Match_RE2_Bench2, 2<<6, 2<<9); -BENCHMARK_RANGE(Rure_is_Match_RE2_Bench3, 2<<6, 2<<9); +BENCHMARK_RANGE(Rure_is_Match_RE2_DotStar_text_re2_1KB, 2<<6, 2<<9); +BENCHMARK_RANGE(Rure_is_Match_RE2_DotStarDollar_text_re2_1KB, 2<<6, 2<<9); +BENCHMARK_RANGE(Rure_is_Match_RE2_DotStarCapture_text_re2_1KB, 2<<6, 2<<9); 
+// Patterns with the leading ^ and trailing $ anchors (FullMatch semantics), benchmarked directly through the regex library's public function rure_is_match() +void Rure_is_Match_RE2_Begin_DotStar_End_text_re2_1KB(benchmark::State& state) { Rure_is_Match_RE2(state, "^(?s).*$"); } +void Rure_is_Match_RE2_Begin_DotStarDollar_End_text_re2_1KB(benchmark::State& state) { Rure_is_Match_RE2(state, "^(?s).*$$"); } +void Rure_is_Match_RE2_Begin_DotStarCapture_End_text_re2_1KB(benchmark::State& state) { Rure_is_Match_RE2(state, "^(?s)((.*)()()($))$"); } -void FullMatch_DotStar_CachedRE2_LiuZhitao(benchmark::State& state) { FullMatchRE2_LiuZhitao(state, "(?s).*"); } -void FullMatch_DotStarDollar_CachedRE2_LiuZhitao(benchmark::State& state) { FullMatchRE2_LiuZhitao(state, "(?s).*$"); } -void FullMatch_DotStarCapture_CachedRE2_LiuZhitao(benchmark::State& state) { FullMatchRE2_LiuZhitao(state, "(?s)((.*)()()($))"); } +BENCHMARK_RANGE(Rure_is_Match_RE2_Begin_DotStar_End_text_re2_1KB, 2<<6, 2<<9); +BENCHMARK_RANGE(Rure_is_Match_RE2_Begin_DotStarDollar_End_text_re2_1KB, 2<<6, 2<<9); +BENCHMARK_RANGE(Rure_is_Match_RE2_Begin_DotStarCapture_End_text_re2_1KB, 2<<6, 2<<9); -BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2_LiuZhitao, 8, 2<<9); -BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2_LiuZhitao, 8, 2<<9); -BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2_LiuZhitao, 8, 2<<9); +// Patterns matched with FullMatch semantics (anchored at both ends), benchmarked through the original RE2 project's public function FullMatch() +void FullMatch_RE2_DotStar_text_re2_1KB(benchmark::State& state) { FullMatchRE2_text_re2_1KB(state, "(?s).*"); } +void FullMatch_RE2_DotStarDollar_text_re2_1KB(benchmark::State& state) { FullMatchRE2_text_re2_1KB(state, "(?s).*$"); } +void FullMatch_RE2_DotStarCapture_text_re2_1KB(benchmark::State& state) { FullMatchRE2_text_re2_1KB(state, "(?s)((.*)()()($))"); } + +BENCHMARK_RANGE(FullMatch_RE2_DotStar_text_re2_1KB, 2<<6, 2<<9); +BENCHMARK_RANGE(FullMatch_RE2_DotStarDollar_text_re2_1KB, 2<<6, 2<<9); +BENCHMARK_RANGE(FullMatch_RE2_DotStarCapture_text_re2_1KB, 2<<6, 2<<9); void FullMatch_DotStar_CachedPCRE(benchmark::State& state) { 
FullMatchPCRE(state, "(?s).*"); } void FullMatch_DotStar_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s).*"); } @@ -666,17 +676,17 @@ void FullMatch_DotStarCapture_CachedRE2(benchmark::State& state) { FullMatchRE2 #ifdef USEPCRE BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20); #endif -BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2, 8, 2<<20); +BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2, 2<<6, 2<<9); #ifdef USEPCRE BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20); #endif -BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2, 8, 2<<20); +BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2, 2<<6, 2<<9); #ifdef USEPCRE BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20); #endif -BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2, 8, 2<<20); +BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2, 2<<6, 2<<9); } // namespace re2