diff --git a/re2/filtered_re2.cc b/re2/filtered_re2.cc index c73929a40c1a988ee8466e9a164eef8db5423573..e86401c416da82718e421a65fcb047d3947ae918 100644 --- a/re2/filtered_re2.cc +++ b/re2/filtered_re2.cc @@ -283,6 +283,21 @@ void MyCompile(std::string str, int start_post, int end_post) for (int i = start_post; i <= end_post; i++) { + // Handle a backslash escape: flush the pending atom, then step over the escape. NOTE(review): JudgeIsCharOrNumber receives the index after the backslash, not str at that index, and the str lookup that follows may land past end_post when the backslash is last; confirm both are intended. + if(str[i]=='\\') + { + if(atoms_tmp_string.size() > 0) + { + my_atoms.push_back(atoms_tmp_string); + atoms_tmp_string.clear(); + } + + int escape_char_post = i; + if(JudgeIsCharOrNumber(++escape_char_post)) ++i; + int escape_plus_post = i; + if(str[++escape_plus_post] == '+') ++i; + continue; + } if(str[i] == '*') { continue; @@ -462,6 +477,8 @@ int FilteredRE2::SlowFirstMatch(const StringPiece& text) const { return -1; } + + int FilteredRE2::FirstMatch(const StringPiece& text, const std::vector& atoms) const { if (!compiled_) { @@ -518,10 +535,42 @@ bool FilteredRE2::AllMatches( std::vector* matching_regexps) const { matching_regexps->clear(); std::vector regexps; - // for(int i = 0; i < atoms_tmp.size(); i++){ - - // } - // prefilter_tree_->RegexpsGivenStrings(atoms, &regexps); + // Rules for deriving regexp indices from the atom indices: + /* + * A regexp that has no atoms is added directly. + * A regexp that has atoms is added only once the indices of all of its atoms are present. NOTE(review): the push_back in the inner loop sits after a continue/break pair and looks unreachable, my_atoms is indexed with a caller-supplied atom id, and the PartialMatch loop that follows walks re2_vec_ rather than regexps; confirm. + */ + for(size_t i = 0; i < re2_vec_.size(); i++) + { + my_atoms.clear(); + vec_atoms_tmp.clear(); + vec_con.clear(); + atoms_tmp.clear(); + MyCompile(re2_vec_[i]->pattern(), 0, re2_vec_[i]->pattern().size() - 1); + if(my_atoms.size() == 0) + { + regexps.push_back(i); + continue; + } + else + { + for(auto x : my_atoms) + { + int flag = 0; + for(auto y : atoms) + { + if(x == my_atoms[y]) + continue; + else + { + flag = 1; + break; + } + if(flag == 0) regexps.push_back(i); + } + } + } + } for (size_t i = 0; i < re2_vec_.size(); i++) if (RE2::PartialMatch(text, *re2_vec_[i])) matching_regexps->push_back(i); diff --git a/re2/testing/filtered_re2_test.cc b/re2/testing/filtered_re2_test.cc index 
a6d7f831a891d2e7432a0c0fb2d9ff0d4c86271f..e3c8c94f6ddcd25ee0913cc8b7f659da7883e60b 100644 --- a/re2/testing/filtered_re2_test.cc +++ b/re2/testing/filtered_re2_test.cc @@ -237,50 +237,50 @@ TEST(FilteredRE2Test, MatchEmptyPattern) { EXPECT_EQ(0, v.f.FirstMatch(text, atom_ids)); } -// TEST(FilteredRE2Test, MatchTests) { -// FilterTestVars v; -// AtomTest* t = &atom_tests[2]; -// // We are using the regexps used in one of the atom tests -// // for this test. -// EXPECT_EQ("SubstrAtomRemovesSuperStrInOr", std::string(t->testname)); -// size_t nregexp; -// for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++) -// if (t->regexps[nregexp] == NULL) -// break; -// AddRegexpsAndCompile(t->regexps, nregexp, &v); - -// std::string text = "abc121212xyz"; -// // atoms = abc -// std::vector atom_ids; -// std::vector atoms; -// atoms.push_back("abc"); -// FindAtomIndices(v.atoms, atoms, &atom_ids); -// std::vector matching_regexps; -// v.f.AllMatches(text, atom_ids, &matching_regexps); -// EXPECT_EQ(1, matching_regexps.size()); - -// text = "abc12312yyyzzz"; -// atoms.clear(); -// atoms.push_back("abc"); -// atoms.push_back("yyy"); -// atoms.push_back("yyyzzz"); -// FindAtomIndices(v.atoms, atoms, &atom_ids); -// v.f.AllMatches(text, atom_ids, &matching_regexps); -// EXPECT_EQ(1, matching_regexps.size()); - -// text = "abcd12yyy32yyyzzz"; -// atoms.clear(); -// atoms.push_back("abc"); -// atoms.push_back("abcd"); -// atoms.push_back("yyy"); -// atoms.push_back("yyyzzz"); -// FindAtomIndices(v.atoms, atoms, &atom_ids); -// LOG(INFO) << "S: " << atom_ids.size(); -// for (size_t i = 0; i < atom_ids.size(); i++) -// LOG(INFO) << "i: " << i << " : " << atom_ids[i]; -// v.f.AllMatches(text, atom_ids, &matching_regexps); -// EXPECT_EQ(2, matching_regexps.size()); -// } +TEST(FilteredRE2Test, MatchTests) { + FilterTestVars v; + AtomTest* t = &atom_tests[2]; + // We are using the regexps used in one of the atom tests + // for this test. 
+ EXPECT_EQ("SubstrAtomRemovesSuperStrInOr", std::string(t->testname)); + size_t nregexp; + for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++) + if (t->regexps[nregexp] == NULL) + break; + AddRegexpsAndCompile(t->regexps, nregexp, &v); + + std::string text = "abc121212xyz"; + // atoms = abc + std::vector atom_ids; + std::vector atoms; + atoms.push_back("abc"); + FindAtomIndices(v.atoms, atoms, &atom_ids); + std::vector matching_regexps; + v.f.AllMatches(text, atom_ids, &matching_regexps); + EXPECT_EQ(1, matching_regexps.size()); + + text = "abc12312yyyzzz"; + atoms.clear(); + atoms.push_back("abc"); + atoms.push_back("yyy"); + atoms.push_back("yyyzzz"); + FindAtomIndices(v.atoms, atoms, &atom_ids); + v.f.AllMatches(text, atom_ids, &matching_regexps); + EXPECT_EQ(1, matching_regexps.size()); + + text = "abcd12yyy32yyyzzz"; + atoms.clear(); + atoms.push_back("abc"); + atoms.push_back("abcd"); + atoms.push_back("yyy"); + atoms.push_back("yyyzzz"); + FindAtomIndices(v.atoms, atoms, &atom_ids); + LOG(INFO) << "S: " << atom_ids.size(); + for (size_t i = 0; i < atom_ids.size(); i++) + LOG(INFO) << "i: " << i << " : " << atom_ids[i]; + v.f.AllMatches(text, atom_ids, &matching_regexps); + EXPECT_EQ(2, matching_regexps.size()); +} // TEST(FilteredRE2Test, EmptyStringInStringSetBug) { // // Bug due to find() finding "" at the start of everything in a string @@ -296,50 +296,50 @@ TEST(FilteredRE2Test, MatchEmptyPattern) { // "EmptyStringInStringSetBug", &v)); // } -// TEST(FilteredRE2Test, MoveSemantics) { -// FilterTestVars v1; -// int id; -// v1.f.Add("foo\\d+", v1.opts, &id); -// EXPECT_EQ(0, id); -// v1.f.Compile(&v1.atoms); -// EXPECT_EQ(1, v1.atoms.size()); -// EXPECT_EQ("foo", v1.atoms[0]); -// v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches); -// EXPECT_EQ(1, v1.matches.size()); -// EXPECT_EQ(0, v1.matches[0]); -// v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches); -// EXPECT_EQ(0, v1.matches.size()); - -// // The moved-to object should do what the 
moved-from object did. -// FilterTestVars v2; -// v2.f = std::move(v1.f); -// v2.f.AllMatches("abc foo1 xyz", {0}, &v2.matches); -// EXPECT_EQ(1, v2.matches.size()); -// EXPECT_EQ(0, v2.matches[0]); -// v2.f.AllMatches("abc bar2 xyz", {0}, &v2.matches); -// EXPECT_EQ(0, v2.matches.size()); - -// // The moved-from object should have been reset and be reusable. -// v1.f.Add("bar\\d+", v1.opts, &id); -// EXPECT_EQ(0, id); -// v1.f.Compile(&v1.atoms); -// EXPECT_EQ(1, v1.atoms.size()); -// EXPECT_EQ("bar", v1.atoms[0]); -// v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches); -// EXPECT_EQ(0, v1.matches.size()); -// v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches); -// EXPECT_EQ(1, v1.matches.size()); -// EXPECT_EQ(0, v1.matches[0]); - -// // Verify that "overwriting" works and also doesn't leak memory. -// // (The latter will need a leak detector such as LeakSanitizer.) -// v1.f = std::move(v2.f); -// v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches); -// EXPECT_EQ(1, v1.matches.size()); -// EXPECT_EQ(0, v1.matches[0]); -// v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches); -// EXPECT_EQ(0, v1.matches.size()); -// } +TEST(FilteredRE2Test, MoveSemantics) { + FilterTestVars v1; + int id; + v1.f.Add("foo\\d+", v1.opts, &id); + EXPECT_EQ(0, id); + v1.f.Compile(&v1.atoms); + EXPECT_EQ(1, v1.atoms.size()); + EXPECT_EQ("foo", v1.atoms[0]); + v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches); + EXPECT_EQ(1, v1.matches.size()); + EXPECT_EQ(0, v1.matches[0]); + v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches); + EXPECT_EQ(0, v1.matches.size()); + + // The moved-to object should do what the moved-from object did. + FilterTestVars v2; + v2.f = std::move(v1.f); + v2.f.AllMatches("abc foo1 xyz", {0}, &v2.matches); + EXPECT_EQ(1, v2.matches.size()); + EXPECT_EQ(0, v2.matches[0]); + v2.f.AllMatches("abc bar2 xyz", {0}, &v2.matches); + EXPECT_EQ(0, v2.matches.size()); + + // The moved-from object should have been reset and be reusable. 
+ v1.f.Add("bar\\d+", v1.opts, &id); + EXPECT_EQ(0, id); + v1.f.Compile(&v1.atoms); + EXPECT_EQ(1, v1.atoms.size()); + EXPECT_EQ("bar", v1.atoms[0]); + v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches); + EXPECT_EQ(0, v1.matches.size()); + v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches); + EXPECT_EQ(1, v1.matches.size()); + EXPECT_EQ(0, v1.matches[0]); + + // Verify that "overwriting" works and also doesn't leak memory. + // (The latter will need a leak detector such as LeakSanitizer.) + v1.f = std::move(v2.f); + v1.f.AllMatches("abc foo1 xyz", {0}, &v1.matches); + EXPECT_EQ(1, v1.matches.size()); + EXPECT_EQ(0, v1.matches[0]); + v1.f.AllMatches("abc bar2 xyz", {0}, &v1.matches); + EXPECT_EQ(0, v1.matches.size()); +} } // namespace re2