diff --git a/re2/filtered_re2.cc b/re2/filtered_re2.cc index db0a5c9bc5cdf6879aca702a3676fa80a742baa7..c738b8b90329899f19cdf81f4f00b0e7015b2852 100644 --- a/re2/filtered_re2.cc +++ b/re2/filtered_re2.cc @@ -519,34 +519,53 @@ void AtomsToRegexps(std::vector re2_vec_, std::vector atoms, std::vec * 如果没有原子, 那么直接会把re加进去。 * 如果这个正则表达式有原子,那么要把该正则表达式的所有的原子的索引全加入,这个正则表达式才能加入成功。 */ + // std::map map; + + std::vector atoms_total; + std::vector vec_per_num; + std::vector atoms_tmp; + std::vector re_v; + for(size_t i = 0; i < re2_vec_.size(); i++) { std::vector my_atoms = MyCompile(re2_vec_[i]->pattern(), min_atom_len); - if(my_atoms.size() == 0) + if(my_atoms.size() != 0) { + for(auto x : my_atoms) + atoms_total.push_back(x); + } + + } + for(size_t i = 0; i < atoms.size(); i++) + { + atoms_tmp.push_back(atoms_total[atoms[i]]); + } + for(size_t i = 0; i < re2_vec_.size(); i++) + { + std::vector my_atoms = MyCompile(re2_vec_[i]->pattern(), min_atom_len); + if(my_atoms.size() == 0){ regexps->push_back(i); continue; } else { - for(auto x : my_atoms) + int count = 0; + for(size_t ii = 0; ii < my_atoms.size(); ii++) { - int flag = 0; - for(auto y : atoms) + for(size_t jj = 0; jj < atoms_tmp.size(); jj++) { - if(x == my_atoms[y]) - continue; - else - { - flag = 1; + if(my_atoms[ii] == atoms_tmp[jj]){ + count++; break; } - if(flag == 0) regexps->push_back(i); } } + if(count == (int)my_atoms.size()) regexps->push_back(int(i)); } } + + } int FilteredRE2::FirstMatch(const StringPiece& text, diff --git a/re2/testing/filtered_re2_test.cc b/re2/testing/filtered_re2_test.cc index 3e25bf10b2faf2e9acca5f639db9dd18a9d4057f..7cace6d5e33b8a06428815ac897dbe71a4ec585c 100644 --- a/re2/testing/filtered_re2_test.cc +++ b/re2/testing/filtered_re2_test.cc @@ -1,7 +1,7 @@ // Copyright 2009 The RE2 Authors. All Rights Reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-
+#include
 #include
 #include
 #include
@@ -338,5 +338,63 @@ TEST(FilteredRE2Test, MoveSemantics) {
   EXPECT_EQ(0, v1.matches.size());
 }
 
+// Adds two patterns for which Compile() reports no atoms, then checks that
+// FirstMatch() on "hello world" with an empty atom-id list returns 0.
+TEST(FilteredRE2Test, SlowFirstMatch) {
+  FilterTestVars v;
+  int id1;
+  v.f.Add("h.*o", v.opts, &id1);
+  int id2;
+  v.f.Add("(\\w+):(\\d+)", v.opts, &id2);
+
+  v.f.Compile(&v.atoms);
+  EXPECT_EQ(0, v.atoms.size());
+
+  std::string text = "hello world";
+  std::vector<int> atom_ids;
+
+  EXPECT_EQ(0, v.f.FirstMatch(text, atom_ids));
+}
+
+// Loads the regexps of atom_tests[1] ("AllAtomsGtMinLengthFound"), offers atom
+// indices 0 through 7 to AllPotentials(), and expects three regexps back.
+TEST(FilteredRE2Test, AllPotentials) {
+  FilterTestVars v;
+  AtomTest* t = &atom_tests[1];
+  EXPECT_EQ("AllAtomsGtMinLengthFound", std::string(t->testname));
+  size_t nregexp;
+  for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
+    if (t->regexps[nregexp] == NULL)
+      break;
+  AddRegexpsAndCompile(t->regexps, nregexp, &v);
+  std::vector<int> atoms;
+  for (int i = 0; i < 8; i++)
+    atoms.push_back(i);
+  std::vector<int> potential_regexps;
+  v.f.AllPotentials(atoms, &potential_regexps);
+  EXPECT_EQ(3, potential_regexps.size());
+}
+
+// Loads the regexps of atom_tests[2] ("SubstrAtomRemovesSuperStrInOr"), offers
+// atom indices 5 and 6 to AllPotentials(), and expects exactly one regexp back.
+TEST(FilteredRE2Test, RegexpsGivenStrings) {
+  FilterTestVars v;
+  AtomTest* t = &atom_tests[2];
+
+  EXPECT_EQ("SubstrAtomRemovesSuperStrInOr", std::string(t->testname));
+  size_t nregexp;
+  for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
+    if (t->regexps[nregexp] == NULL)
+      break;
+  AddRegexpsAndCompile(t->regexps, nregexp, &v);
+  std::vector<int> atoms;
+
+  atoms.push_back(5);
+  atoms.push_back(6);
+  std::vector<int> potential_regexps;
+  v.f.AllPotentials(atoms, &potential_regexps);
+  EXPECT_EQ(1, potential_regexps.size());
+}
+
 }  // namespace re2