From 0fc67cce6533dd104f1b0f7a9bc87478d4836ad8 Mon Sep 17 00:00:00 2001 From: yangwentong <425822674@qq.com> Date: Thu, 29 Sep 2022 02:38:14 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E4=BF=AE=E5=A4=8DAtomsToRegexps=E6=8E=A5?= =?UTF-8?q?=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- re2/filtered_re2.cc | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/re2/filtered_re2.cc b/re2/filtered_re2.cc index db0a5c9..c738b8b 100644 --- a/re2/filtered_re2.cc +++ b/re2/filtered_re2.cc @@ -519,34 +519,53 @@ void AtomsToRegexps(std::vector re2_vec_, std::vector atoms, std::vec * 如果没有原子, 那么直接会把re加进去。 * 如果这个正则表达式有原子,那么要把该正则表达式的所有的原子的索引全加入,这个正则表达式才能加入成功。 */ + // std::map map; + + std::vector atoms_total; + std::vector vec_per_num; + std::vector atoms_tmp; + std::vector re_v; + for(size_t i = 0; i < re2_vec_.size(); i++) { std::vector my_atoms = MyCompile(re2_vec_[i]->pattern(), min_atom_len); - if(my_atoms.size() == 0) + if(my_atoms.size() != 0) { + for(auto x : my_atoms) + atoms_total.push_back(x); + } + + } + for(size_t i = 0; i < atoms.size(); i++) + { + atoms_tmp.push_back(atoms_total[atoms[i]]); + } + for(size_t i = 0; i < re2_vec_.size(); i++) + { + std::vector my_atoms = MyCompile(re2_vec_[i]->pattern(), min_atom_len); + if(my_atoms.size() == 0){ regexps->push_back(i); continue; } else { - for(auto x : my_atoms) + int count = 0; + for(size_t ii = 0; ii < my_atoms.size(); ii++) { - int flag = 0; - for(auto y : atoms) + for(size_t jj = 0; jj < atoms_tmp.size(); jj++) { - if(x == my_atoms[y]) - continue; - else - { - flag = 1; + if(my_atoms[ii] == atoms_tmp[jj]){ + count++; break; } - if(flag == 0) regexps->push_back(i); } } + if(count == (int)my_atoms.size()) regexps->push_back(int(i)); } } + + } int FilteredRE2::FirstMatch(const StringPiece& text, -- Gitee From 1c90c5f209e32353441bd5a1c20454e83f839342 Mon Sep 17 00:00:00 2001 From: yangwentong <425822674@qq.com> Date: Thu, 29 Sep 2022 02:40:01 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E4=B8=BAfiltered=5Fre2=5Ftest.cc=E5=A2=9E?= =?UTF-8?q?=E5=8A=A0=E4=B8=89=E7=BB=84=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= =?UTF-8?q?=EF=BC=8C=E5=88=86=E5=88=AB=E5=AF=B9SlowFirstMatch=E3=80=81AllP?= =?UTF-8?q?otentials=E5=92=8CRegexpsGivenStrings=E8=BF=9B=E8=A1=8C?= =?UTF-8?q?=E4=BA=86=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- re2/testing/filtered_re2_test.cc | 62 +++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/re2/testing/filtered_re2_test.cc b/re2/testing/filtered_re2_test.cc index 3e25bf1..7cace6d 100644 --- a/re2/testing/filtered_re2_test.cc +++ b/re2/testing/filtered_re2_test.cc @@ -1,7 +1,7 @@ // Copyright 2009 The RE2 Authors. All Rights Reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. - +#include #include #include #include @@ -338,5 +338,65 @@ TEST(FilteredRE2Test, MoveSemantics) { EXPECT_EQ(0, v1.matches.size()); } +TEST(FilteredRE2Test, SlowFirstMatch) { + FilterTestVars v; // override the minimum atom length + int id1; + v.f.Add("h.*o", v.opts, &id1); + int id2; + v.f.Add("(\\w+):(\\d+)", v.opts, &id2); + + v.f.Compile(&v.atoms); + EXPECT_EQ(0, v.atoms.size()); + + std::string text = "hello world"; + std::vector atom_ids; + std::vector matching_regexps; + + EXPECT_EQ(0, v.f.FirstMatch(text, atom_ids)); +} + +TEST(FilteredRE2Test, AllPotentials) { + FilterTestVars v; + AtomTest* t = &atom_tests[1]; + EXPECT_EQ("AllAtomsGtMinLengthFound", std::string(t->testname)); + size_t nregexp; + for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++) + if (t->regexps[nregexp] == NULL) + break; + AddRegexpsAndCompile(t->regexps, nregexp, &v); + std::vector atoms; + atoms.push_back(0); + atoms.push_back(1); + atoms.push_back(2); + atoms.push_back(3); + atoms.push_back(4); + atoms.push_back(5); + atoms.push_back(6); + atoms.push_back(7); + std::vector potential_regexps; + v.f.AllPotentials(atoms, &potential_regexps); + EXPECT_EQ(3 ,potential_regexps.size()); + +} + +TEST(FilteredRE2Test, RegexpsGivenStrings) { + FilterTestVars v; + AtomTest* t = &atom_tests[2]; + + EXPECT_EQ("SubstrAtomRemovesSuperStrInOr", std::string(t->testname)); + size_t nregexp; + for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++) + if (t->regexps[nregexp] == NULL) + break; + AddRegexpsAndCompile(t->regexps, nregexp, &v); + std::vector atoms; + + atoms.push_back(5); + atoms.push_back(6); + std::vector potential_regexps; + v.f.AllPotentials(atoms, &potential_regexps); + EXPECT_EQ(1 ,potential_regexps.size()); +} + } // namespace re2 -- Gitee