From ac03678d66c55e7af2ec0c2eadac372f603ab649 Mon Sep 17 00:00:00 2001 From: yangwentong <425822674@qq.com> Date: Mon, 17 Oct 2022 21:44:13 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E4=BC=98=E5=8C=96set.cc=E4=B8=AD=E7=9A=84A?= =?UTF-8?q?dd=E5=92=8CMatch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- re2/set.cc | 57 ++++++++++++++++++------------------------------------ 1 file changed, 19 insertions(+), 38 deletions(-) diff --git a/re2/set.cc b/re2/set.cc index 7710617..4ece785 100644 --- a/re2/set.cc +++ b/re2/set.cc @@ -79,7 +79,7 @@ namespace re2 } else { - elem_.push_back(pair(pattern.as_string(), nullptr)); + elem_.push_back(pair(pattern.as_string(), (re2::Regexp*)re)); size_++; // rure_free(re); return place_num; @@ -145,7 +145,6 @@ namespace re2 { if(v == NULL) { - bool result = rure_set_is_match((rure_set *)prog_.get(), (const uint8_t *)pat_str, length, 0); return result; @@ -165,49 +164,31 @@ namespace re2 } break; } - case RE2::ANCHOR_BOTH: { - if(v == NULL) + bool matches[elem_.size()]; + bool result = rure_set_matches((rure_set *)prog_.get(), + (const uint8_t *)pat_str, length, 0, matches); + if(!result) return false; + if(v != NULL) v->clear(); + for(size_t i = 0; i < elem_.size(); i++) { - bool matches[elem_.size()]; - bool result = rure_set_matches((rure_set *)prog_.get(), - (const uint8_t *)pat_str, length, 0, matches); - if(!result) return false; - for(size_t i = 0; i < elem_.size(); i++) + if(matches[i]) { - if(matches[i]) + rure *re = (rure*)elem_[i].second; + rure_match match = {0}; + rure_find(re, (const uint8_t *)pat_str, strlen(pat_str), + 0, &match); + if(match.start == 0 && match.end == strlen(pat_str)) { - const char *pattern = elem_[i].first.c_str(); - rure *re = rure_compile_must(pattern); - rure_match match = {0}; - rure_find(re, (const uint8_t *)pat_str, strlen(pat_str), - 0, &match); - if(match.start == 0 && match.end == strlen(pat_str)) return true; - } - + if(v) v->push_back(i); // v不空的情形,把索引加入到v中 + else return true; // v为NULL, 直接返回匹配成功的情形 + } } - return false; } - else - { - v->clear(); - bool matches[elem_.size()]; - bool result = rure_set_matches((rure_set *)prog_.get(), - (const uint8_t *)pat_str, length, 0, matches); - if(!result) return false; - for(size_t i = 0; i < elem_.size(); i++) - { - if(matches[i]) - { - const char *pattern = elem_[i].first.c_str(); - rure *re = rure_compile_must(pattern); - rure_match match = {0}; - rure_find(re, (const uint8_t *)pat_str, strlen(pat_str), - 0, &match); - if(match.start == 0 && match.end == strlen(pat_str)) v->push_back(i); - } - } + if(v == NULL) return false; // v为空的情况 + else // v不为空的情况,若经过处理后v中存储了相关索引,则返回true,否则false + { if(v->size()) return true; else return false; } -- Gitee From e2c04ea41f5d94a22bcddef32cb00433e26254ca Mon Sep 17 00:00:00 2001 From: yangwentong <425822674@qq.com> Date: Mon, 17 Oct 2022 21:58:57 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dset.cc=E4=B8=ADMatch?= =?UTF-8?q?=E5=87=BD=E6=95=B0=EF=BC=8C=E6=9B=B4=E6=AD=A3TEST(Set,=20Anchor?= =?UTF-8?q?Start)=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- re2/set.cc | 59 ++++++++++++++++++++++++++++++----------- re2/testing/set_test.cc | 13 +++------ 2 files changed, 47 insertions(+), 25 deletions(-) diff --git a/re2/set.cc b/re2/set.cc index 4ece785..76aa7dd 100644 --- a/re2/set.cc +++ b/re2/set.cc @@ -196,27 +196,54 @@ namespace re2 } case RE2::ANCHOR_START: { - if(v == NULL) + bool matches[elem_.size()]; + bool result = rure_set_matches((rure_set *)prog_.get(), + (const uint8_t *)pat_str, length, 0, matches); + if(!result) return false; + if(v != NULL) v->clear(); + for(size_t i = 0; i < elem_.size(); i++) { - - bool result = rure_set_is_match((rure_set *)prog_.get(), - (const uint8_t *)pat_str, length, 0); - return result; - } - else - { - v->clear(); - bool matches[elem_.size()]; - bool result = rure_set_matches((rure_set *)prog_.get(), - (const uint8_t *)pat_str, length, 0, matches); - if(!result) return false; - for(size_t i = 0; i < elem_.size(); i++) + if(matches[i]) { - if(matches[i]) v->push_back(i); + rure *re = (rure*)elem_[i].second; + rure_match match = {0}; + rure_find(re, (const uint8_t *)pat_str, strlen(pat_str), + 0, &match); + if(match.start == 0) + { + if(v) v->push_back(i); // v不空的情形,把索引加入到v中 + else return true; // v为NULL, 直接返回匹配成功的情形 + } } - return true; + } + if(v == NULL) return false; // v为空的情况 + else // v不为空的情况,若经过处理后v中存储了相关索引,则返回true,否则false + { + if(v->size()) return true; + else return false; } break; + // if(v == NULL) + // { + + // bool result = rure_set_is_match((rure_set *)prog_.get(), + // (const uint8_t *)pat_str, length, 0); + // return result; + // } + // else + // { + // v->clear(); + // bool matches[elem_.size()]; + // bool result = rure_set_matches((rure_set *)prog_.get(), + // (const uint8_t *)pat_str, length, 0, matches); + // if(!result) return false; + // for(size_t i = 0; i < elem_.size(); i++) + // { + // if(matches[i]) v->push_back(i); + // } + // return true; + // } + // break; } } return true; diff --git a/re2/testing/set_test.cc b/re2/testing/set_test.cc index b03c967..a7feb4c 100644 --- a/re2/testing/set_test.cc +++ b/re2/testing/set_test.cc @@ -51,21 +51,16 @@ TEST(Set, AnchorStart) { ASSERT_EQ(s.Match("foobar", NULL), true); ASSERT_EQ(s.Match("fooba", NULL), true); - ASSERT_EQ(s.Match("oobar", NULL), true); + ASSERT_EQ(s.Match("oobar", NULL), false); std::vector v; ASSERT_EQ(s.Match("foobar", &v), true); - ASSERT_EQ(v.size(), 2); - ASSERT_EQ(v[0], 0); - ASSERT_EQ(v[1], 1); - - ASSERT_EQ(s.Match("fooba", &v), true); ASSERT_EQ(v.size(), 1); ASSERT_EQ(v[0], 0); - ASSERT_EQ(s.Match("oobar", &v), true); - ASSERT_EQ(v.size(), 1); - ASSERT_EQ(v[0], 1); + ASSERT_EQ(s.Match("oobar", &v), false); + ASSERT_EQ(v.size(), 0); + } TEST(Set, UnanchoredFactored) { -- Gitee