From 8979c4a6f685396b95706ce11d62524e81f74f1f Mon Sep 17 00:00:00 2001
From: yangwentong <425822674@qq.com>
Date: Tue, 3 Jan 2023 11:40:11 +0800
Subject: [PATCH 1/4] =?UTF-8?q?=E6=9B=B4=E6=96=B0README?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md | 75 ++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 61 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index c734488..d24bf0e 100644
--- a/README.md
+++ b/README.md
@@ -1,17 +1,19 @@
 # re2-rust
 
-a compatible RE2 API(
-2021-11-01)  by calling Rust library [regex](https://github.com/rust-lang/regex)
+## re2-rust介绍
+re2-rust是用来兼容RE2 API[(version 2021-11-01)](https://github.com/google/re2/tree/2021-11-01)的项目，通过调用[Rust正则表达式库](https://github.com/rust-lang/regex)进行实现。re2-rust的功能与原本RE2基本保持一致。
 
+re2-rust保留了re2中的对外的接口，分别在re2.h、set.h和filtered_re2.h中。
 
+re2.h中的接口可以实现正则表达式的匹配、查找和替换功能；set.h中的接口可以同时处理多组正则表达式；filtered_re2.h中的接口提供了一种预过滤机制，有助于减少需要实际搜索的regexp的数量。这些接口会调用Rust正则库提供的接口对用户传入的数据进行处理，最后把结果返回给调用者。
+
+
+## 编译、安装re2-rust
 ``` Shell
 $ git clone https://gitee.com/openeuler/re2-rust.git
 $ cd re2-rust
 ```
-
-### 编译、安装re2-rust
-
-使用openEuler 22.03-LTS
+**使用openEuler 22.03-LTS**
 
 ``` Shell
 dnf install git
@@ -27,7 +29,7 @@ g++ testinstall.cc -o testinstall -lre2
 ./testinstall
 ```
 
-Ubuntu 20.04
+**使用Ubuntu 20.04**
 
 ``` Shell
 $ make
@@ -37,8 +39,52 @@ $ g++ testinstall.cc -o testinstall -lre2
 $ ./testinstall
 ```
 
-## Test Rusults
+## 性能测试
+RE2-Rust项目中只需要对re2目录下filtered_re2.h、re2.h、set.h文件中声明的部分函数进行性能测试，而filtered_re2.h中的主要函数是通过调用re2.h中的`PartialMatch()`函数实现的，所以下面只对re2.h和set.h文件中主要函数进行性能测试。相关的性能测试代码详见regexp_benchmark.cc文件。
+re2.h文件中相关函数的性能测试：
+我们对re2.h对外接口中的`FullMatch()`、`PartialMatch()`、`FindAndConsume()`三个函数进行了测试，下面表格中的re2-c++和re2-rust分别通过上述三个函数测试了表格中的八个正则表达式，但由于这三个函数的本质是调用了`RE2::DoMatch()`函数，所以在表格中不对上面三个函数进行区分。下面表格是regexp_benchmark.cc中一些正则表达式在text_re2_1KB.txt文本下的执行时间。
+
+| 正则表达式  （含义）                    | RE2-C++       | RE2-Rust      | PCRE         | Regex         |
+| --------------------------------------- | ------------- | ------------- | ------------ | ------------- |
+| `""`                                    | 339 ns/iter   | 213 ns/iter   | 133 ns/iter  | 54 ns/iter    |
+| **空**                                  | 3019.80 MB/s  | 4785.82 MB/s  | 7653.53 MB/s | 18890.09 MB/s |
+| `"abcdefg"`                             | 820 ns/iter   | 259 ns/iter   | 1686 ns/iter | 97 ns/iter    |
+| **匹配abcdefg字符串**                   | 1248.70 MB/s  | 3951.78 MB/s  | 607.26 MB/s  | 10507.00 MB/s |
+| `"(?-s)^(?:GET\|POST) +([^ ]+) HTTP"`   | 343 ns/iter   | 246 ns/iter   | 147 ns/iter  | 92 ns/iter    |
+| **匹配HTTP请求报文格式**                | 2982.79 MB/s  | 4157.21 MB/s  | 6932.47 MB/s | 11096.98 MB/s |
+| `"(?-s)^(.+)"`                          | 542 ns/iter   | 212 ns/iter   | 203 ns/iter  | 56 ns/iter    |
+| **匹配行首连续出现一次以上的字符**      | 1886.72 MB/s  | 4807.80 MB/s  | 5031.92 MB/s | 18062.85 MB/s |
+| `"(?-s)^([ -~]+)"`                      | 557 ns/iter   | 217 ns/iter   | 190 ns/iter  | 59 ns/iter    |
+| **匹配行首连续出现一次以上的ASCII字符** | 1835.28 MB/s  | 4715.87 MB/s  | 5365.38 MB/s | 17188.64 MB/s |
+| `"(?s).*"`                              | 349 ns/iter   | 21223 ns/iter | 154 ns/iter  | 2588 ns/iter  |
+| **匹配任意字符**                        | 2929.63 MB/s  | 48.25 MB/s    | 6640.02 MB/s | 395.62 MB/s   |
+| `"(?s).*$"`                             | 11401 ns/iter | 19678 ns/iter | 159 ns/iter  | 2468 ns/iter  |
+| **匹配任意字符直至文本末尾**            | 89.81 MB/s    | 52.04 MB/s    | 6415.22 MB/s | 414.86 MB/s   |
+| `"(?s)((.*)()()($))"`                   | 11179 ns/iter | 19873 ns/iter | 260 ns/iter  | 2488 ns/iter  |
+| **匹配任意字符直至文本末尾（含多个捕获组）** | 91.59 MB/s    | 51.53 MB/s    | 3937.18 MB/s | 411.54 MB/s   |
+
+注：`(?s)`表示单行模式（`.`可以匹配换行符），`(?-s)`表示关闭单行模式。
+
+set.h文件中相关函数的性能测试：
+
+可以看到，set.h中主要是下面的函数接口有匹配功能：
+
+`bool Match(const StringPiece& text, std::vector<int>* v) const;`
 
+上述函数的功能是用同一文本同时匹配多个正则表达式，并将匹配到的结果保存到向量v中；若传入的v为空指针（NULL），则表示不需要返回匹配结果。
+
+我们使用的待匹配文本还是text_re2_1KB.txt中的数据，同时匹配五个正则表达式，分别是`"(?s).*"`、`"(?s).*$"`、`"(?s)((.*)()()($))"`、`"hwx"`、`"ldi"`。
+
+由于锚点为`RE2::UNANCHORED`、`RE2::ANCHOR_BOTH`、`RE2::ANCHOR_START`的三种不同情况已经在`RE2::Set::Add()`中进行了处理，所以对这三种锚点情况的处理不计入匹配时间。为方便RE2-Rust与RE2-C++、Regex进行性能对比分析，我们采用锚点为`RE2::UNANCHORED`的情况进行性能对比，详细性能评测代码见regexp_benchmark.cc文件中的`Set_Match_UNANCHORED_RE2()`和`Set_Match_UNANCHORED_NULL_RE2()`函数。下面是set.h文件中`RE2::Set::Match()`在RE2-C++、RE2-Rust、Regex三种不同正则表达式框架下的性能对比结果（PCRE不支持同时匹配多个正则表达式）：
+
+|             | RE2-C++      | RE2-Rust     | Regex        |
+| ----------- | ------------ | ------------ | ------------ |
+| **V为空**   | 1716 ns/iter | 383 ns/iter  | 18 ns/iter   |
+|             | 596.67 MB/s  | 2671.52 MB/s | 56944 MB/s   |
+| **V不为空** | 8231 ns/iter | 535 ns/iter  | 6686 ns/iter |
+|             | 124.40 MB/s  | 1910.52 MB/s | 153 MB/s     |
+
+另外我们采用第三方正则表达式测试框架regex-performance，通过一些指定的正则表达式，对主流的正则表达式库进行了评测（测试详情可见[test-results.txt](https://gitee.com/openeuler/re2-rust/blob/master/test-results.txt)），得到了如下结果：
 ```
 Total Results:
 [      ctre] time:  4010462.7 ms, score:      6 points,
@@ -56,11 +102,15 @@ Total Results:
 [rust_regex] time:   4790.2 ms, score:     56 points,
 [rust_regrs] time:  47772.1 ms, score:      6 points,
 ```
-从测试结果看re2-rust评分比re2略高，但是耗时re2-rust比re2增加很多，通过仔细分析发现正则表达式'[a-q][^u-z]{13}x'耗时特别高4280.7 - 130.5 = 4150.2 ms，另外'\b\w+nn\b'耗时322.6 - 23.9 = 298.7，除去这两个异常测试项外的16个测试项耗时re2-rust：334.4 ms vs. re2: 362 ms ，也就是说re2-rust在大多数情况下性能比re2要好。
-
+从以上测试结果看re2-rust评分比re2略高，但是耗时re2-rust比re2增加很多，通过仔细分析发现正则表达式`'[a-q][^u-z]{13}x'`耗时特别高4280.7 - 130.5 = 4150.2 ms，另外`'\b\w+nn\b'`耗时322.6 - 23.9 = 298.7 ms，除去这两个异常测试项外的16个测试项耗时re2-rust：334.4 ms vs. re2：362 ms，也就是说re2-rust在大多数情况下性能比re2要好。
 从测试耗时看re2-rust和rust_regex两者相差3%（多次测评结果看两者差距上下浮动5%以内），总体看re2-rust和rust_regex性能基本一致。
+综合对比可知：
+
+1.  RE2-Rust在大部分测试用例下性能优于RE2-C++，但在涉及捕获组时性能会差于RE2-C++，原因可见 <https://github.com/rust-lang/regex/discussions/903>
+2.  RE2-Rust和Regex性能大致相当，但由于RE2-Rust调用的是Regex对外提供的C接口，会比直接使用Regex多出函数调用、特殊处理、错误判断等开销，故RE2-Rust性能会略低于Regex
+3.  RE2-Rust支持多行模式，但不支持同名的捕获组
+4.  RE2-Rust支持的转义字符比RE2-C++少，例如不支持`\C`
 
-测试采用第三方正则表达式测试框架regex-performance，测试详情见test-results.txt
 
 # Links
 
@@ -68,6 +118,3 @@ Total Results:
 * https://gitee.com/src-openeuler/re2
 * https://github.com/google/re2
 * https://gitee.com/mengning997/regex-performance for re2-rust
-
-
-
-- 
Gitee


From c216ad3cc7ab0c6354b154fd0405bd24adf8f3b1 Mon Sep 17 00:00:00 2001
From: yangwentong <425822674@qq.com>
Date: Wed, 4 Jan 2023 12:10:54 +0800
Subject: [PATCH 2/4] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E5=A4=9A=E7=BA=BF?=
 =?UTF-8?q?=E7=A8=8B=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 re2/re2.cc | 43 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 32 insertions(+), 11 deletions(-)

diff --git a/re2/re2.cc b/re2/re2.cc
index 4091209..4a3d846 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -233,14 +233,13 @@ namespace re2
       delete group_names_;
   }
 
-  // Returns named_groups_, computing it if needed.
-  const std::map<std::string, int> &RE2::NamedCapturingGroups() const
+  std::map<std::string, int> *NamedCaptures(re2::Prog *prog)
   {
     std::map<std::string, int> *temp = new std::map<std::string, int>;
     std::string str;
     char *name;
     int i = 0;
-    rure_iter_capture_names *it = rure_iter_capture_names_new((rure *)prog_);
+    rure_iter_capture_names *it = rure_iter_capture_names_new((rure *)prog);
     while (rure_iter_capture_names_next(it, &name))
     {
       str = name;
@@ -248,19 +247,16 @@ namespace re2
         temp->insert(make_pair(str, i));
       ++i;
     }
-    named_groups_ = temp;
-
-    return *named_groups_;
+    return temp;
   }
-  
-  // Returns group_names_, computing it if needed.
-  const std::map<int, std::string> &RE2::CapturingGroupNames() const
+
+  std::map<int, std::string> *CaptureNames(re2::Prog *prog)
   {
     std::map<int, std::string> *temp = new std::map<int, std::string>;
     std::string str;
     char *name;
     int i = 0;
-    rure_iter_capture_names *it = rure_iter_capture_names_new((rure *)prog_);
+    rure_iter_capture_names *it = rure_iter_capture_names_new((rure *)prog);
     while (rure_iter_capture_names_next(it, &name))
     {
       str = name;
@@ -268,7 +264,32 @@ namespace re2
         temp->insert(make_pair(i, str));
       ++i;
     }
-    group_names_ = temp;
+    return temp;
+  }
+
+  // Returns named_groups_, computing it if needed.
+  const std::map<std::string, int> &RE2::NamedCapturingGroups() const
+  {
+    std::call_once(named_groups_once_, [](const RE2* re) {
+      if (re->suffix_regexp_ != NULL)
+      {
+        re->named_groups_ = NamedCaptures(re->prog_);
+      } 
+      if (re->named_groups_ == NULL)
+        re->named_groups_ = empty_named_groups;
+      }, this);
+    return *named_groups_;
+  }
+  
+  // Returns group_names_, computing it if needed.
+  const std::map<int, std::string> &RE2::CapturingGroupNames() const
+  {
+    std::call_once(group_names_once_, [](const RE2* re) {
+      if (re->suffix_regexp_ != NULL)
+        re->group_names_ = CaptureNames(re->prog_);
+      if (re->group_names_ == NULL)
+        re->group_names_ = empty_group_names;
+    }, this);
 
     return *group_names_;
   }
-- 
Gitee


From 8ee853dddd0684835d43d2527ae7806c6f1f11e6 Mon Sep 17 00:00:00 2001
From: yangwentong <425822674@qq.com>
Date: Wed, 4 Jan 2023 12:14:11 +0800
Subject: [PATCH 3/4] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E5=86=85=E5=AD=98?=
 =?UTF-8?q?=E6=B3=84=E6=BC=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 re2/re2.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/re2/re2.cc b/re2/re2.cc
index 4a3d846..6b63c67 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -187,6 +187,7 @@ namespace re2
         error_ = new std::string(msg);
         error_code_ = ErrorInternal; // 暂时对这个错误进行赋值，如何处理错误类型？？？
       }
+      rure_error_free(err);
       return;
     }
     prog_ = (Prog *)re;
-- 
Gitee


From 445cf2f364accb90f4b6d0c3d88ee4c69671e33f Mon Sep 17 00:00:00 2001
From: yangwentong <425822674@qq.com>
Date: Wed, 4 Jan 2023 14:08:58 +0800
Subject: [PATCH 4/4] =?UTF-8?q?=E6=9B=B4=E6=94=B9Latin1=E7=BC=96=E7=A0=81?=
 =?UTF-8?q?=E7=9A=84=E5=AD=97=E7=AC=A6=E4=B8=B2=E8=BD=AC=E6=8D=A2=E4=B8=BA?=
 =?UTF-8?q?UTF8=E7=9A=84=E5=AD=97=E7=AC=A6=E4=B8=B2=E7=9A=84=E6=96=B9?=
 =?UTF-8?q?=E5=BC=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 re2/re2.cc | 154 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 127 insertions(+), 27 deletions(-)

diff --git a/re2/re2.cc b/re2/re2.cc
index 6b63c67..54a572c 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -94,30 +94,125 @@ namespace re2
     Init(pattern, options);
   }
 
-  std::string encodingLatin1ToUTF8(std::string str)
+  typedef signed int Rune; /* One Unicode code point. Code-point values in Unicode 4.0 are 21 bits wide. */
+
+  enum
+  {
+    UTFmax = 4,         /* maximum number of UTF-8 bytes one rune encodes to */
+    Runesync = 0x80,    /* byte values below this cannot be part of a multi-byte UTF sequence */
+    Runeself = 0x80,    /* rune values below this encode as the identical single byte */
+    Runeerror = 0xFFFD, /* substitute rune: marks a decoding error, and replaces out-of-range runes when encoding */
+    Runemax = 0x10FFFF, /* maximum rune value */
+  };
+
+  enum
+  {
+    Bit1 = 7, /* payload bits in a one-byte sequence */
+    Bitx = 6, /* payload bits in each continuation byte */
+    Bit2 = 5, /* payload bits in the lead byte of a two-byte sequence */
+    Bit3 = 4, /* payload bits in the lead byte of a three-byte sequence */
+    Bit4 = 3, /* payload bits in the lead byte of a four-byte sequence */
+    Bit5 = 2, /* only used to derive T5 below */
+
+    T1 = ((1 << (Bit1 + 1)) - 1) ^ 0xFF, /* 0000 0000: marker bits of a one-byte sequence */
+    Tx = ((1 << (Bitx + 1)) - 1) ^ 0xFF, /* 1000 0000: marker bits of a continuation byte */
+    T2 = ((1 << (Bit2 + 1)) - 1) ^ 0xFF, /* 1100 0000: lead-byte marker, two-byte sequence */
+    T3 = ((1 << (Bit3 + 1)) - 1) ^ 0xFF, /* 1110 0000: lead-byte marker, three-byte sequence */
+    T4 = ((1 << (Bit4 + 1)) - 1) ^ 0xFF, /* 1111 0000: lead-byte marker, four-byte sequence */
+    T5 = ((1 << (Bit5 + 1)) - 1) ^ 0xFF, /* 1111 1000 */
+
+    Rune1 = (1 << (Bit1 + 0 * Bitx)) - 1, /* 0000 0000 0111 1111: largest rune that fits in one byte */
+    Rune2 = (1 << (Bit2 + 1 * Bitx)) - 1, /* 0000 0111 1111 1111: largest rune that fits in two bytes */
+    Rune3 = (1 << (Bit3 + 2 * Bitx)) - 1, /* 1111 1111 1111 1111: largest rune that fits in three bytes */
+    Rune4 = (1 << (Bit4 + 3 * Bitx)) - 1,
+    /* 0001 1111 1111 1111 1111 1111: largest value that fits in four bytes */
+
+    Maskx = (1 << Bitx) - 1, /* 0011 1111: selects the payload bits of a continuation byte */
+    Testx = Maskx ^ 0xFF,    /* 1100 0000: complement of Maskx */
+
+    Bad = Runeerror, /* alias for the substitute rune */
+  };
+  int runetochar(char *str, const Rune *rune)
   {
-    string strOut;
-    for (std::string::iterator it = str.begin(); it != str.end(); ++it)
+    /* Encodes *rune as UTF-8 into str (1 to UTFmax bytes, no terminator is written) and returns the number of bytes stored. Runes are signed, so convert to unsigned for the range checks: a negative rune becomes a huge value and is caught by the Runemax test below. */
+    unsigned long c;
+
+    /*
+     * one character sequence (the value is stored unchanged)
+     *	00000-0007F => 00-7F
+     */
+    c = *rune;
+    if (c <= Rune1)
     {
-      uint8_t ch = *it;
-      if (ch < 0x80)
-      {
-        strOut.push_back(ch);
-      }
-      else
-      {
-        strOut.push_back(0xc0 | ch >> 6);
-        strOut.push_back(0x80 | (ch & 0x3f));
-      }
+      str[0] = static_cast<char>(c);
+      return 1;
+    }
+
+    /*
+     * two character sequence: high bits go in the lead byte, low Bitx bits in the continuation byte
+     *	0080-07FF => T2 Tx
+     */
+    if (c <= Rune2)
+    {
+      str[0] = T2 | static_cast<char>(c >> 1 * Bitx);
+      str[1] = Tx | (c & Maskx);
+      return 2;
+    }
+
+    /*
+     * If the Rune is out of range, convert it to the error rune.
+     * Do this test here because the error rune encodes to three bytes.
+     * Doing it earlier would duplicate work, since an out of range
+     * Rune wouldn't have fit in one or two bytes.
+     */
+    if (c > Runemax)
+      c = Runeerror;
+
+    /*
+     * three character sequence (also taken by the substituted Runeerror)
+     *	0800-FFFF => T3 Tx Tx
+     */
+    if (c <= Rune3)
+    {
+      str[0] = T3 | static_cast<char>(c >> 2 * Bitx);
+      str[1] = Tx | ((c >> 1 * Bitx) & Maskx);
+      str[2] = Tx | (c & Maskx);
+      return 3;
+    }
+
+    /*
+     * four character sequence (values above Runemax were already replaced above)
+     *     10000-10FFFF => T4 Tx Tx Tx
+     */
+    str[0] = T4 | static_cast<char>(c >> 3 * Bitx);
+    str[1] = Tx | ((c >> 2 * Bitx) & Maskx);
+    str[2] = Tx | ((c >> 1 * Bitx) & Maskx);
+    str[3] = Tx | (c & Maskx);
+    return 4;
+  }
+
+  // Re-encodes latin1, taking each byte as one Latin-1 code point (0x00-0xFF),
+  // as UTF-8 into *utf. Any previous content of *utf is discarded first.
+  // Can't use EncodingUtils::EncodeLatin1AsUTF8 because it is
+  // deprecated and because it rejects code points 0x80-0x9F.
+  void ConvertLatin1ToUTF8(const StringPiece &latin1, std::string *utf)
+  {
+    char buf[UTFmax]; // scratch space for one encoded rune
+
+    utf->clear();
+    for (size_t i = 0; i < latin1.size(); i++)
+    {
+      Rune r = latin1[i] & 0xFF; // keep only the low 8 bits; presumably guards against sign extension of a signed char
+      int n = runetochar(buf, &r); // n = number of bytes written to buf
+      utf->append(buf, n);
     }
-    return strOut;
   }
 
   void RE2::Init(const StringPiece &pattern, const Options &options)
   {
     std::string rure_str; // 正则表达式UTF-8编码形式
     static std::once_flag empty_once;
-    std::call_once(empty_once, []() { //为了解决多线程中出现的资源竞争导致的数据不一致问题
+    std::call_once(empty_once, []() { // 为了解决多线程中出现的资源竞争导致的数据不一致问题
       empty_string = new std::string;
       empty_named_groups = new std::map<std::string, int>;
       empty_group_names = new std::map<int, std::string>;
@@ -149,7 +244,7 @@ namespace re2
     }
     else
     { // Latin-1编码
-      rure_str = encodingLatin1ToUTF8(pattern.ToString());
+      ConvertLatin1ToUTF8(pattern, &rure_str);
     }
 
     uint32_t flags = RURE_DEFAULT_FLAGS;
@@ -163,7 +258,7 @@ namespace re2
 
     // for All
     rure *re = rure_compile((const uint8_t *)rure_str.c_str(), strlen(rure_str.c_str()), flags, NULL, err);
-    //如果编译失败，打印错误信息
+    // 如果编译失败，打印错误信息
     if (re == NULL)
     {
       const char *msg = rure_error_message(err);
@@ -206,7 +301,7 @@ namespace re2
       entire_regexp_ = (re2::Regexp *)re;
     }
 
-    //获取捕获组的数量, 并对num_captures_其进行赋值
+    // 获取捕获组的数量, 并对num_captures_其进行赋值
     rure_captures *caps = rure_captures_new(re);
     size_t captures_len = rure_captures_len(caps) - 1;
     if (!options_.never_capture())
@@ -271,26 +366,30 @@ namespace re2
   // Returns named_groups_, computing it if needed.
   const std::map<std::string, int> &RE2::NamedCapturingGroups() const
   {
-    std::call_once(named_groups_once_, [](const RE2* re) {
+    std::call_once(
+        named_groups_once_, [](const RE2 *re)
+        {
       if (re->suffix_regexp_ != NULL)
       {
         re->named_groups_ = NamedCaptures(re->prog_);
       } 
       if (re->named_groups_ == NULL)
-        re->named_groups_ = empty_named_groups;
-      }, this);
+        re->named_groups_ = empty_named_groups; },
+        this);
     return *named_groups_;
   }
-  
+
   // Returns group_names_, computing it if needed.
   const std::map<int, std::string> &RE2::CapturingGroupNames() const
   {
-    std::call_once(group_names_once_, [](const RE2* re) {
+    std::call_once(
+        group_names_once_, [](const RE2 *re)
+        {
       if (re->suffix_regexp_ != NULL)
         re->group_names_ = CaptureNames(re->prog_);
       if (re->group_names_ == NULL)
-        re->group_names_ = empty_group_names;
-    }, this);
+        re->group_names_ = empty_group_names; },
+        this);
 
     return *group_names_;
   }
@@ -505,7 +604,8 @@ namespace re2
     // Latin-1编码转换
     if (options_.encoding() == RE2::Options::EncodingLatin1)
     {
-      haystack = encodingLatin1ToUTF8(text.as_string());
+      ConvertLatin1ToUTF8(text, &haystack);
+      // haystack = encodingLatin1ToUTF8(text.as_string());
     }
     rure *re = (rure *)prog_;
     // rure *re1 = (rure *)rprog_;
@@ -683,7 +783,7 @@ namespace re2
     // vec 用于存放捕获到的数据
     // nvec 表示需要捕获的数据的个数
 
-    //此处在改写的时候先不进行任何处理，直接使用之前的Match函数，完成之后在对Match进行改写
+    // 此处在改写的时候先不进行任何处理，直接使用之前的Match函数，完成之后在对Match进行改写
     if (!Match(text, 0, text.size(), re_anchor, vec, nvec))
     {
       // std::cout << "DoMatch : Match 带参 未匹配";
-- 
Gitee