From a9aa4e8a60d53d152bde67b3bcb7129e01705b39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=BF=97=E6=B6=9B?= Date: Wed, 21 Sep 2022 03:24:38 +0000 Subject: [PATCH 1/2] =?UTF-8?q?TEST(RE2,=20UTF8)=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E7=94=A8=E4=BE=8B=E5=85=A8=E9=83=A8=E9=80=9A=E8=BF=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘志涛 --- re2/testing/re2_test.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/re2/testing/re2_test.cc b/re2/testing/re2_test.cc index b8e9a7e..ae101d3 100644 --- a/re2/testing/re2_test.cc +++ b/re2/testing/re2_test.cc @@ -1111,11 +1111,10 @@ TEST(RE2, UTF8) { // Check that '.' matches one byte or UTF-8 character // according to the mode. std::string s; - /*待处理的 RE2 re_test3("(.)", RE2::Latin1); ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test3, &s)); ASSERT_EQ(s, std::string("\xe6")); - */ + RE2 re_test4("(.)"); ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test4, &s)); ASSERT_EQ(s, std::string("\xe6\x97\xa5")); -- Gitee From 0785fc14044df8f27f3816738696736f4c8abb11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=BF=97=E6=B6=9B?= Date: Wed, 21 Sep 2022 03:25:16 +0000 Subject: [PATCH 2/2] =?UTF-8?q?TEST(RE2,=20UTF8)=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E7=94=A8=E4=BE=8B=E5=85=A8=E9=83=A8=E9=80=9A=E8=BF=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 刘志涛 --- re2/re2.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/re2/re2.cc b/re2/re2.cc index 1a60d09..8e7169b 100644 --- a/re2/re2.cc +++ b/re2/re2.cc @@ -823,7 +823,13 @@ namespace re2 size_t start = match.start; size_t end = match.end; size_t len = end - start; - submatch[i] = StringPiece(text.data() + start, static_cast(len)); + if(options_.encoding() == RE2::Options::EncodingUTF8){ + submatch[i] = StringPiece(text.data() + start, static_cast(len)); + } + else{ + submatch[i] = StringPiece(text.data() + start, static_cast(len/2)); + } + } else { -- Gitee