From 549fd9b3e606c89c57d0a865bc4d716abc7bd766 Mon Sep 17 00:00:00 2001
From: bianshaolei <bianshaolei@kaihongdigi.com>
Date: Tue, 16 Nov 2021 23:37:42 +0800
Subject: [PATCH 1/6] fix bug: create string from utf8 data and explicit size;
 input is not '\0' terminated.

Signed-off-by: bianshaolei <bianshaolei@kaihongdigi.com>
---
 ecmascript/base/utf_helper.cpp | 47 ++++++++++++++++++++++++++++++++++
 ecmascript/base/utf_helper.h   |  6 ++++-
 ecmascript/ecma_string-inl.h   |  2 +-
 ecmascript/ecma_string.cpp     | 19 ++++++++++++++
 ecmascript/ecma_string.h       |  1 +
 ecmascript/object_factory.cpp  |  2 +-
 6 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/ecmascript/base/utf_helper.cpp b/ecmascript/base/utf_helper.cpp
index 5cb168e06f..6e34eab513 100644
--- a/ecmascript/base/utf_helper.cpp
+++ b/ecmascript/base/utf_helper.cpp
@@ -227,6 +227,18 @@ size_t Utf8ToUtf16Size(const uint8_t *utf8)
     return res;
 }
 
+size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len)
+{
+    size_t res = 0;
+    const uint8_t *putf8 = utf8;
+    while (putf8 - utf8 < utf8Len) {  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
+        auto [pair, nbytes] = ConvertUtf8ToUtf16Pair(putf8);
+        res += pair > 0xffff ? UtfLength::TWO : UtfLength::ONE;  // NOLINT(readability-magic-numbers)
+        putf8 += nbytes;  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
+    }
+    return res;
+}
+
 size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_t utf16Len, size_t start)
 {
     ASSERT(utf16Out != nullptr);
@@ -260,4 +272,39 @@ size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_
 
     return outPos;
 }
+
+size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, size_t utf8Len, uint16_t *utf16Out, size_t utf16Len, size_t start)
+{
+    ASSERT(utf16Out != nullptr);
+    size_t outPos = 0;
+    const uint8_t *putf8 = utf8In;
+    while (putf8 - utf8In < utf8Len) {  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
+        auto [pair, nbytes] = ConvertUtf8ToUtf16Pair(putf8);
+        auto [pHi, pLo] = utf::SplitUtf16Pair(pair);
+
+        putf8 += nbytes;  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
+        if (start > 0) {
+            start -= nbytes;
+            continue;
+        }
+
+        if (pHi != 0) {
+            if (outPos >= utf16Len - 1) {  // check for place for two uint16
+                break;
+            }
+            outPos++;
+            *utf16Out++ = pHi;  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
+        }
+        if (outPos >= utf16Len) {
+            break;
+        }
+        outPos++;
+        *utf16Out++ = pLo;  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
+        if (outPos >= utf16Len) {
+            break;
+        }
+    }
+
+    return outPos;
+}
 }  // namespace panda::ecmascript::base::utf_helper
diff --git a/ecmascript/base/utf_helper.h b/ecmascript/base/utf_helper.h
index 29abed4908..108bd47544 100644
--- a/ecmascript/base/utf_helper.h
+++ b/ecmascript/base/utf_helper.h
@@ -71,8 +71,12 @@ std::pair<uint32_t, size_t> ConvertUtf8ToUtf16Pair(const uint8_t *data, bool com
 
 size_t Utf8ToUtf16Size(const uint8_t *utf8);
 
+size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len);
+
 size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_t utf16Len, size_t start);
 
+size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, size_t utf8Len, uint16_t *utf16Out, size_t utf16Len, size_t start);
+
 static inline uint32_t CombineTwoU16(uint16_t d0, uint16_t d1)
 {
     uint32_t codePoint = d0 - utf::HI_SURROGATE_MIN;
@@ -83,4 +87,4 @@ static inline uint32_t CombineTwoU16(uint16_t d0, uint16_t d1)
 }
 }  // namespace panda::ecmascript::base::utf_helper
 
-#endif  // ECMASCRIPT_BASE_UTF_HELPER_H
\ No newline at end of file
+#endif  // ECMASCRIPT_BASE_UTF_HELPER_H
diff --git a/ecmascript/ecma_string-inl.h b/ecmascript/ecma_string-inl.h
index 5cffa1c1b1..105df81d67 100644
--- a/ecmascript/ecma_string-inl.h
+++ b/ecmascript/ecma_string-inl.h
@@ -63,7 +63,7 @@ inline EcmaString *EcmaString::CreateFromUtf8(const uint8_t *utf8Data, uint32_t
             UNREACHABLE();
         }
     } else {
-        auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data);
+        auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
         string = AllocStringObject(utf16Len, false, vm);
         ASSERT(string != nullptr);
 
diff --git a/ecmascript/ecma_string.cpp b/ecmascript/ecma_string.cpp
index 797848f2a2..98266274dc 100644
--- a/ecmascript/ecma_string.cpp
+++ b/ecmascript/ecma_string.cpp
@@ -258,6 +258,25 @@ bool EcmaString::CanBeCompressed(const uint8_t *utf8Data)
     return isCompressed;
 }
 
+// static
+bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
+{
+    if (!compressedStringsEnabled) {
+        return false;
+    }
+    bool isCompressed = true;
+    int index = 0;
+    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
+    while (index < utf8Len) {
+        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
+        if (!IsASCIICharacter(utf8Data[index])) {
+            isCompressed = false;
+            break;
+        }
+        ++index;
+    }
+    return isCompressed;
+}
 /* static */
 bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
 {
diff --git a/ecmascript/ecma_string.h b/ecmascript/ecma_string.h
index 453b2d1250..7334ee7773 100644
--- a/ecmascript/ecma_string.h
+++ b/ecmascript/ecma_string.h
@@ -261,6 +261,7 @@ public:
     static EcmaString *AllocStringObject(size_t length, bool compressed, const EcmaVM *vm);
 
     static bool CanBeCompressed(const uint8_t *utf8Data);
+    static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len);
     static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len);
 
 private:
diff --git a/ecmascript/object_factory.cpp b/ecmascript/object_factory.cpp
index 8a2aeaca41..08dafd0629 100644
--- a/ecmascript/object_factory.cpp
+++ b/ecmascript/object_factory.cpp
@@ -2152,7 +2152,7 @@ JSHandle<EcmaString> ObjectFactory::NewFromStdStringUnCheck(const std::string &d
 JSHandle<EcmaString> ObjectFactory::NewFromUtf8(const uint8_t *utf8Data, uint32_t utf8Len)
 {
     NewObjectHook();
-    bool canBeCompress = EcmaString::CanBeCompressed(utf8Data);
+    bool canBeCompress = EcmaString::CanBeCompressed(utf8Data, utf8Len);
     return GetStringFromStringTable(utf8Data, utf8Len, canBeCompress);
 }
 
-- 
Gitee


From 813955a1d1548f8dd9c319f4f0b1e7748edfaec3 Mon Sep 17 00:00:00 2001
From: bianshaolei <bianshaolei@kaihongdigi.com>
Date: Fri, 26 Nov 2021 16:45:57 +0800
Subject: [PATCH 2/6] Rename utf8Len to iLen to keep lines under 120 chars.

Signed-off-by: bianshaolei <bianshaolei@kaihongdigi.com>
---
 ecmascript/base/utf_helper.cpp | 4 ++--
 ecmascript/base/utf_helper.h   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ecmascript/base/utf_helper.cpp b/ecmascript/base/utf_helper.cpp
index 6e34eab513..93c16c980d 100644
--- a/ecmascript/base/utf_helper.cpp
+++ b/ecmascript/base/utf_helper.cpp
@@ -273,12 +273,12 @@ size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_
     return outPos;
 }
 
-size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, size_t utf8Len, uint16_t *utf16Out, size_t utf16Len, size_t start)
+size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, size_t iLen, uint16_t *utf16Out, size_t utf16Len, size_t start)
 {
     ASSERT(utf16Out != nullptr);
     size_t outPos = 0;
     const uint8_t *putf8 = utf8In;
-    while (putf8 - utf8In < utf8Len) {  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
+    while (putf8 - utf8In < iLen) {  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
         auto [pair, nbytes] = ConvertUtf8ToUtf16Pair(putf8);
         auto [pHi, pLo] = utf::SplitUtf16Pair(pair);
 
diff --git a/ecmascript/base/utf_helper.h b/ecmascript/base/utf_helper.h
index 108bd47544..32cb539bce 100644
--- a/ecmascript/base/utf_helper.h
+++ b/ecmascript/base/utf_helper.h
@@ -75,7 +75,7 @@ size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len);
 
 size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_t utf16Len, size_t start);
 
-size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, size_t utf8Len, uint16_t *utf16Out, size_t utf16Len, size_t start);
+size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, size_t iLen, uint16_t *utf16Out, size_t utf16Len, size_t start);
 
 static inline uint32_t CombineTwoU16(uint16_t d0, uint16_t d1)
 {
-- 
Gitee


From 79c5fecd18ca1c524facc51e50e22cf7cf8ce404 Mon Sep 17 00:00:00 2001
From: bianshaolei <bianshaolei@kaihongdigi.com>
Date: Tue, 28 Dec 2021 09:12:24 +0800
Subject: [PATCH 3/6] =?UTF-8?q?utf8=E5=AD=97=E7=AC=A6=E4=B8=B2=E5=9C=A8?=
 =?UTF-8?q?=E4=BC=A0=E9=80=92=E9=95=BF=E5=BA=A6=E7=9A=84=E6=83=85=E5=86=B5?=
 =?UTF-8?q?=E4=B8=8B=EF=BC=8C=E4=B8=8D=E8=83=BD=E5=81=87=E8=AE=BE=E5=85=B6?=
 =?UTF-8?q?=E4=BB=A5'\0'=E7=BB=93=E5=B0=BE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: bianshaolei <bianshaolei@kaihongdigi.com>
---
 ecmascript/base/number_helper.cpp |  2 +-
 ecmascript/base/utf_helper.cpp    | 55 +++++--------------------------
 ecmascript/base/utf_helper.h      |  6 +---
 ecmascript/ecma_string-inl.h      |  2 +-
 ecmascript/ecma_string.cpp        | 47 +++++++++-----------------
 ecmascript/ecma_string.h          | 10 +++---
 ecmascript/ecma_string_table.cpp  |  2 +-
 7 files changed, 33 insertions(+), 91 deletions(-)

diff --git a/ecmascript/base/number_helper.cpp b/ecmascript/base/number_helper.cpp
index a6c3410bab..034e541060 100644
--- a/ecmascript/base/number_helper.cpp
+++ b/ecmascript/base/number_helper.cpp
@@ -76,7 +76,7 @@ bool NumberHelper::GotoNonspace(uint8_t **ptr, const uint8_t *end)
                 ++size;
                 utf8Bit >>= 1UL;
             }
-            if (base::utf_helper::ConvertRegionUtf8ToUtf16(*ptr, &c, 1, 0) <= 0) {
+            if (base::utf_helper::ConvertRegionUtf8ToUtf16(*ptr, SIZE_MAX, &c, 1, 0) <= 0) {
                 return true;
             }
         }
diff --git a/ecmascript/base/utf_helper.cpp b/ecmascript/base/utf_helper.cpp
index 93c16c980d..8dc9aa9ca6 100644
--- a/ecmascript/base/utf_helper.cpp
+++ b/ecmascript/base/utf_helper.cpp
@@ -216,22 +216,14 @@ std::pair<uint32_t, size_t> ConvertUtf8ToUtf16Pair(const uint8_t *data, bool com
     return {pair, UtfLength::FOUR};
 }
 
-size_t Utf8ToUtf16Size(const uint8_t *utf8)
-{
-    size_t res = 0;
-    while (*utf8 != '\0') {  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
-        auto [pair, nbytes] = ConvertUtf8ToUtf16Pair(utf8);
-        res += pair > 0xffff ? UtfLength::TWO : UtfLength::ONE;  // NOLINT(readability-magic-numbers)
-        utf8 += nbytes;  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
-    }
-    return res;
-}
-
 size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len)
 {
     size_t res = 0;
     const uint8_t *putf8 = utf8;
-    while (putf8 - utf8 < utf8Len) {  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
+    if (utf8Len == SIZE_MAX) {
+        utf8Len = strlen((const char *)utf8);
+    }
+    while (putf8 - utf8 < (int)utf8Len) {  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
         auto [pair, nbytes] = ConvertUtf8ToUtf16Pair(putf8);
         res += pair > 0xffff ? UtfLength::TWO : UtfLength::ONE;  // NOLINT(readability-magic-numbers)
         putf8 += nbytes;  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
@@ -239,46 +231,15 @@ size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len)
     return res;
 }
 
-size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_t utf16Len, size_t start)
-{
-    ASSERT(utf16Out != nullptr);
-    size_t outPos = 0;
-    while (*utf8In != '\0') {  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
-        auto [pair, nbytes] = ConvertUtf8ToUtf16Pair(utf8In);
-        auto [pHi, pLo] = utf::SplitUtf16Pair(pair);
-
-        utf8In += nbytes;  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
-        if (start > 0) {
-            start -= nbytes;
-            continue;
-        }
-
-        if (pHi != 0) {
-            if (outPos >= utf16Len - 1) {  // check for place for two uint16
-                break;
-            }
-            outPos++;
-            *utf16Out++ = pHi;  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
-        }
-        if (outPos >= utf16Len) {
-            break;
-        }
-        outPos++;
-        *utf16Out++ = pLo;  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
-        if (outPos >= utf16Len) {
-            break;
-        }
-    }
-
-    return outPos;
-}
-
 size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, size_t iLen, uint16_t *utf16Out, size_t utf16Len, size_t start)
 {
     ASSERT(utf16Out != nullptr);
     size_t outPos = 0;
     const uint8_t *putf8 = utf8In;
-    while (putf8 - utf8In < iLen) {  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
+    if (iLen == SIZE_MAX) {
+        iLen = strlen((const char *)utf8In);
+    }
+    while (putf8 - utf8In < (int)iLen) {  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
         auto [pair, nbytes] = ConvertUtf8ToUtf16Pair(putf8);
         auto [pHi, pLo] = utf::SplitUtf16Pair(pair);
 
diff --git a/ecmascript/base/utf_helper.h b/ecmascript/base/utf_helper.h
index 32cb539bce..d927269d3d 100644
--- a/ecmascript/base/utf_helper.h
+++ b/ecmascript/base/utf_helper.h
@@ -69,11 +69,7 @@ size_t ConvertRegionUtf16ToUtf8(const uint16_t *utf16In, uint8_t *utf8Out, size_
 
 std::pair<uint32_t, size_t> ConvertUtf8ToUtf16Pair(const uint8_t *data, bool combine = false);
 
-size_t Utf8ToUtf16Size(const uint8_t *utf8);
-
-size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len);
-
-size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_t utf16Len, size_t start);
+size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len = SIZE_MAX);
 
 size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, size_t iLen, uint16_t *utf16Out, size_t utf16Len, size_t start);
 
diff --git a/ecmascript/ecma_string-inl.h b/ecmascript/ecma_string-inl.h
index 105df81d67..dad7a2b4d7 100644
--- a/ecmascript/ecma_string-inl.h
+++ b/ecmascript/ecma_string-inl.h
@@ -68,7 +68,7 @@ inline EcmaString *EcmaString::CreateFromUtf8(const uint8_t *utf8Data, uint32_t
         ASSERT(string != nullptr);
 
         [[maybe_unused]] auto len =
-            base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, string->GetDataUtf16Writable(), utf16Len, 0);
+            base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, utf8Len, string->GetDataUtf16Writable(), utf16Len, 0);
         ASSERT(len == utf16Len);
     }
 
diff --git a/ecmascript/ecma_string.cpp b/ecmascript/ecma_string.cpp
index 98266274dc..19bc8785e7 100644
--- a/ecmascript/ecma_string.cpp
+++ b/ecmascript/ecma_string.cpp
@@ -238,26 +238,6 @@ int32_t EcmaString::IndexOf(const EcmaString *rhs, int32_t pos) const
     return -1;
 }
 
-// static
-bool EcmaString::CanBeCompressed(const uint8_t *utf8Data)
-{
-    if (!compressedStringsEnabled) {
-        return false;
-    }
-    bool isCompressed = true;
-    int index = 0;
-    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
-    while (utf8Data[index] != '\0') {
-        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
-        if (!IsASCIICharacter(utf8Data[index])) {
-            isCompressed = false;
-            break;
-        }
-        ++index;
-    }
-    return isCompressed;
-}
-
 // static
 bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
 {
@@ -266,8 +246,11 @@ bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
     }
     bool isCompressed = true;
     int index = 0;
+    if (utf8Len == UINT32_MAX) {
+        utf8Len = strlen((const char *)utf8Data);
+    }
     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
-    while (index < utf8Len) {
+    while (index < (int)utf8Len) {
         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
         if (!IsASCIICharacter(utf8Data[index])) {
             isCompressed = false;
@@ -340,7 +323,7 @@ bool EcmaString::StringsAreEqualUtf8(const EcmaString *str1, const uint8_t *utf8
         Span<const uint8_t> data2(utf8Data, utf8Len);
         return EcmaString::StringsAreEquals(data1, data2);
     }
-    return IsUtf8EqualsUtf16(utf8Data, str1->GetDataUtf16(), str1->GetLength());
+    return IsUtf8EqualsUtf16(utf8Data, utf8Len, str1->GetDataUtf16(), str1->GetLength());
 }
 
 /* static */
@@ -350,7 +333,7 @@ bool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *ut
     if (str1->GetLength() != utf16Len) {
         result = false;
     } else if (!str1->IsUtf16()) {
-        result = IsUtf8EqualsUtf16(str1->GetDataUtf8(), utf16Data, utf16Len);
+        result = IsUtf8EqualsUtf16(str1->GetDataUtf8(), str1->GetLength(), utf16Data, utf16Len);
     } else {
         Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
         Span<const uint16_t> data2(utf16Data, utf16Len);
@@ -411,13 +394,13 @@ static int32_t ComputeHashForData(const T *data, size_t size)
     return static_cast<int32_t>(hash);
 }
 
-static int32_t ComputeHashForUtf8(const uint8_t *utf8Data)
+static int32_t ComputeHashForUtf8(const uint8_t *utf8Data, size_t utf8Len)
 {
     if (utf8Data == nullptr) {
         return 0;
     }
     uint32_t hash = 0;
-    while (*utf8Data != '\0') {  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
+    for (size_t i = 0; i < utf8Len; i++) {
         constexpr size_t SHIFT = 5;
         hash = (hash << SHIFT) - hash + *utf8Data++;  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
     }
@@ -441,15 +424,15 @@ uint32_t EcmaString::ComputeHashcode() const
 }
 
 /* static */
-uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, bool canBeCompress)
+uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
 {
     uint32_t hash;
     if (canBeCompress) {
-        hash = ComputeHashForUtf8(utf8Data);
+        hash = ComputeHashForUtf8(utf8Data, utf8Len);
     } else {
-        auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data);
+        auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
         CVector<uint16_t> tmpBuffer(utf16Len);
-        [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf16Len, 0);
+        [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, utf8Len, tmpBuffer.data(), utf16Len, 0);
         ASSERT(len == utf16Len);
         hash = ComputeHashForData(tmpBuffer.data(), utf16Len);
     }
@@ -463,12 +446,14 @@ uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t le
 }
 
 /* static */
-bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, const uint16_t *utf16Data, uint32_t utf16Len)
+bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, uint32_t utf8Len,
+        const uint16_t *utf16Data, uint32_t utf16Len)
 {
     // length is one more than compared utf16Data, don't need convert all utf8Data to utf16Data
     uint32_t utf8ConvertLength = utf16Len + 1;
     CVector<uint16_t> tmpBuffer(utf8ConvertLength);
-    auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8ConvertLength, 0);
+    auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, utf8Len,
+            tmpBuffer.data(), utf8ConvertLength, 0);
     if (len != utf16Len) {
         return false;
     }
diff --git a/ecmascript/ecma_string.h b/ecmascript/ecma_string.h
index 7334ee7773..144118505f 100644
--- a/ecmascript/ecma_string.h
+++ b/ecmascript/ecma_string.h
@@ -171,7 +171,7 @@ public:
             }
             return length;
         }
-        return base::utf_helper::ConvertRegionUtf8ToUtf16(GetDataUtf8(), buf, maxLength, start);
+        return base::utf_helper::ConvertRegionUtf8ToUtf16(GetDataUtf8(), GetLength(), buf, maxLength, start);
     }
 
     // NOLINTNEXTLINE(modernize-avoid-c-arrays)
@@ -245,7 +245,7 @@ public:
      * Compares strings by bytes, It doesn't check canonical unicode equivalence.
      */
     static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len);
-    static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, bool canBeCompress);
+    static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress);
     static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length);
 
     static void SetCompressedStringsEnabled(bool val)
@@ -260,8 +260,7 @@ public:
 
     static EcmaString *AllocStringObject(size_t length, bool compressed, const EcmaVM *vm);
 
-    static bool CanBeCompressed(const uint8_t *utf8Data);
-    static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len);
+    static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len = UINT32_MAX);
     static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len);
 
 private:
@@ -304,7 +303,8 @@ private:
      * str1 should have the same length as utf16_data.
      * Converts utf8Data to utf16 and compare it with given utf16_data.
      */
-    static bool IsUtf8EqualsUtf16(const uint8_t *utf8Data, const uint16_t *utf16Data, uint32_t utf16Len);
+    static bool IsUtf8EqualsUtf16(const uint8_t *utf8Data, uint32_t utf8Len,
+            const uint16_t *utf16Data, uint32_t utf16Len);
 
     template<typename T>
     /**
diff --git a/ecmascript/ecma_string_table.cpp b/ecmascript/ecma_string_table.cpp
index 88426a7a22..a0e7715c36 100644
--- a/ecmascript/ecma_string_table.cpp
+++ b/ecmascript/ecma_string_table.cpp
@@ -27,7 +27,7 @@ EcmaStringTable::EcmaStringTable(const EcmaVM *vm) : vm_(vm) {}
 
 EcmaString *EcmaStringTable::GetString(const uint8_t *utf8Data, uint32_t utf8Len, bool canBeCompress) const
 {
-    uint32_t hashCode = EcmaString::ComputeHashcodeUtf8(utf8Data, canBeCompress);
+    uint32_t hashCode = EcmaString::ComputeHashcodeUtf8(utf8Data, utf8Len, canBeCompress);
     for (auto it = table_.find(hashCode); it != table_.end(); it++) {
         auto foundedString = it->second;
         if (EcmaString::StringsAreEqualUtf8(foundedString, utf8Data, utf8Len, canBeCompress)) {
-- 
Gitee


From 85fa51f55ae52d5e3691b08cc9fe084f821314c9 Mon Sep 17 00:00:00 2001
From: bianshaolei <bianshaolei@kaihongdigi.com>
Date: Tue, 28 Dec 2021 10:20:19 +0800
Subject: [PATCH 4/6] Align continuation lines in ecma_string.cpp.

Signed-off-by: bianshaolei <bianshaolei@kaihongdigi.com>
---
 ecmascript/ecma_string.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/ecmascript/ecma_string.cpp b/ecmascript/ecma_string.cpp
index 19bc8785e7..d284bad604 100644
--- a/ecmascript/ecma_string.cpp
+++ b/ecmascript/ecma_string.cpp
@@ -432,7 +432,8 @@ uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len
     } else {
         auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
         CVector<uint16_t> tmpBuffer(utf16Len);
-        [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, utf8Len, tmpBuffer.data(), utf16Len, 0);
+        [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, utf8Len,
+                                                                               tmpBuffer.data(), utf16Len, 0);
         ASSERT(len == utf16Len);
         hash = ComputeHashForData(tmpBuffer.data(), utf16Len);
     }
@@ -447,13 +448,13 @@ uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t le
 
 /* static */
 bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, uint32_t utf8Len,
-        const uint16_t *utf16Data, uint32_t utf16Len)
+                                   const uint16_t *utf16Data, uint32_t utf16Len)
 {
     // length is one more than compared utf16Data, don't need convert all utf8Data to utf16Data
     uint32_t utf8ConvertLength = utf16Len + 1;
     CVector<uint16_t> tmpBuffer(utf8ConvertLength);
     auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, utf8Len,
-            tmpBuffer.data(), utf8ConvertLength, 0);
+                                                          tmpBuffer.data(), utf8ConvertLength, 0);
     if (len != utf16Len) {
         return false;
     }
-- 
Gitee


From 07b24a9978f0f17f365908d6a16681329a6abdca Mon Sep 17 00:00:00 2001
From: bianshaolei <bianshaolei@kaihongdigi.com>
Date: Fri, 31 Dec 2021 10:53:03 +0800
Subject: [PATCH 5/6] fix L0 test: update ComputeHashcodeUtf8 call; include
 string.h for strlen

Signed-off-by: bianshaolei <bianshaolei@kaihongdigi.com>
---
 ecmascript/base/utf_helper.cpp        | 1 +
 ecmascript/tests/ecma_string_test.cpp | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/ecmascript/base/utf_helper.cpp b/ecmascript/base/utf_helper.cpp
index 8dc9aa9ca6..a528c935b4 100644
--- a/ecmascript/base/utf_helper.cpp
+++ b/ecmascript/base/utf_helper.cpp
@@ -14,6 +14,7 @@
  */
 
 #include "ecmascript/base/utf_helper.h"
+#include <string.h>
 
 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
 static constexpr int32_t U16_SURROGATE_OFFSET = (0xd800 << 10UL) + 0xdc00 - 0x10000;
diff --git a/ecmascript/tests/ecma_string_test.cpp b/ecmascript/tests/ecma_string_test.cpp
index f3e39b5b2e..0769dc10f1 100644
--- a/ecmascript/tests/ecma_string_test.cpp
+++ b/ecmascript/tests/ecma_string_test.cpp
@@ -1647,7 +1647,7 @@ HWTEST_F_L0(EcmaStringTest, ComputeHashcodeUtf8)
     for (uint32_t i = 0; i < lengthEcmaStrU8; i++) {
         hashExpect = hashExpect * 31 + arrayU8[i];
     }
-    EXPECT_EQ(EcmaString::ComputeHashcodeUtf8(&arrayU8[0], lengthEcmaStrU8), static_cast<int32_t>(hashExpect));
+    EXPECT_EQ(EcmaString::ComputeHashcodeUtf8(&arrayU8[0], lengthEcmaStrU8, false), static_cast<int32_t>(hashExpect));
 }
 
 /*
-- 
Gitee


From dc2b74f48eefe7a2ed9a3cf780e33c9efdf3ee45 Mon Sep 17 00:00:00 2001
From: bianshaolei <bianshaolei@kaihongdigi.com>
Date: Fri, 31 Dec 2021 11:01:13 +0800
Subject: [PATCH 6/6] Use <cstring> instead of <string.h>

Signed-off-by: bianshaolei <bianshaolei@kaihongdigi.com>
---
 ecmascript/base/utf_helper.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ecmascript/base/utf_helper.cpp b/ecmascript/base/utf_helper.cpp
index a528c935b4..1859863fa0 100644
--- a/ecmascript/base/utf_helper.cpp
+++ b/ecmascript/base/utf_helper.cpp
@@ -14,7 +14,7 @@
  */
 
 #include "ecmascript/base/utf_helper.h"
-#include <string.h>
+#include <cstring>
 
 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
 static constexpr int32_t U16_SURROGATE_OFFSET = (0xd800 << 10UL) + 0xdc00 - 0x10000;
-- 
Gitee