From 549fd9b3e606c89c57d0a865bc4d716abc7bd766 Mon Sep 17 00:00:00 2001 From: bianshaolei Date: Tue, 16 Nov 2021 23:37:42 +0800 Subject: [PATCH 1/6] fix bug: create string with utf8 data and size, not '\0' terminated. Signed-off-by: bianshaolei --- ecmascript/base/utf_helper.cpp | 47 ++++++++++++++++++++++++++++++++++ ecmascript/base/utf_helper.h | 6 ++++- ecmascript/ecma_string-inl.h | 2 +- ecmascript/ecma_string.cpp | 19 ++++++++++++++ ecmascript/ecma_string.h | 1 + ecmascript/object_factory.cpp | 2 +- 6 files changed, 74 insertions(+), 3 deletions(-) diff --git a/ecmascript/base/utf_helper.cpp b/ecmascript/base/utf_helper.cpp index 5cb168e06f..6e34eab513 100644 --- a/ecmascript/base/utf_helper.cpp +++ b/ecmascript/base/utf_helper.cpp @@ -227,6 +227,18 @@ size_t Utf8ToUtf16Size(const uint8_t *utf8) return res; } +size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len) +{ + size_t res = 0; + const uint8_t *putf8 = utf8; + while (putf8 - utf8 < utf8Len) { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + auto [pair, nbytes] = ConvertUtf8ToUtf16Pair(putf8); + res += pair > 0xffff ? 
UtfLength::TWO : UtfLength::ONE; // NOLINT(readability-magic-numbers) + putf8 += nbytes; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + } + return res; +} + size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_t utf16Len, size_t start) { ASSERT(utf16Out != nullptr); @@ -260,4 +272,39 @@ size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_ return outPos; } + +size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, size_t utf8Len, uint16_t *utf16Out, size_t utf16Len, size_t start) +{ + ASSERT(utf16Out != nullptr); + size_t outPos = 0; + const uint8_t *putf8 = utf8In; + while (putf8 - utf8In < utf8Len) { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + auto [pair, nbytes] = ConvertUtf8ToUtf16Pair(putf8); + auto [pHi, pLo] = utf::SplitUtf16Pair(pair); + + putf8 += nbytes; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + if (start > 0) { + start -= nbytes; + continue; + } + + if (pHi != 0) { + if (outPos >= utf16Len - 1) { // check for place for two uint16 + break; + } + outPos++; + *utf16Out++ = pHi; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + } + if (outPos >= utf16Len) { + break; + } + outPos++; + *utf16Out++ = pLo; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + if (outPos >= utf16Len) { + break; + } + } + + return outPos; +} } // namespace panda::ecmascript::base::utf_helper diff --git a/ecmascript/base/utf_helper.h b/ecmascript/base/utf_helper.h index 29abed4908..108bd47544 100644 --- a/ecmascript/base/utf_helper.h +++ b/ecmascript/base/utf_helper.h @@ -71,8 +71,12 @@ std::pair ConvertUtf8ToUtf16Pair(const uint8_t *data, bool com size_t Utf8ToUtf16Size(const uint8_t *utf8); +size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len); + size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_t utf16Len, size_t start); +size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, size_t utf8Len, uint16_t *utf16Out, size_t 
utf16Len, size_t start); + static inline uint32_t CombineTwoU16(uint16_t d0, uint16_t d1) { uint32_t codePoint = d0 - utf::HI_SURROGATE_MIN; @@ -83,4 +87,4 @@ static inline uint32_t CombineTwoU16(uint16_t d0, uint16_t d1) } } // namespace panda::ecmascript::base::utf_helper -#endif // ECMASCRIPT_BASE_UTF_HELPER_H \ No newline at end of file +#endif // ECMASCRIPT_BASE_UTF_HELPER_H diff --git a/ecmascript/ecma_string-inl.h b/ecmascript/ecma_string-inl.h index 5cffa1c1b1..105df81d67 100644 --- a/ecmascript/ecma_string-inl.h +++ b/ecmascript/ecma_string-inl.h @@ -63,7 +63,7 @@ inline EcmaString *EcmaString::CreateFromUtf8(const uint8_t *utf8Data, uint32_t UNREACHABLE(); } } else { - auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data); + auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len); string = AllocStringObject(utf16Len, false, vm); ASSERT(string != nullptr); diff --git a/ecmascript/ecma_string.cpp b/ecmascript/ecma_string.cpp index 797848f2a2..98266274dc 100644 --- a/ecmascript/ecma_string.cpp +++ b/ecmascript/ecma_string.cpp @@ -258,6 +258,25 @@ bool EcmaString::CanBeCompressed(const uint8_t *utf8Data) return isCompressed; } +// static +bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len) +{ + if (!compressedStringsEnabled) { + return false; + } + bool isCompressed = true; + int index = 0; + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + while (index < utf8Len) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) + if (!IsASCIICharacter(utf8Data[index])) { + isCompressed = false; + break; + } + ++index; + } + return isCompressed; +} /* static */ bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len) { diff --git a/ecmascript/ecma_string.h b/ecmascript/ecma_string.h index 453b2d1250..7334ee7773 100644 --- a/ecmascript/ecma_string.h +++ b/ecmascript/ecma_string.h @@ -261,6 +261,7 @@ public: static EcmaString *AllocStringObject(size_t length, bool 
compressed, const EcmaVM *vm); static bool CanBeCompressed(const uint8_t *utf8Data); + static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len); static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len); private: diff --git a/ecmascript/object_factory.cpp b/ecmascript/object_factory.cpp index 8a2aeaca41..08dafd0629 100644 --- a/ecmascript/object_factory.cpp +++ b/ecmascript/object_factory.cpp @@ -2152,7 +2152,7 @@ JSHandle ObjectFactory::NewFromStdStringUnCheck(const std::string &d JSHandle ObjectFactory::NewFromUtf8(const uint8_t *utf8Data, uint32_t utf8Len) { NewObjectHook(); - bool canBeCompress = EcmaString::CanBeCompressed(utf8Data); + bool canBeCompress = EcmaString::CanBeCompressed(utf8Data, utf8Len); return GetStringFromStringTable(utf8Data, utf8Len, canBeCompress); } -- Gitee From 813955a1d1548f8dd9c319f4f0b1e7748edfaec3 Mon Sep 17 00:00:00 2001 From: bianshaolei Date: Fri, 26 Nov 2021 16:45:57 +0800 Subject: [PATCH 2/6] Make one line less than 120 chars. 
Signed-off-by: bianshaolei --- ecmascript/base/utf_helper.cpp | 4 ++-- ecmascript/base/utf_helper.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ecmascript/base/utf_helper.cpp b/ecmascript/base/utf_helper.cpp index 6e34eab513..93c16c980d 100644 --- a/ecmascript/base/utf_helper.cpp +++ b/ecmascript/base/utf_helper.cpp @@ -273,12 +273,12 @@ size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_ return outPos; } -size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, size_t utf8Len, uint16_t *utf16Out, size_t utf16Len, size_t start) +size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, size_t iLen, uint16_t *utf16Out, size_t utf16Len, size_t start) { ASSERT(utf16Out != nullptr); size_t outPos = 0; const uint8_t *putf8 = utf8In; - while (putf8 - utf8In < utf8Len) { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + while (putf8 - utf8In < iLen) { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) auto [pair, nbytes] = ConvertUtf8ToUtf16Pair(putf8); auto [pHi, pLo] = utf::SplitUtf16Pair(pair); diff --git a/ecmascript/base/utf_helper.h b/ecmascript/base/utf_helper.h index 108bd47544..32cb539bce 100644 --- a/ecmascript/base/utf_helper.h +++ b/ecmascript/base/utf_helper.h @@ -75,7 +75,7 @@ size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len); size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_t utf16Len, size_t start); -size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, size_t utf8Len, uint16_t *utf16Out, size_t utf16Len, size_t start); +size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, size_t iLen, uint16_t *utf16Out, size_t utf16Len, size_t start); static inline uint32_t CombineTwoU16(uint16_t d0, uint16_t d1) { -- Gitee From 79c5fecd18ca1c524facc51e50e22cf7cf8ce404 Mon Sep 17 00:00:00 2001 From: bianshaolei Date: Tue, 28 Dec 2021 09:12:24 +0800 Subject: [PATCH 3/6] =?UTF-8?q?utf8=E5=AD=97=E7=AC=A6=E4=B8=B2=E5=9C=A8?= 
=?UTF-8?q?=E4=BC=A0=E9=80=92=E9=95=BF=E5=BA=A6=E7=9A=84=E6=83=85=E5=86=B5?= =?UTF-8?q?=E4=B8=8B=EF=BC=8C=E4=B8=8D=E8=83=BD=E5=81=87=E8=AE=BE=E5=85=B6?= =?UTF-8?q?=E4=BB=A5'\0'=E7=BB=93=E5=B0=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: bianshaolei --- ecmascript/base/number_helper.cpp | 2 +- ecmascript/base/utf_helper.cpp | 55 +++++-------------------------- ecmascript/base/utf_helper.h | 6 +--- ecmascript/ecma_string-inl.h | 2 +- ecmascript/ecma_string.cpp | 47 +++++++++----------------- ecmascript/ecma_string.h | 10 +++--- ecmascript/ecma_string_table.cpp | 2 +- 7 files changed, 33 insertions(+), 91 deletions(-) diff --git a/ecmascript/base/number_helper.cpp b/ecmascript/base/number_helper.cpp index a6c3410bab..034e541060 100644 --- a/ecmascript/base/number_helper.cpp +++ b/ecmascript/base/number_helper.cpp @@ -76,7 +76,7 @@ bool NumberHelper::GotoNonspace(uint8_t **ptr, const uint8_t *end) ++size; utf8Bit >>= 1UL; } - if (base::utf_helper::ConvertRegionUtf8ToUtf16(*ptr, &c, 1, 0) <= 0) { + if (base::utf_helper::ConvertRegionUtf8ToUtf16(*ptr, SIZE_MAX, &c, 1, 0) <= 0) { return true; } } diff --git a/ecmascript/base/utf_helper.cpp b/ecmascript/base/utf_helper.cpp index 93c16c980d..8dc9aa9ca6 100644 --- a/ecmascript/base/utf_helper.cpp +++ b/ecmascript/base/utf_helper.cpp @@ -216,22 +216,14 @@ std::pair ConvertUtf8ToUtf16Pair(const uint8_t *data, bool com return {pair, UtfLength::FOUR}; } -size_t Utf8ToUtf16Size(const uint8_t *utf8) -{ - size_t res = 0; - while (*utf8 != '\0') { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - auto [pair, nbytes] = ConvertUtf8ToUtf16Pair(utf8); - res += pair > 0xffff ? 
UtfLength::TWO : UtfLength::ONE; // NOLINT(readability-magic-numbers) - utf8 += nbytes; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - } - return res; -} - size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len) { size_t res = 0; const uint8_t *putf8 = utf8; - while (putf8 - utf8 < utf8Len) { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + if (utf8Len == SIZE_MAX) { + utf8Len = strlen((const char *)utf8); + } + while (putf8 - utf8 < (int)utf8Len) { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) auto [pair, nbytes] = ConvertUtf8ToUtf16Pair(putf8); res += pair > 0xffff ? UtfLength::TWO : UtfLength::ONE; // NOLINT(readability-magic-numbers) putf8 += nbytes; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) @@ -239,46 +231,15 @@ size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len) return res; } -size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_t utf16Len, size_t start) -{ - ASSERT(utf16Out != nullptr); - size_t outPos = 0; - while (*utf8In != '\0') { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - auto [pair, nbytes] = ConvertUtf8ToUtf16Pair(utf8In); - auto [pHi, pLo] = utf::SplitUtf16Pair(pair); - - utf8In += nbytes; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - if (start > 0) { - start -= nbytes; - continue; - } - - if (pHi != 0) { - if (outPos >= utf16Len - 1) { // check for place for two uint16 - break; - } - outPos++; - *utf16Out++ = pHi; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - } - if (outPos >= utf16Len) { - break; - } - outPos++; - *utf16Out++ = pLo; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - if (outPos >= utf16Len) { - break; - } - } - - return outPos; -} - size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, size_t iLen, uint16_t *utf16Out, size_t utf16Len, size_t start) { ASSERT(utf16Out != nullptr); size_t outPos = 0; const uint8_t *putf8 = utf8In; - while (putf8 - utf8In < iLen) { // 
NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + if (iLen == SIZE_MAX) { + iLen = strlen((const char *)utf8In); + } + while (putf8 - utf8In < (int)iLen) { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) auto [pair, nbytes] = ConvertUtf8ToUtf16Pair(putf8); auto [pHi, pLo] = utf::SplitUtf16Pair(pair); diff --git a/ecmascript/base/utf_helper.h b/ecmascript/base/utf_helper.h index 32cb539bce..d927269d3d 100644 --- a/ecmascript/base/utf_helper.h +++ b/ecmascript/base/utf_helper.h @@ -69,11 +69,7 @@ size_t ConvertRegionUtf16ToUtf8(const uint16_t *utf16In, uint8_t *utf8Out, size_ std::pair ConvertUtf8ToUtf16Pair(const uint8_t *data, bool combine = false); -size_t Utf8ToUtf16Size(const uint8_t *utf8); - -size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len); - -size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, uint16_t *utf16Out, size_t utf16Len, size_t start); +size_t Utf8ToUtf16Size(const uint8_t *utf8, size_t utf8Len = SIZE_MAX); size_t ConvertRegionUtf8ToUtf16(const uint8_t *utf8In, size_t iLen, uint16_t *utf16Out, size_t utf16Len, size_t start); diff --git a/ecmascript/ecma_string-inl.h b/ecmascript/ecma_string-inl.h index 105df81d67..dad7a2b4d7 100644 --- a/ecmascript/ecma_string-inl.h +++ b/ecmascript/ecma_string-inl.h @@ -68,7 +68,7 @@ inline EcmaString *EcmaString::CreateFromUtf8(const uint8_t *utf8Data, uint32_t ASSERT(string != nullptr); [[maybe_unused]] auto len = - base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, string->GetDataUtf16Writable(), utf16Len, 0); + base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, utf8Len, string->GetDataUtf16Writable(), utf16Len, 0); ASSERT(len == utf16Len); } diff --git a/ecmascript/ecma_string.cpp b/ecmascript/ecma_string.cpp index 98266274dc..19bc8785e7 100644 --- a/ecmascript/ecma_string.cpp +++ b/ecmascript/ecma_string.cpp @@ -238,26 +238,6 @@ int32_t EcmaString::IndexOf(const EcmaString *rhs, int32_t pos) const return -1; } -// static -bool EcmaString::CanBeCompressed(const uint8_t 
*utf8Data) -{ - if (!compressedStringsEnabled) { - return false; - } - bool isCompressed = true; - int index = 0; - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - while (utf8Data[index] != '\0') { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - if (!IsASCIICharacter(utf8Data[index])) { - isCompressed = false; - break; - } - ++index; - } - return isCompressed; -} - // static bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len) { @@ -266,8 +246,11 @@ bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len) } bool isCompressed = true; int index = 0; + if (utf8Len == UINT32_MAX) { + utf8Len = strlen((const char *)utf8Data); + } // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) - while (index < utf8Len) { + while (index < (int)utf8Len) { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) if (!IsASCIICharacter(utf8Data[index])) { isCompressed = false; @@ -340,7 +323,7 @@ bool EcmaString::StringsAreEqualUtf8(const EcmaString *str1, const uint8_t *utf8 Span data2(utf8Data, utf8Len); return EcmaString::StringsAreEquals(data1, data2); } - return IsUtf8EqualsUtf16(utf8Data, str1->GetDataUtf16(), str1->GetLength()); + return IsUtf8EqualsUtf16(utf8Data, utf8Len, str1->GetDataUtf16(), str1->GetLength()); } /* static */ @@ -350,7 +333,7 @@ bool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *ut if (str1->GetLength() != utf16Len) { result = false; } else if (!str1->IsUtf16()) { - result = IsUtf8EqualsUtf16(str1->GetDataUtf8(), utf16Data, utf16Len); + result = IsUtf8EqualsUtf16(str1->GetDataUtf8(), str1->GetLength(), utf16Data, utf16Len); } else { Span data1(str1->GetDataUtf16(), str1->GetLength()); Span data2(utf16Data, utf16Len); @@ -411,13 +394,13 @@ static int32_t ComputeHashForData(const T *data, size_t size) return static_cast(hash); } -static int32_t ComputeHashForUtf8(const uint8_t *utf8Data) +static int32_t 
ComputeHashForUtf8(const uint8_t *utf8Data, size_t utf8Len) { if (utf8Data == nullptr) { return 0; } uint32_t hash = 0; - while (*utf8Data != '\0') { // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + for (size_t i = 0; i < utf8Len; i++) { constexpr size_t SHIFT = 5; hash = (hash << SHIFT) - hash + *utf8Data++; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) } @@ -441,15 +424,15 @@ uint32_t EcmaString::ComputeHashcode() const } /* static */ -uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, bool canBeCompress) +uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress) { uint32_t hash; if (canBeCompress) { - hash = ComputeHashForUtf8(utf8Data); + hash = ComputeHashForUtf8(utf8Data, utf8Len); } else { - auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data); + auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len); CVector tmpBuffer(utf16Len); - [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf16Len, 0); + [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, utf8Len, tmpBuffer.data(), utf16Len, 0); ASSERT(len == utf16Len); hash = ComputeHashForData(tmpBuffer.data(), utf16Len); } @@ -463,12 +446,14 @@ uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t le } /* static */ -bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, const uint16_t *utf16Data, uint32_t utf16Len) +bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, uint32_t utf8Len, + const uint16_t *utf16Data, uint32_t utf16Len) { // length is one more than compared utf16Data, don't need convert all utf8Data to utf16Data uint32_t utf8ConvertLength = utf16Len + 1; CVector tmpBuffer(utf8ConvertLength); - auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8ConvertLength, 0); + auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, utf8Len, + 
tmpBuffer.data(), utf8ConvertLength, 0); if (len != utf16Len) { return false; } diff --git a/ecmascript/ecma_string.h b/ecmascript/ecma_string.h index 7334ee7773..144118505f 100644 --- a/ecmascript/ecma_string.h +++ b/ecmascript/ecma_string.h @@ -171,7 +171,7 @@ public: } return length; } - return base::utf_helper::ConvertRegionUtf8ToUtf16(GetDataUtf8(), buf, maxLength, start); + return base::utf_helper::ConvertRegionUtf8ToUtf16(GetDataUtf8(), GetLength(), buf, maxLength, start); } // NOLINTNEXTLINE(modernize-avoid-c-arrays) @@ -245,7 +245,7 @@ public: * Compares strings by bytes, It doesn't check canonical unicode equivalence. */ static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len); - static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, bool canBeCompress); + static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress); static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length); static void SetCompressedStringsEnabled(bool val) @@ -260,8 +260,7 @@ public: static EcmaString *AllocStringObject(size_t length, bool compressed, const EcmaVM *vm); - static bool CanBeCompressed(const uint8_t *utf8Data); - static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len); + static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len = UINT32_MAX); static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len); private: @@ -304,7 +303,8 @@ private: * str1 should have the same length as utf16_data. * Converts utf8Data to utf16 and compare it with given utf16_data. 
*/ - static bool IsUtf8EqualsUtf16(const uint8_t *utf8Data, const uint16_t *utf16Data, uint32_t utf16Len); + static bool IsUtf8EqualsUtf16(const uint8_t *utf8Data, uint32_t utf8Len, + const uint16_t *utf16Data, uint32_t utf16Len); template /** diff --git a/ecmascript/ecma_string_table.cpp b/ecmascript/ecma_string_table.cpp index 88426a7a22..a0e7715c36 100644 --- a/ecmascript/ecma_string_table.cpp +++ b/ecmascript/ecma_string_table.cpp @@ -27,7 +27,7 @@ EcmaStringTable::EcmaStringTable(const EcmaVM *vm) : vm_(vm) {} EcmaString *EcmaStringTable::GetString(const uint8_t *utf8Data, uint32_t utf8Len, bool canBeCompress) const { - uint32_t hashCode = EcmaString::ComputeHashcodeUtf8(utf8Data, canBeCompress); + uint32_t hashCode = EcmaString::ComputeHashcodeUtf8(utf8Data, utf8Len, canBeCompress); for (auto it = table_.find(hashCode); it != table_.end(); it++) { auto foundedString = it->second; if (EcmaString::StringsAreEqualUtf8(foundedString, utf8Data, utf8Len, canBeCompress)) { -- Gitee From 85fa51f55ae52d5e3691b08cc9fe084f821314c9 Mon Sep 17 00:00:00 2001 From: bianshaolei Date: Tue, 28 Dec 2021 10:20:19 +0800 Subject: [PATCH 4/6] code align. 
Signed-off-by: bianshaolei --- ecmascript/ecma_string.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ecmascript/ecma_string.cpp b/ecmascript/ecma_string.cpp index 19bc8785e7..d284bad604 100644 --- a/ecmascript/ecma_string.cpp +++ b/ecmascript/ecma_string.cpp @@ -432,7 +432,8 @@ uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len } else { auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len); CVector tmpBuffer(utf16Len); - [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, utf8Len, tmpBuffer.data(), utf16Len, 0); + [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, utf8Len, + tmpBuffer.data(), utf16Len, 0); ASSERT(len == utf16Len); hash = ComputeHashForData(tmpBuffer.data(), utf16Len); } @@ -447,13 +448,13 @@ uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t le /* static */ bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, uint32_t utf8Len, - const uint16_t *utf16Data, uint32_t utf16Len) + const uint16_t *utf16Data, uint32_t utf16Len) { // length is one more than compared utf16Data, don't need convert all utf8Data to utf16Data uint32_t utf8ConvertLength = utf16Len + 1; CVector tmpBuffer(utf8ConvertLength); auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, utf8Len, - tmpBuffer.data(), utf8ConvertLength, 0); + tmpBuffer.data(), utf8ConvertLength, 0); if (len != utf16Len) { return false; } -- Gitee From 07b24a9978f0f17f365908d6a16681329a6abdca Mon Sep 17 00:00:00 2001 From: bianshaolei Date: Fri, 31 Dec 2021 10:53:03 +0800 Subject: [PATCH 5/6] fix test: ComputeHashcodeUtf8 fix strlen in L0: include string.h Signed-off-by: bianshaolei --- ecmascript/base/utf_helper.cpp | 1 + ecmascript/tests/ecma_string_test.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ecmascript/base/utf_helper.cpp b/ecmascript/base/utf_helper.cpp index 8dc9aa9ca6..a528c935b4 100644 --- 
a/ecmascript/base/utf_helper.cpp +++ b/ecmascript/base/utf_helper.cpp @@ -14,6 +14,7 @@ */ #include "ecmascript/base/utf_helper.h" +#include <string.h> // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) static constexpr int32_t U16_SURROGATE_OFFSET = (0xd800 << 10UL) + 0xdc00 - 0x10000; diff --git a/ecmascript/tests/ecma_string_test.cpp b/ecmascript/tests/ecma_string_test.cpp index f3e39b5b2e..0769dc10f1 100644 --- a/ecmascript/tests/ecma_string_test.cpp +++ b/ecmascript/tests/ecma_string_test.cpp @@ -1647,7 +1647,7 @@ HWTEST_F_L0(EcmaStringTest, ComputeHashcodeUtf8) for (uint32_t i = 0; i < lengthEcmaStrU8; i++) { hashExpect = hashExpect * 31 + arrayU8[i]; } - EXPECT_EQ(EcmaString::ComputeHashcodeUtf8(&arrayU8[0], lengthEcmaStrU8), static_cast(hashExpect)); + EXPECT_EQ(EcmaString::ComputeHashcodeUtf8(&arrayU8[0], lengthEcmaStrU8, false), static_cast(hashExpect)); } /* -- Gitee From dc2b74f48eefe7a2ed9a3cf780e33c9efdf3ee45 Mon Sep 17 00:00:00 2001 From: bianshaolei Date: Fri, 31 Dec 2021 11:01:13 +0800 Subject: [PATCH 6/6] cstring instead of string.h Signed-off-by: bianshaolei --- ecmascript/base/utf_helper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ecmascript/base/utf_helper.cpp b/ecmascript/base/utf_helper.cpp index a528c935b4..1859863fa0 100644 --- a/ecmascript/base/utf_helper.cpp +++ b/ecmascript/base/utf_helper.cpp @@ -14,7 +14,7 @@ */ #include "ecmascript/base/utf_helper.h" -#include <string.h> +#include <cstring> // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) static constexpr int32_t U16_SURROGATE_OFFSET = (0xd800 << 10UL) + 0xdc00 - 0x10000; -- Gitee