From d2db6e33c415d8808ca21064f7f480d6c62234f6 Mon Sep 17 00:00:00 2001 From: Aleksandr Emelenko Date: Tue, 15 Aug 2023 16:23:02 +0300 Subject: [PATCH] Save string hash inside MarkWord. Change-Id: I36260e8c552bf73778960f902b87643277f10ab0 Signed-off-by: Aleksandr Emelenko --- runtime/coretypes/string.cpp | 27 +++++++++++-------- runtime/include/coretypes/string.h | 16 +++++++---- runtime/include/object_header-inl.h | 1 + runtime/include/object_header.h | 2 ++ runtime/object_header.cpp | 41 +++++++++++++++++++++++++++++ 5 files changed, 71 insertions(+), 16 deletions(-) diff --git a/runtime/coretypes/string.cpp b/runtime/coretypes/string.cpp index 29e98b5b9..b18e555d8 100644 --- a/runtime/coretypes/string.cpp +++ b/runtime/coretypes/string.cpp @@ -14,6 +14,7 @@ */ #include +#include #include #include @@ -46,7 +47,10 @@ String *String::CreateFromString(String *str, const LanguageContext &ctx, PandaV // retrive str after gc str = str_handle.GetPtr(); - string->hashcode_ = str->hashcode_; + uint32_t str_hashcode = str->GetStringHashCode(); + if (str_hashcode != 0U) { + string->SetHashcode(str_hashcode); + } uint32_t length = str->GetLength(); // After memcpy we should have a full barrier, so this writes should happen-before barrier @@ -73,7 +77,7 @@ String *String::CreateFromMUtf8(const uint8_t *mutf8_data, size_t mutf8_length, return nullptr; } - ASSERT(string->hashcode_ == 0); + ASSERT(string->GetStringHashCode() == 0); // After copying we should have a full barrier, so this writes should happen-before barrier TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); if (can_be_compressed) { @@ -140,7 +144,7 @@ String *String::CreateFromUtf16(const uint16_t *utf16_data, uint32_t utf16_lengt return nullptr; } - ASSERT(string->hashcode_ == 0); + ASSERT(string->GetStringHashCode() == 0); // After copying we should have a full barrier, so this writes should happen-before barrier TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); if (can_be_compressed) { @@ -197,7 +201,7 @@ String 
*String::CreateNewStringFromChars(uint32_t offset, uint32_t length, Array // retrieve src since gc may move it src = reinterpret_cast(ToUintPtr(array_handle->GetData()) + (offset << 1UL)); - ASSERT(string->hashcode_ == 0); + ASSERT(string->GetStringHashCode() == 0); // After copying we should have a full barrier, so this writes should happen-before barrier TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); if (can_be_compressed) { @@ -235,7 +239,7 @@ String *String::CreateNewStringFromBytes(uint32_t offset, uint32_t length, uint3 // retrieve src since gc may move it src = reinterpret_cast(ToUintPtr(array_handle->GetData()) + offset); - ASSERT(string->hashcode_ == 0); + ASSERT(string->GetStringHashCode() == 0); // After copying we should have a full barrier, so this writes should happen-before barrier TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); if (can_be_compressed) { @@ -642,7 +646,7 @@ static int32_t ComputeHashForData(const T *data, size_t size) constexpr size_t SHIFT = 5; hash = (hash << SHIFT) - hash + c; } - return static_cast(hash); + return static_cast(hash) & MarkWord::HASH_MASK; } static int32_t ComputeHashForMutf8(const uint8_t *mutf8_data) @@ -652,7 +656,7 @@ static int32_t ComputeHashForMutf8(const uint8_t *mutf8_data) constexpr size_t SHIFT = 5; hash = (hash << SHIFT) - hash + *mutf8_data++; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) } - return static_cast(hash); + return static_cast(hash) & MarkWord::HASH_MASK; } uint32_t String::ComputeHashcode() @@ -724,7 +728,7 @@ String *String::DoReplace(String *src, uint16_t old_c, uint16_t new_c, const Lan // retrieve src after gc src = src_handle.GetPtr(); - ASSERT(string->hashcode_ == 0); + ASSERT(string->GetStringHashCode() == 0); // After replacing we should have a full barrier, so this writes should happen-before barrier TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); @@ -774,7 +778,7 @@ String *String::FastSubString(String *src, uint32_t start, uint32_t utf16_length // retrieve src after gc src = src_handle.GetPtr(); - 
ASSERT(string->hashcode_ == 0); + ASSERT(string->GetStringHashCode() == 0); // After copying we should have a full barrier, so this writes should happen-before barrier TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); @@ -817,7 +821,7 @@ String *String::Concat(String *string1, String *string2, const LanguageContext & return nullptr; } - ASSERT(new_string->hashcode_ == 0); + ASSERT(new_string->GetStringHashCode() == 0); // retrieve strings after gc string1 = str1_handle.GetPtr(); @@ -872,7 +876,8 @@ String *String::AllocStringObject(size_t length, bool compressed, const Language // After setting length we should have a full barrier, so this write should happens-before barrier TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); string->SetLength(length, compressed); - string->SetHashcode(0); + ASSERT(string->GetStringHashCode() == 0U); + // string->SetHashcode(0); TSAN_ANNOTATE_IGNORE_WRITES_END(); // Witout full memory barrier it is possible that architectures with weak memory order can try fetching string // legth before it's set diff --git a/runtime/include/coretypes/string.h b/runtime/include/coretypes/string.h index c7af18674..4f82bd546 100644 --- a/runtime/include/coretypes/string.h +++ b/runtime/include/coretypes/string.h @@ -222,10 +222,16 @@ public: uint32_t GetHashcode() { - if (hashcode_ == 0) { - hashcode_ = ComputeHashcode(); + (void)none_; + uint32_t hashcode = this->GetStringHashCode(); + if (hashcode == 0) { + hashcode = ComputeHashcode(); + this->SaveStringHashCode(hashcode); + } else { + ASSERT(hashcode == ComputeHashcode()); } - return hashcode_; + ASSERT((hashcode & (~MarkWord::HASH_MASK)) == 0U); + return hashcode; } int32_t IndexOf(String *rhs, int pos = 0); @@ -293,7 +299,7 @@ protected: void SetHashcode(uint32_t hashcode) { - hashcode_ = hashcode; + this->SaveStringHashCode(hashcode); } uint32_t ComputeHashcode(); @@ -342,7 +348,7 @@ private: // In last bit of length_ we store if this string is compressed or not. 
uint32_t length_; - uint32_t hashcode_; + uint32_t none_ {0}; // A pointer to the string data stored after the string header. // Data can be stored in mutf8 or utf16 form according to compressed bit. __extension__ uint16_t data_utf16_[0]; // NOLINT(modernize-avoid-c-arrays) diff --git a/runtime/include/object_header-inl.h b/runtime/include/object_header-inl.h index 12b21fde2..36b8c067b 100644 --- a/runtime/include/object_header-inl.h +++ b/runtime/include/object_header-inl.h @@ -15,6 +15,7 @@ #ifndef PANDA_RUNTIME_OBJECT_HEADER_INL_H_ #define PANDA_RUNTIME_OBJECT_HEADER_INL_H_ +#include #include "runtime/include/class-inl.h" #include "runtime/include/field.h" #include "runtime/include/object_accessor-inl.h" diff --git a/runtime/include/object_header.h b/runtime/include/object_header.h index 589b6352f..c46499fae 100644 --- a/runtime/include/object_header.h +++ b/runtime/include/object_header.h @@ -121,6 +121,8 @@ public: template uint32_t GetHashCode(); uint32_t GetHashCodeFromMonitor(Monitor *monitor_p); + void SaveStringHashCode(uint32_t hash); + uint32_t GetStringHashCode(); // Size of object header static constexpr size_t ObjectHeaderSize() diff --git a/runtime/object_header.cpp b/runtime/object_header.cpp index c227e180b..38e9d7c0f 100644 --- a/runtime/object_header.cpp +++ b/runtime/object_header.cpp @@ -85,6 +85,47 @@ uint32_t ObjectHeader::GetHashCodeFromMonitor(Monitor *monitor_p) return monitor_p->GetHashCode(); } +void ObjectHeader::SaveStringHashCode(uint32_t hash) +{ + auto mark = GetMark(); + switch (mark.GetState()) { + case MarkWord::STATE_UNLOCKED: { + mark = mark.DecodeFromHash(hash); + ASSERT(mark.GetState() == MarkWord::STATE_HASHED); + SetMark(mark); + return; + } + case MarkWord::STATE_HASHED: { + [[maybe_unused]] uint32_t saved_hash = mark.GetHash(); + // DEBUG CERR + if (saved_hash != hash) { + std::cerr << "saved_hash " << std::hex << saved_hash << "; hash " << hash << std::endl; + } + ASSERT(saved_hash == hash); + return; + } + default: + 
LOG(FATAL, RUNTIME) << "Error on SaveStringHashCode(): invalid state"; + return; + } +} + +uint32_t ObjectHeader::GetStringHashCode() +{ + auto mark = GetMark(); + switch (mark.GetState()) { + case MarkWord::STATE_UNLOCKED: {  // no hash stored in the mark word: report 0 + return 0U; + } + case MarkWord::STATE_HASHED: {  // hash is kept in the mark word itself + return mark.GetHash(); + } + default:  // any other mark state is treated as a fatal error here + LOG(FATAL, RUNTIME) << "Error on GetStringHashCode(): invalid state"; + return 0U; + } +} + uint32_t ObjectHeader::GetHashCodeMTSingle() { auto mark = GetMark(); -- Gitee