diff --git a/BUILD.gn b/BUILD.gn index 042bdd5acfe84236ddf5532cfe8110fccf5a6e5e..caae009e7d258763017a53f1f0325796ef02760e 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -339,6 +339,7 @@ ecma_source = [ "ecmascript/mem/c_string.cpp", "ecmascript/mem/chunk.cpp", "ecmascript/mem/compress_collector.cpp", + "ecmascript/mem/concurrent_marker.cpp", "ecmascript/mem/concurrent_sweeper.cpp", "ecmascript/mem/ecma_heap_manager.cpp", "ecmascript/mem/free_object_kind.cpp", @@ -347,10 +348,10 @@ ecma_source = [ "ecmascript/mem/heap.cpp", "ecmascript/mem/mem_controller.cpp", "ecmascript/mem/old_space_collector.cpp", + "ecmascript/mem/parallel_work_helper.cpp", "ecmascript/mem/region_factory.cpp", "ecmascript/mem/semi_space_collector.cpp", "ecmascript/mem/semi_space_marker.cpp", - "ecmascript/mem/semi_space_worker.cpp", "ecmascript/mem/space.cpp", "ecmascript/mem/tagged_object.cpp", "ecmascript/mem/verification.cpp", diff --git a/ecmascript/ecma_vm.cpp b/ecmascript/ecma_vm.cpp index 7d5de2b91a12440d55cb65e802131e2497025e67..782d068a6ec52eadf7f7a4e338709dd9ea3f898c 100644 --- a/ecmascript/ecma_vm.cpp +++ b/ecmascript/ecma_vm.cpp @@ -33,6 +33,7 @@ #include "ecmascript/js_for_in_iterator.h" #include "ecmascript/js_invoker.h" #include "ecmascript/js_thread.h" +#include "ecmascript/mem/concurrent_marker.h" #include "ecmascript/mem/heap.h" #include "ecmascript/tagged_dictionary.h" #include "ecmascript/object_factory.h" diff --git a/ecmascript/interpreter/interpreter-inl.h b/ecmascript/interpreter/interpreter-inl.h index ad41ab53773fbfafc1d004be10297c2c21094e52..de0aec204926fc60f9716efc660869dbcc011531 100644 --- a/ecmascript/interpreter/interpreter-inl.h +++ b/ecmascript/interpreter/interpreter-inl.h @@ -27,6 +27,7 @@ #include "ecmascript/js_generator_object.h" #include "ecmascript/js_tagged_value.h" #include "ecmascript/literal_data_extractor.h" +#include "ecmascript/mem/concurrent_marker.h" #include "ecmascript/runtime_call_id.h" #include "ecmascript/template_string.h" #include 
"ecmascript/vmstat/runtime_stat.h" @@ -3314,6 +3315,10 @@ bool EcmaInterpreter::UpdateHotnessCounter(JSThread* thread, TaggedType *sp, JST hotnessCounter += offset; if (UNLIKELY(hotnessCounter <= 0)) { + if (thread->IsConcurrentMarkingFinished()) { + auto marker = thread->GetEcmaVM()->GetHeap()->GetConcurrentMarker(); + marker->HandleGCRequest(); + } if (state->profileTypeInfo == JSTaggedValue::Undefined()) { state->acc = acc; auto numVregs = method->GetNumVregs(); diff --git a/ecmascript/js_thread.h b/ecmascript/js_thread.h index 9edc94579a5dff36f247faa1f3cecbf0c84e9fb6..29e7b03ac1a47497ef2196ad43f1dd00b2144feb 100644 --- a/ecmascript/js_thread.h +++ b/ecmascript/js_thread.h @@ -29,8 +29,17 @@ class EcmaVM; class RegionFactory; class InternalCallParams; +enum class ConcurentMarkingStatus : uint8_t +{ + INITIALIZED, + CONCURRENT_MARKING, + FINISHED +}; + class JSThread : public ManagedThread { public: + static constexpr int CONCURRENT_MARKING_BITFIELD_NUM = 2; + using ConcurrentMarkingStatusBits = BitField; using Address = uintptr_t; static JSThread *Cast(ManagedThread *thread) { @@ -199,6 +208,29 @@ public: void IterateWeakEcmaGlobalStorage(const WeakRootVisitor &visitor); + void SetConcurrentMarkingStatus(ConcurentMarkingStatus status) + { + uint64_t newVal = ConcurrentMarkingStatusBits::Update(threadStatusBitField_.load(std::memory_order_relaxed), status); + threadStatusBitField_.store(newVal, std::memory_order_release); + } + + bool IsConcurrentMarkingInitialized() const + { + auto status = ConcurrentMarkingStatusBits::Decode(threadStatusBitField_.load(std::memory_order_acquire)); + return status == ConcurentMarkingStatus::INITIALIZED; + } + + bool IsConcurrentMarking() const + { + auto status = ConcurrentMarkingStatusBits::Decode(threadStatusBitField_.load(std::memory_order_acquire)); + return status == ConcurentMarkingStatus::CONCURRENT_MARKING; + } + + bool IsConcurrentMarkingFinished() const + { + auto status = 
ConcurrentMarkingStatusBits::Decode(threadStatusBitField_.load(std::memory_order_acquire)); + return status == ConcurentMarkingStatus::FINISHED; + } private: NO_COPY_SEMANTIC(JSThread); NO_MOVE_SEMANTIC(JSThread); @@ -233,6 +265,7 @@ private: bool stableArrayElementsGuardians_ {true}; GlobalEnvConstants globalConst_; // Place-Holder InternalCallParams *internalCallParams_ {nullptr}; + std::atomic threadStatusBitField_ {0ULL}; friend class EcmaHandleScope; friend class GlobalHandleCollection; diff --git a/ecmascript/mem/barriers-inl.h b/ecmascript/mem/barriers-inl.h index 494474386f8761d62ac80f4871f2c783bb4778c0..2251b268f8312ce1af30b6f214d2ae26b15b7b3a 100644 --- a/ecmascript/mem/barriers-inl.h +++ b/ecmascript/mem/barriers-inl.h @@ -24,14 +24,16 @@ namespace panda::ecmascript { static inline void MarkingBarrier(void *obj, size_t offset, JSTaggedType value) { - ASSERT(value != JSTaggedValue::VALUE_UNDEFINED); - Region *object_region = Region::ObjectAddressToRange(static_cast(obj)); - Region *value_region = Region::ObjectAddressToRange(reinterpret_cast(value)); - if (!object_region->InYoungGeneration() && value_region->InYoungGeneration()) { - [[maybe_unused]] uintptr_t slot_addr = ToUintPtr(obj) + offset; + Region *objectRegion = Region::ObjectAddressToRange(static_cast(obj)); + Region *valueRegion = Region::ObjectAddressToRange(reinterpret_cast(value)); + if (!objectRegion->InYoungGeneration() && valueRegion->InYoungGeneration()) { + uintptr_t slotAddr = ToUintPtr(obj) + offset; // Should align with '8' in 64 and 32 bit platform - ASSERT((slot_addr % static_cast(MemAlignment::MEM_ALIGN_OBJECT)) == 0); - object_region->InsertOldToNewRememberedSet(slot_addr); + objectRegion->InsertOldToNewRememberedSet(slotAddr); + } + if (objectRegion->IsConcurrentMarking()) { + uintptr_t slotAddr = ToUintPtr(obj) + offset; + objectRegion->InsertReferenceSet(slotAddr); } } diff --git a/ecmascript/mem/compress_collector.cpp b/ecmascript/mem/compress_collector.cpp index 
8fd14bbb395dbcae4ba6ec0a64061db65937c143..e3dd08946d7ae320e56ccde5b9159637da951072 100644 --- a/ecmascript/mem/compress_collector.cpp +++ b/ecmascript/mem/compress_collector.cpp @@ -30,7 +30,8 @@ namespace panda::ecmascript { CompressCollector::CompressCollector(Heap *heap, bool parallelGc) : heap_(heap), paralledGC_(parallelGc), marker_(this), rootManager_(heap->GetEcmaVM()) { - workList_ = new CompressGCWorker(heap_, heap_->GetThreadPool()->GetThreadNum()); + workList_ = new WorkerHelper(heap_, Platform::GetCurrentPlatform()->GetTotalThreadNum() + 1, + ParallelGCTaskPhase::COMPRESS_HANDLE_GLOBAL_POOL_TASK); } CompressCollector::~CompressCollector() @@ -58,7 +59,7 @@ void CompressCollector::RunPhases() void CompressCollector::InitializePhase() { - heap_->GetThreadPool()->WaitTaskFinish(); + heap_->WaitRunningTaskFinished(); heap_->GetSweeper()->EnsureAllTaskFinish(); auto compressSpace = const_cast(heap_->GetCompressSpace()); if (compressSpace->GetCommittedSize() == 0) { @@ -93,7 +94,7 @@ void CompressCollector::InitializePhase() heap_->FlipCompressSpace(); heap_->FlipNewSpace(); - workList_->Initialize(); + workList_->Initialize(TriggerGCType::COMPRESS_FULL_GC); youngAndOldAliveSize_ = 0; nonMoveSpaceFreeSize_ = 0; youngSpaceCommitSize_ = heap_->GetFromSpace()->GetCommittedSize(); @@ -105,11 +106,7 @@ void CompressCollector::FinishPhase() { // swap if (paralledGC_) { - heap_->GetThreadPool()->Submit([this]([[maybe_unused]] uint32_t threadId) -> bool { - const_cast(heap_->GetCompressSpace())->ReclaimRegions(); - const_cast(heap_->GetFromSpace())->ReclaimRegions(); - return true; - }); + heap_->PostParallelGCTask(ParallelGCTaskPhase::COMPRESS_HANDLE_RECLIAM_REGION_TASK); } else { const_cast(heap_->GetCompressSpace())->ReclaimRegions(); const_cast(heap_->GetFromSpace())->ReclaimRegions(); @@ -140,9 +137,7 @@ void CompressCollector::MarkingPhase() rootManager_.VisitVMRoots(gcMarkYoung, gcMarkRangeYoung); ProcessMarkStack(0); - if (paralledGC_) { - 
heap_->GetThreadPool()->WaitTaskFinish(); - } + heap_->WaitRunningTaskFinished(); } void CompressCollector::ProcessMarkStack(uint32_t threadId) @@ -177,7 +172,8 @@ void CompressCollector::SweepPhases() { trace::ScopedTrace scoped_trace("CompressCollector::SweepPhases"); // process weak reference - for (uint32_t i = 0; i < heap_->GetThreadPool()->GetThreadNum(); i++) { + auto totalThreadCount = Platform::GetCurrentPlatform()->GetTotalThreadNum() + 1; // gc thread and main thread + for (uint32_t i = 0; i < totalThreadCount; i++) { ProcessQueue *queue = workList_->GetWeakReferenceQueue(i); while (true) { @@ -221,8 +217,7 @@ void CompressCollector::SweepPhases() MarkWord markWord(header); if (markWord.IsForwardingAddress()) { - TaggedObject *dst = markWord.ToForwardingAddress(); - return dst; + return markWord.ToForwardingAddress(); } return reinterpret_cast(ToUintPtr(nullptr)); }; diff --git a/ecmascript/mem/compress_collector.h b/ecmascript/mem/compress_collector.h index 8cfb405ed12ae38dd66b517404d5e390ae5bcdb3..6e6c8c79dba70f6ed39d936494ed5fc42c8d9980 100644 --- a/ecmascript/mem/compress_collector.h +++ b/ecmascript/mem/compress_collector.h @@ -17,8 +17,8 @@ #define ECMASCRIPT_MEM_COMPRESS_COLLECTOR_H #include "ecmascript/mem/compress_gc_marker.h" +#include "ecmascript/mem/parallel_work_helper.h" #include "ecmascript/mem/semi_space_collector.h" -#include "ecmascript/mem/semi_space_worker.h" namespace panda { namespace ecmascript { @@ -57,7 +57,7 @@ private: bool paralledGC_; CompressGCMarker marker_; HeapRootManager rootManager_; - CompressGCWorker *workList_; + WorkerHelper *workList_; os::memory::Mutex mtx_; BumpPointerAllocator fromSpaceAllocator_{}; FreeListAllocator oldSpaceAllocator_{}; @@ -70,7 +70,8 @@ private: friend class TlabAllocator; friend class CompressGCMarker; - friend class CompressGCWorker; + friend class WorkerHelper; + friend class Heap; }; } // namespace ecmascript } // namespace panda diff --git a/ecmascript/mem/concurrent_marker.cpp 
b/ecmascript/mem/concurrent_marker.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3ad406028408345dd66199ee017e16749822e1f9
--- /dev/null
+++ b/ecmascript/mem/concurrent_marker.cpp
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 2021 Huawei Device Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ecmascript/mem/concurrent_marker.h"
+
+#include "ecmascript/mem/allocator-inl.h"
+#include "ecmascript/mem/heap-inl.h"
+#include "ecmascript/mem/heap_roots-inl.h"
+#include "ecmascript/platform/platform.h"
+
+#include "ecmascript/mem/verification.h"
+
+#include "os/mutex.h"
+
+namespace panda::ecmascript {
+// Creates the marker and its work lists: one slot per platform thread plus one for the main thread.
+ConcurrentMarker::ConcurrentMarker(Heap *heap) : heap_(heap), rootManager_(heap_->GetEcmaVM())
+{
+    workList_ = new WorkerHelper(heap_, Platform::GetCurrentPlatform()->GetTotalThreadNum() + 1,
+                                 ParallelGCTaskPhase::CONCURRENT_HANDLE_GLOBAL_POOL_TASK);
+}
+
+// Starts a marking cycle: prepares the regions and marks the roots on the calling thread, then posts a
+// task to the platform that drains the mark stack.
+void ConcurrentMarker::ConcurrentMarking()
+{
+    InitializeMarking();
+    heap_->GetEcmaVM()->GetJSThread()->SetConcurrentMarkingStatus(ConcurentMarkingStatus::CONCURRENT_MARKING);
+    Platform::GetCurrentPlatform()->PostTask(std::make_unique<ConcurrentMarkerTask>(this));
+}
+
+// Clears recorded weak references whose targets are not marked, then starts the sweeper.
+void ConcurrentMarker::SweepPhases(WorkerHelper *worklist)
+{
+    auto totalThreadCount = Platform::GetCurrentPlatform()->GetTotalThreadNum() + 1; // gc thread and main thread
+    for (uint32_t i = 0; i < totalThreadCount; i++) {
+        ProcessQueue *queue = worklist->GetWeakReferenceQueue(i);
+        while (true) {
+            auto obj = queue->PopBack();
+            if (UNLIKELY(obj == nullptr)) {
+                break;
+            }
+            ObjectSlot slot(ToUintPtr(obj));
+            JSTaggedValue value(slot.GetTaggedType());
+
+            if (value.IsHole()) {
+                continue;
+            }
+            auto header = value.GetTaggedWeakRef();
+
+            Region *objectRegion = Region::ObjectAddressToRange(header);
+            auto markBitmap = objectRegion->GetMarkBitmap();
+            if (!markBitmap->Test(header)) {
+                slot.Update(static_cast<JSTaggedType>(JSTaggedValue::Undefined().GetRawData()));
+            }
+        }
+    }
+
+    auto stringTable = heap_->GetEcmaVM()->GetEcmaStringTable();
+    // Young-generation objects are always kept; other objects are kept only when their mark bit is set.
+    WeakRootVisitor gcUpdateWeak = [](TaggedObject *header) {
+        Region *objectRegion = Region::ObjectAddressToRange(header);
+        if (objectRegion->InYoungGeneration()) {
+            return header;
+        }
+
+        auto markBitmap = objectRegion->GetMarkBitmap();
+        if (markBitmap->Test(header)) {
+            return header;
+        }
+        return reinterpret_cast<TaggedObject *>(ToUintPtr(nullptr));
+    };
+    stringTable->SweepWeakReference(gcUpdateWeak);
+    heap_->GetEcmaVM()->GetJSThread()->IterateWeakEcmaGlobalStorage(gcUpdateWeak);
+    heap_->GetEcmaVM()->ProcessReferences(gcUpdateWeak);
+
+    heap_->GetSweeper()->SweepPhases();
+}
+
+// Finishes the work lists for this cycle; the alive size reported by Finish() is not used.
+void ConcurrentMarker::FinishPhase(WorkerHelper *worklist)
+{
+    size_t aliveSize = 0;
+    worklist->Finish(aliveSize);
+}
+
+// Re-marks from the roots and from every slot recorded in the regions' reference sets, then drains the
+// mark stack on the calling thread (work slot 0).
+void ConcurrentMarker::ReMarking(WorkerHelper *worklist)
+{
+    MarkRoots(worklist);
+    heap_->EnumerateRegions([this, worklist](Region *current) {
+        auto referenceSet = current->GetReferenceSet();
+        if (referenceSet == nullptr) {
+            return;
+        }
+        referenceSet->IterateOverMarkedChunks([this, worklist](void *mem) -> bool {
+            ObjectSlot slot(ToUintPtr(mem));
+            JSTaggedValue value(slot.GetTaggedType());
+            if (value.IsWeak()) {
+                RecordWeakReference(worklist, 0, reinterpret_cast<JSTaggedType *>(slot.SlotAddress()));
+            } else if (value.IsHeapObject()) {
+                MarkObject(worklist, 0, value.GetTaggedObject());
+            }
+            return true;
+        });
+    });
+    ParallelMarkStack(worklist, 0);
+}
+
+// Marks every heap object that is referenced directly from a VM root, using work slot 0.
+void ConcurrentMarker::MarkRoots(WorkerHelper *worklist)
+{
+    RootVisitor gcMark = [this, worklist]([[maybe_unused]] Root type, ObjectSlot slot) {
+        JSTaggedValue value(slot.GetTaggedType());
+        if (value.IsHeapObject()) {
+            MarkObject(worklist, 0, value.GetTaggedObject());
+        }
+    };
+    RootRangeVisitor gcMarkRange = [this, worklist]([[maybe_unused]] Root type, ObjectSlot start, ObjectSlot end) {
+        for (ObjectSlot slot = start; slot < end; slot++) {
+            JSTaggedValue value(slot.GetTaggedType());
+            if (value.IsHeapObject()) {
+                MarkObject(worklist, 0, value.GetTaggedObject());
+            }
+        }
+    };
+    HeapRootManager rootManager(heap_->GetEcmaVM());
+    rootManager.VisitVMRoots(gcMark, gcMarkRange);
+}
+
+// Called on the js-thread: runs an old-space GC if marking finished while no thread was waiting for it.
+void ConcurrentMarker::HandleGCRequest()
+{
+    bool needCollect = false;
+    {
+        os::memory::LockHolder lock(waitMarkingFinishedMutex_);
+        needCollect = notifyMarkingFinished_;
+    }
+    // Collect outside the critical section: the old-space collection calls Reset(), which takes this mutex.
+    if (needCollect) {
+        heap_->CollectGarbage(TriggerGCType::OLD_GC);
+    }
+}
+
+// Called on the EcmaVM thread: blocks until the marking task has reported that marking is finished.
+void ConcurrentMarker::WaitConcurrentMarkingFinished()
+{
+    os::memory::LockHolder lock(waitMarkingFinishedMutex_);
+    if (notifyMarkingFinished_) {
+        return;
+    }
+    vmThreadWaitMarkingFinished_ = true;
+    // MarkingFinished() clears the flag before signalling; loop so a spurious wakeup cannot end the wait.
+    while (vmThreadWaitMarkingFinished_) {
+        waitMarkingFinishedCV_.Wait(&waitMarkingFinishedMutex_);
+    }
+}
+
+// Returns the marker to the INITIALIZED state so that a new marking cycle can be triggered.
+void ConcurrentMarker::Reset()
+{
+    os::memory::LockHolder lock(waitMarkingFinishedMutex_);
+    heap_->GetEcmaVM()->GetJSThread()->SetConcurrentMarkingStatus(ConcurentMarkingStatus::INITIALIZED);
+    notifyMarkingFinished_ = false;
+}
+
+// -------------------- private methods ------------------------------------------
+// Waits for outstanding GC tasks, resets the per-region marking state and marks the roots.
+void ConcurrentMarker::InitializeMarking()
+{
+    heap_->WaitRunningTaskFinished();
+    heap_->GetSweeper()->EnsureAllTaskFinish();
+    heap_->EnumerateRegions([](Region *current) {
+        // ensure mark bitmap
+        auto bitmap = current->GetMarkBitmap();
+        if (bitmap == nullptr) {
+            current->GetOrCreateMarkBitmap();
+        } else {
+            bitmap->ClearAllBits();
+        }
+        auto referenceSet = current->GetReferenceSet();
+        if (referenceSet == nullptr) {
+            current->GetOrCreateReferenceSet();
+        } else {
+            referenceSet->ClearAllBits();
+        }
+        current->SetConcurrentMarking(true);
+    });
+    workList_->Initialize(TriggerGCType::OLD_GC);
+    MarkRoots(workList_);
+}
+
+// Marks `object`; it is queued for scanning only when AtomicTestAndSet() returns false.
+void ConcurrentMarker::MarkObject(WorkerHelper *worklist, uint32_t threadId, TaggedObject *object)
+{
+    Region *objectRegion = Region::ObjectAddressToRange(object);
+
+    auto markBitmap = objectRegion->GetOrCreateMarkBitmap();
+    if (!markBitmap->AtomicTestAndSet(object)) {
+        worklist->Push(threadId, object);
+    }
+}
+
+// Queues a slot that holds a weak reference; SweepPhases() later decides whether to clear it.
+void ConcurrentMarker::RecordWeakReference(WorkerHelper *worklist, uint32_t threadId, JSTaggedType *ref)
+{
+    worklist->PushWeakReference(threadId, ref);
+}
+
+// Pops objects from the work slot of `threadId` until Pop() fails, marking each object's class and
+// every heap object referenced from its body.
+void ConcurrentMarker::ParallelMarkStack(WorkerHelper *worklist, uint32_t threadId)
+{
+    // Constructed once per call instead of once per popped object.
+    HeapRootManager rootManager(heap_->GetEcmaVM());
+    while (true) {
+        TaggedObject *obj = nullptr;
+        if (!worklist->Pop(threadId, &obj)) {
+            break;
+        }
+        auto jsHclass = obj->GetClass();
+        // mark dynClass
+        MarkObject(worklist, threadId, jsHclass);
+
+        rootManager.MarkObjectBody(obj, jsHclass,
+            [this, threadId, worklist]([[maybe_unused]] TaggedObject *root, ObjectSlot start, ObjectSlot end) {
+                for (ObjectSlot slot = start; slot < end; slot++) {
+                    JSTaggedValue value(slot.GetTaggedType());
+                    if (value.IsWeak()) {
+                        RecordWeakReference(worklist, threadId, reinterpret_cast<JSTaggedType *>(slot.SlotAddress()));
+                        continue;
+                    }
+                    if (value.IsHeapObject()) {
+                        MarkObject(worklist, threadId, value.GetTaggedObject());
+                    }
+                }
+            });
+    }
+}
+
+// Task body: drains the marking work, waits for running GC tasks and reports that marking is finished.
+bool ConcurrentMarker::ConcurrentMarkerTask::Run([[maybe_unused]] uint32_t threadId)
+{
+    // InitializeMarking() pushed the roots with thread id 0, so that is the slot drained here.
+    marker_->ParallelMarkStack(marker_->workList_, 0);
+    marker_->heap_->WaitRunningTaskFinished();
+    marker_->MarkingFinished();
+    return true;
+}
+
+// Publishes the end of marking: wakes a waiting thread, or leaves a request for HandleGCRequest().
+void ConcurrentMarker::MarkingFinished()
+{
+    os::memory::LockHolder lock(waitMarkingFinishedMutex_);
+    // Set FINISHED under the mutex so that a thread which observes it and then calls Reset() cannot clear
+    // notifyMarkingFinished_ before it is written below.
+    heap_->GetEcmaVM()->GetJSThread()->SetConcurrentMarkingStatus(ConcurentMarkingStatus::FINISHED);
+    if (vmThreadWaitMarkingFinished_) {
+        vmThreadWaitMarkingFinished_ = false;
+        waitMarkingFinishedCV_.Signal();
+    } else {
+        notifyMarkingFinished_ = true;
+    }
+}
+}  // namespace panda::ecmascript
diff
--git a/ecmascript/mem/concurrent_marker.h b/ecmascript/mem/concurrent_marker.h new file mode 100644 index 0000000000000000000000000000000000000000..43b0176a7a326a08dff26cc7e4a0123f14b8f05b --- /dev/null +++ b/ecmascript/mem/concurrent_marker.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2021 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ECMASCRIPT_MEM_CONCURRENT_MARKER_H +#define ECMASCRIPT_MEM_CONCURRENT_MARKER_H + +#include +#include + +#include "ecmascript/mem/heap_roots.h" +#include "ecmascript/mem/parallel_work_helper.h" +#include "ecmascript/mem/space.h" +#include "ecmascript/platform/task.h" + +#include "os/mutex.h" + +namespace panda::ecmascript { +class Heap; + +class ConcurrentMarker { +public: + ConcurrentMarker(Heap *heap); + ~ConcurrentMarker() = default; + + NO_COPY_SEMANTIC(ConcurrentMarker); + NO_MOVE_SEMANTIC(ConcurrentMarker); + + void ConcurrentMarking(); + void SweepPhases(WorkerHelper *worklist); + void FinishPhase(WorkerHelper *worklist); + void ReMarking(WorkerHelper *worklist); + void MarkRoots(WorkerHelper *worklist); + // Parallel GC threadID starts with 1. 0 is main threadId + void ParallelMarkStack(WorkerHelper *worklist, uint32_t threadId); + + void HandleGCRequest(); // call in vm thread. 
+ void WaitConcurrentMarkingFinished(); // call in main thread + void Reset(); + + WorkerHelper *GetWorklist() const + { + return workList_; + } +private: + + class ConcurrentMarkerTask : public Task { + public: + ConcurrentMarkerTask(ConcurrentMarker *marker) : marker_(marker) {} + ~ConcurrentMarkerTask() override = default; + bool Run(uint32_t threadId) override; + + NO_COPY_SEMANTIC(ConcurrentMarkerTask); + NO_MOVE_SEMANTIC(ConcurrentMarkerTask); + + private: + ConcurrentMarker *marker_ {nullptr}; + }; + + void InitializeMarking(); + void MarkingFinished(); + void MarkObject(WorkerHelper *worklist, uint32_t threadId, TaggedObject *object); + void RecordWeakReference(WorkerHelper *worklist, uint32_t threadId, JSTaggedType *ref); + + Heap *heap_ {nullptr}; + WorkerHelper *workList_ {nullptr}; + HeapRootManager rootManager_; + + bool notifyMarkingFinished_ {false}; // notify js-thread that marking is finished and need sweep + bool vmThreadWaitMarkingFinished_ {false}; // jsMainThread waiting for concurrentGC FINISHED + os::memory::Mutex waitMarkingFinishedMutex_; + os::memory::ConditionVariable waitMarkingFinishedCV_; +}; +} // namespace panda::ecmascript +#endif // ECMASCRIPT_MEM_CONCURRENT_MARKER_H \ No newline at end of file diff --git a/ecmascript/mem/concurrent_sweeper.cpp b/ecmascript/mem/concurrent_sweeper.cpp index 59068857bba9055edfdbe321fee2e324ff5a5595..0bf00bd61d5f00cf8acb97039b2ac48c84dcecec 100644 --- a/ecmascript/mem/concurrent_sweeper.cpp +++ b/ecmascript/mem/concurrent_sweeper.cpp @@ -95,7 +95,10 @@ void ConcurrentSweeper::SweepSpace(MemSpaceType type, bool isMain) void ConcurrentSweeper::SweepSpace(Space *space, FreeListAllocator &allocator) { allocator.RebuildFreeList(); - space->EnumerateRegions([this, &allocator](Region *current) { FreeRegion(current, allocator); }); + space->EnumerateRegions([this, &allocator](Region *current) { + current->SetConcurrentMarking(false); + FreeRegion(current, allocator); + }); } void 
ConcurrentSweeper::SweepHugeSpace() @@ -105,6 +108,7 @@ void ConcurrentSweeper::SweepHugeSpace() Region *currentRegion = space->GetRegionList().GetFirst(); while (currentRegion != nullptr) { + currentRegion->SetConcurrentMarking(false); Region *next = currentRegion->GetNext(); auto markBitmap = currentRegion->GetMarkBitmap(); bool isMarked = false; @@ -235,7 +239,7 @@ void ConcurrentSweeper::FinishSweeping(MemSpaceType type) } } -bool ConcurrentSweeper::SweeperTask::Run() +bool ConcurrentSweeper::SweeperTask::Run(uint32_t threadIndex) { int sweepTypeNum = FREE_LIST_NUM - sweeper_->startSpaceType_; for (size_t i = sweeper_->startSpaceType_; i < FREE_LIST_NUM; i++) { diff --git a/ecmascript/mem/concurrent_sweeper.h b/ecmascript/mem/concurrent_sweeper.h index b24b5a65f5e21ddb40d65bcafeef5706c90bcc48..b52ae85a5dc9edb9c7bdc0b72ae1188a23c8a9b0 100644 --- a/ecmascript/mem/concurrent_sweeper.h +++ b/ecmascript/mem/concurrent_sweeper.h @@ -52,7 +52,7 @@ private: public: SweeperTask(ConcurrentSweeper *sweeper, MemSpaceType type) : sweeper_(sweeper), type_(type) {}; ~SweeperTask() override = default; - bool Run() override; + bool Run(uint32_t threadIndex) override; NO_COPY_SEMANTIC(SweeperTask); NO_MOVE_SEMANTIC(SweeperTask); diff --git a/ecmascript/mem/ecma_heap_manager.h b/ecmascript/mem/ecma_heap_manager.h index 82ec7bc71d7a14cf1e2c6ceadc288ecc5ee7963e..a8b2c8c245e7e595d39e121f14410705834a3b13 100644 --- a/ecmascript/mem/ecma_heap_manager.h +++ b/ecmascript/mem/ecma_heap_manager.h @@ -33,7 +33,6 @@ public: inline TaggedObject *AllocateYoungGenerationOrHugeObject(JSHClass *hclass); inline TaggedObject *TryAllocateYoungGeneration(size_t size); inline TaggedObject *AllocateYoungGenerationOrHugeObject(JSHClass *hclass, size_t size); - inline TaggedObject *AllocateNonMovableOrHugeObject(JSHClass *hclass, size_t size); inline TaggedObject *AllocateNonMovableOrHugeObject(JSHClass *hclass); inline TaggedObject *AllocateHugeObject(JSHClass *hclass, size_t size); diff --git 
a/ecmascript/mem/heap-inl.h b/ecmascript/mem/heap-inl.h index a459e3e372f66fa70142dabc559fbd60b0ab93ed..40b91abc956c8d501d712a5306ce892a46980bb2 100644 --- a/ecmascript/mem/heap-inl.h +++ b/ecmascript/mem/heap-inl.h @@ -17,6 +17,7 @@ #define ECMASCRIPT_MEM_HEAP_INL_H #include "ecmascript/mem/heap.h" + #include "ecmascript/mem/mem_controller.h" #include "ecmascript/mem/space.h" #include "ecmascript/hprof/heap_tracker.h" diff --git a/ecmascript/mem/heap.cpp b/ecmascript/mem/heap.cpp index 76bfc94b4c6795ef08d2fa696bb6e4e049d1d9bf..55f0991fc461542ef820afdb90676f2dde84d4cb 100644 --- a/ecmascript/mem/heap.cpp +++ b/ecmascript/mem/heap.cpp @@ -20,13 +20,14 @@ #include "ecmascript/ecma_vm.h" #include "ecmascript/mem/assert_scope-inl.h" #include "ecmascript/mem/compress_collector.h" +#include "ecmascript/mem/concurrent_marker.h" #include "ecmascript/mem/concurrent_sweeper.h" #include "ecmascript/mem/ecma_heap_manager.h" #include "ecmascript/mem/mark_stack.h" #include "ecmascript/mem/mem_controller.h" #include "ecmascript/mem/old_space_collector.h" +#include "ecmascript/mem/parallel_work_helper.h" #include "ecmascript/mem/semi_space_collector.h" -#include "ecmascript/mem/semi_space_worker.h" #include "ecmascript/mem/verification.h" namespace panda::ecmascript { @@ -56,21 +57,12 @@ void Heap::Initialize() machineCodeSpace_->Initialize(); hugeObjectSpace_ = new HugeObjectSpace(this); bool paralledGc = ecmaVm_->GetOptions().IsEnableParalledYoungGc(); - if (paralledGc) { - int numOfCpuCore = get_nprocs(); - int numThread = std::min(numOfCpuCore, THREAD_NUM_FOR_YOUNG_GC); - pool_ = new ThreadPool(numThread); - semiSpaceCollector_ = new SemiSpaceCollector(this, true); - compressCollector_ = new CompressCollector(this, true); - oldSpaceCollector_ = new OldSpaceCollector(this, true); - } else { - pool_ = new ThreadPool(1); - semiSpaceCollector_ = new SemiSpaceCollector(this, false); - compressCollector_ = new CompressCollector(this, false); - oldSpaceCollector_ = new 
OldSpaceCollector(this, false); - } + semiSpaceCollector_ = new SemiSpaceCollector(this, paralledGc); + compressCollector_ = new CompressCollector(this, paralledGc); + oldSpaceCollector_ = new OldSpaceCollector(this, paralledGc); // After EcmaOptions merged, it will modified to EcmaOptions configuration sweeper_ = new ConcurrentSweeper(this, true); + concurrentMarker_ = new ConcurrentMarker(this); } void Heap::FlipNewSpace() @@ -88,7 +80,7 @@ void Heap::FlipCompressSpace() } void Heap::Destroy() { - pool_->WaitTaskFinish(); + WaitRunningTaskFinished(); sweeper_->EnsureAllTaskFinish(); toSpace_->Destroy(); delete toSpace_; @@ -125,10 +117,10 @@ void Heap::Destroy() regionFactory_ = nullptr; delete memController_; memController_ = nullptr; - delete pool_; - pool_ = nullptr; delete sweeper_; sweeper_ = nullptr; + delete concurrentMarker_; + concurrentMarker_ = nullptr; } void Heap::CollectGarbage(TriggerGCType gcType) @@ -144,6 +136,7 @@ void Heap::CollectGarbage(TriggerGCType gcType) } } } + switch (gcType) { case TriggerGCType::SEMI_GC: if (GetMemController()->IsInAppStartup()) { @@ -156,6 +149,9 @@ void Heap::CollectGarbage(TriggerGCType gcType) semiSpaceCollector_->RunPhases(); } } + if (ConcurrentMarkingEnable()) { + TryTriggerConcurrentMarking(); + } break; case TriggerGCType::OLD_GC: oldSpaceCollector_->RunPhases(); @@ -229,6 +225,19 @@ void Heap::RecomputeLimits() oldSpaceAllocLimit_ = newOldSpaceLimit; } +void Heap::TriggerConcurrentMarking() +{ + concurrentMarker_->ConcurrentMarking(); +} + +void Heap::TryTriggerConcurrentMarking() +{ + if (ecmaVm_->GetJSThread()->IsConcurrentMarkingInitialized() && + oldSpace_->GetHeapObjectSize() >= 0.8 * oldSpaceAllocLimit_) { + TriggerConcurrentMarking(); + } +} + bool Heap::CheckAndTriggerOldGC() { if ((oldSpace_->GetCommittedSize() + hugeObjectSpace_->GetCommittedSize()) <= oldSpaceAllocLimit_) { @@ -255,4 +264,88 @@ bool Heap::CheckAndTriggerNonMovableGC() CollectGarbage(TriggerGCType::NON_MOVE_GC); return true; } + 
+void Heap::WaitRunningTaskFinished() +{ + os::memory::LockHolder holder(waitTaskFinishedMutex_); + while (runningTastCount_ > 0) { + waitTaskFinishedCV_.Wait(&waitTaskFinishedMutex_); + } +} + +void Heap::WaitConcurrentMarkingFinished() +{ + concurrentMarker_->WaitConcurrentMarkingFinished(); +} + +void Heap::SetConcurrentMarkingEnable(bool flag) +{ + concurrentMarkingEnable_ = flag; +} + +bool Heap::ConcurrentMarkingEnable() const +{ + return concurrentMarkingEnable_; +} + +void Heap::PostParallelGCTask(ParallelGCTaskPhase gcTask) +{ + IncreaseTaskCount(); + Platform::GetCurrentPlatform()->PostTask(std::make_unique(this, gcTask)); +} + +void Heap::IncreaseTaskCount() +{ + os::memory::LockHolder holder(waitTaskFinishedMutex_); + runningTastCount_++; +} + +bool Heap::CheckCanDistributeTask() +{ + return runningTastCount_ < Platform::GetCurrentPlatform()->GetTotalThreadNum() - 1; +} + +void Heap::ReduceTaskCount() +{ + os::memory::LockHolder holder(waitTaskFinishedMutex_); + runningTastCount_--; + if (runningTastCount_ == 0) { + waitTaskFinishedCV_.SignalAll(); + } +} + +bool Heap::ParallelGCTask::Run(uint32_t threadIndex) +{ + switch (taskPhase_) { + case ParallelGCTaskPhase::SEMI_HANDLE_THREAD_ROOTS_TASK: + heap_->GetSemiSpaceCollector()->ParallelHandleThreadRoots(threadIndex); + break; + case ParallelGCTaskPhase::SEMI_HANDLE_SNAPSHOT_TASK: + heap_->GetSemiSpaceCollector()->ParallelHandleSnapShot(threadIndex); + break; + case ParallelGCTaskPhase::SEMI_HANDLE_GLOBAL_POOL_TASK: + heap_->GetSemiSpaceCollector()->ParallelHandleGlobalPool(threadIndex); + break; + case ParallelGCTaskPhase::SEMI_HANDLE_RECLIAM_REGION_TASK: + const_cast(heap_->GetFromSpace())->ReclaimRegions(); + break; + case ParallelGCTaskPhase::OLD_HANDLE_GLOBAL_POOL_TASK: + heap_->GetConcurrentMarker()->ParallelMarkStack(heap_->GetOldSpaceCollector()->GetWorklist(), threadIndex); + break; + case ParallelGCTaskPhase::COMPRESS_HANDLE_GLOBAL_POOL_TASK: + 
heap_->GetCompressCollector()->ProcessMarkStack(threadIndex); + break; + case ParallelGCTaskPhase::COMPRESS_HANDLE_RECLIAM_REGION_TASK: + const_cast(heap_->GetCompressSpace())->ReclaimRegions(); + const_cast(heap_->GetFromSpace())->ReclaimRegions(); + break; + case ParallelGCTaskPhase::CONCURRENT_HANDLE_GLOBAL_POOL_TASK: + heap_->GetConcurrentMarker()->ParallelMarkStack(heap_->GetConcurrentMarker()->GetWorklist(), threadIndex); + break; + default: + break; + } + heap_->ReduceTaskCount(); + return true; +} } // namespace panda::ecmascript diff --git a/ecmascript/mem/heap.h b/ecmascript/mem/heap.h index 55ce680c727044f1837cd06f9c3069a9571491b8..afe30ae1f6cf50beb04270daeb2fc4336cea390b 100644 --- a/ecmascript/mem/heap.h +++ b/ecmascript/mem/heap.h @@ -16,9 +16,9 @@ #ifndef ECMASCRIPT_MEM_HEAP_H #define ECMASCRIPT_MEM_HEAP_H -#include "ecmascript/thread/thread_pool.h" #include "ecmascript/mem/mark_stack.h" #include "ecmascript/mem/space.h" +#include "ecmascript/platform/platform.h" namespace panda::ecmascript { class EcmaVM; @@ -32,6 +32,19 @@ class RegionFactory; class HeapTracker; class MemController; class ConcurrentSweeper; +class ConcurrentMarker; + +enum ParallelGCTaskPhase { + SEMI_HANDLE_THREAD_ROOTS_TASK, + SEMI_HANDLE_SNAPSHOT_TASK, + SEMI_HANDLE_GLOBAL_POOL_TASK, + SEMI_HANDLE_RECLIAM_REGION_TASK, + OLD_HANDLE_GLOBAL_POOL_TASK, + COMPRESS_HANDLE_GLOBAL_POOL_TASK, + COMPRESS_HANDLE_RECLIAM_REGION_TASK, + CONCURRENT_HANDLE_GLOBAL_POOL_TASK, + TASK_LAST // Count of different Task phase +}; class Heap { public: @@ -114,14 +127,14 @@ public: return sweeper_; } - EcmaVM *GetEcmaVM() const + ConcurrentMarker *GetConcurrentMarker() const { - return ecmaVm_; + return concurrentMarker_; } - ThreadPool *GetThreadPool() const + EcmaVM *GetEcmaVM() const { - return pool_; + return ecmaVm_; } void FlipNewSpace(); @@ -176,6 +189,10 @@ public: inline void OnAllocateEvent(uintptr_t address); inline void OnMoveEvent(uintptr_t address, uintptr_t forwardAddress); + void 
TriggerConcurrentMarking(); + + void TryTriggerConcurrentMarking(); + bool CheckAndTriggerOldGC(); bool CheckAndTriggerCompressGC(); @@ -257,7 +274,41 @@ public: inline void ClearSlotsRange(Region *current, uintptr_t freeStart, uintptr_t freeEnd); + void WaitRunningTaskFinished(); + + bool CheckCanDistributeTask(); + + void PostParallelGCTask(ParallelGCTaskPhase gcTask); + + bool IsEnableParallelGC() const + { + return paralledGc_; + } + + void WaitConcurrentMarkingFinished(); + + void SetConcurrentMarkingEnable(bool flag); + + bool ConcurrentMarkingEnable() const; private: + void IncreaseTaskCount(); + + void ReduceTaskCount(); + + class ParallelGCTask : public Task { + public: + ParallelGCTask(Heap *heap, ParallelGCTaskPhase taskPhase) : heap_(heap), taskPhase_(taskPhase) {}; + ~ParallelGCTask() override = default; + bool Run(uint32_t threadIndex) override; + + NO_COPY_SEMANTIC(ParallelGCTask); + NO_MOVE_SEMANTIC(ParallelGCTask); + + private: + Heap *heap_ {nullptr}; + ParallelGCTaskPhase taskPhase_; + }; + EcmaVM *ecmaVm_ {nullptr}; SemiSpace *fromSpace_ {nullptr}; SemiSpace *toSpace_ {nullptr}; @@ -275,9 +326,15 @@ private: RegionFactory *regionFactory_ {nullptr}; HeapTracker *tracker_ {nullptr}; MemController *memController_ {nullptr}; - ThreadPool *pool_ {nullptr}; size_t oldSpaceAllocLimit_ {OLD_SPACE_LIMIT_BEGIN}; + ConcurrentMarker *concurrentMarker_; + uint32_t runningTastCount_ {0}; + os::memory::Mutex waitTaskFinishedMutex_; + os::memory::ConditionVariable waitTaskFinishedCV_; + bool paralledGc_ {false}; + + bool concurrentMarkingEnable_ {false}; inline void SetMaximumCapacity(SemiSpace *space, size_t maximumCapacity); }; } // namespace panda::ecmascript diff --git a/ecmascript/mem/old_space_collector.cpp b/ecmascript/mem/old_space_collector.cpp index 343f7c7e037395b1334360e5f3ab4c75505c1821..ed91b3154f9d78d248958b1c354b6dfb51e090f9 100644 --- a/ecmascript/mem/old_space_collector.cpp +++ b/ecmascript/mem/old_space_collector.cpp @@ -17,6 +17,7 @@ 
#include "ecmascript/ecma_vm.h" #include "ecmascript/mem/clock_scope.h" +#include "ecmascript/mem/concurrent_marker.h" #include "ecmascript/mem/ecma_heap_manager.h" #include "ecmascript/mem/heap-inl.h" #include "ecmascript/mem/heap_roots-inl.h" @@ -30,26 +31,38 @@ namespace panda::ecmascript { OldSpaceCollector::OldSpaceCollector(Heap *heap, bool parallelGc) : heap_(heap), rootManager_(heap->GetEcmaVM()), paralledGC_(parallelGc) { - workList_ = new OldGCWorker(heap_, heap_->GetThreadPool()->GetThreadNum()); + workList_ = new WorkerHelper(heap_, Platform::GetCurrentPlatform()->GetTotalThreadNum() + 1, + ParallelGCTaskPhase::OLD_HANDLE_GLOBAL_POOL_TASK); } void OldSpaceCollector::RunPhases() { - [[maybe_unused]] ecmascript::JSThread *thread = heap_->GetEcmaVM()->GetJSThread(); + ecmascript::JSThread *thread = heap_->GetEcmaVM()->GetJSThread(); INTERPRETER_TRACE(thread, OldSpaceCollector_RunPhases); trace::ScopedTrace scoped_trace("OldSpaceCollector::RunPhases"); [[maybe_unused]] ClockScope clock("OldSpaceCollector::RunPhases"); - InitializePhase(); - MarkingPhase(); - SweepPhases(); - FinishPhase(); + if (heap_->ConcurrentMarkingEnable() && !thread->IsConcurrentMarkingInitialized()) { + if (thread->IsConcurrentMarking()) { + heap_->WaitConcurrentMarkingFinished(); + } + auto worklist = heap_->GetConcurrentMarker()->GetWorklist(); + heap_->GetConcurrentMarker()->ReMarking(worklist); + heap_->GetConcurrentMarker()->SweepPhases(worklist); + heap_->GetConcurrentMarker()->FinishPhase(worklist); + heap_->GetConcurrentMarker()->Reset(); + } else { + InitializePhase(); + MarkingPhase(); + heap_->GetConcurrentMarker()->SweepPhases(workList_); + heap_->GetConcurrentMarker()->FinishPhase(workList_); + } heap_->GetEcmaVM()->GetEcmaGCStats()->StatisticOldCollector(clock.GetPauseTime(), freeSize_, oldSpaceCommitSize_, nonMoveSpaceCommitSize_); } void OldSpaceCollector::InitializePhase() { - heap_->GetThreadPool()->WaitTaskFinish(); + heap_->WaitRunningTaskFinished(); 
heap_->GetSweeper()->EnsureAllTaskFinish(); heap_->EnumerateRegions([](Region *current) { // ensure mark bitmap @@ -60,111 +73,18 @@ void OldSpaceCollector::InitializePhase() bitmap->ClearAllBits(); } }); - workList_->Initialize(); + workList_->Initialize(TriggerGCType::OLD_GC); freeSize_ = 0; hugeSpaceFreeSize_ = 0; oldSpaceCommitSize_ = heap_->GetOldSpace()->GetCommittedSize(); nonMoveSpaceCommitSize_ = heap_->GetNonMovableSpace()->GetCommittedSize(); } -void OldSpaceCollector::FinishPhase() -{ - size_t aliveSize = 0; - workList_->Finish(aliveSize); -} - void OldSpaceCollector::MarkingPhase() { trace::ScopedTrace scoped_trace("OldSpaceCollector::MarkingPhase"); - RootVisitor gcMarkYoung = [this]([[maybe_unused]] Root type, ObjectSlot slot) { - JSTaggedValue value(slot.GetTaggedType()); - if (value.IsHeapObject()) { - MarkObject(0, value.GetTaggedObject()); - } - }; - RootRangeVisitor gcMarkRangeYoung = [this]([[maybe_unused]] Root type, ObjectSlot start, ObjectSlot end) { - for (ObjectSlot slot = start; slot < end; slot++) { - JSTaggedValue value(slot.GetTaggedType()); - if (value.IsHeapObject()) { - MarkObject(0, value.GetTaggedObject()); - } - } - }; - rootManager_.VisitVMRoots(gcMarkYoung, gcMarkRangeYoung); - - ProcessMarkStack(0); - if (paralledGC_) { - heap_->GetThreadPool()->WaitTaskFinish(); - } -} - -void OldSpaceCollector::ProcessMarkStack(uint64_t threadId) -{ - while (true) { - TaggedObject *obj = nullptr; - if (!workList_->Pop(threadId, &obj)) { - break; - } - auto jsHclass = obj->GetClass(); - // mark dynClass - MarkObject(threadId, jsHclass); - - rootManager_.MarkObjectBody( - obj, jsHclass, [this, &threadId]([[maybe_unused]] TaggedObject *root, ObjectSlot start, ObjectSlot end) { - for (ObjectSlot slot = start; slot < end; slot++) { - JSTaggedValue value(slot.GetTaggedType()); - if (value.IsWeak()) { - RecordWeakReference(threadId, reinterpret_cast(slot.SlotAddress())); - continue; - } - if (value.IsHeapObject()) { - MarkObject(threadId, 
value.GetTaggedObject()); - } - } - }); - } -} - -void OldSpaceCollector::SweepPhases() -{ - trace::ScopedTrace scoped_trace("OldSpaceCollector::SweepPhases"); - // process weak reference - for (uint32_t i = 0; i < heap_->GetThreadPool()->GetThreadNum(); i++) { - ProcessQueue *queue = workList_->GetWeakReferenceQueue(i); - while (true) { - auto obj = queue->PopBack(); - if (UNLIKELY(obj == nullptr)) { - break; - } - ObjectSlot slot(ToUintPtr(obj)); - JSTaggedValue value(slot.GetTaggedType()); - auto header = value.GetTaggedWeakRef(); - - Region *objectRegion = Region::ObjectAddressToRange(header); - auto markBitmap = objectRegion->GetMarkBitmap(); - if (!markBitmap->Test(header)) { - slot.Update(static_cast(JSTaggedValue::Undefined().GetRawData())); - } - } - } - - auto stringTable = heap_->GetEcmaVM()->GetEcmaStringTable(); - WeakRootVisitor gcUpdateWeak = [](TaggedObject *header) { - Region *objectRegion = Region::ObjectAddressToRange(reinterpret_cast(header)); - if (objectRegion->InYoungGeneration()) { - return header; - } - - auto markBitmap = objectRegion->GetMarkBitmap(); - if (markBitmap->Test(header)) { - return header; - } - return reinterpret_cast(ToUintPtr(nullptr)); - }; - stringTable->SweepWeakReference(gcUpdateWeak); - heap_->GetEcmaVM()->GetJSThread()->IterateWeakEcmaGlobalStorage(gcUpdateWeak); - heap_->GetEcmaVM()->ProcessReferences(gcUpdateWeak); - - heap_->GetSweeper()->SweepPhases(); + heap_->GetConcurrentMarker()->MarkRoots(workList_); + heap_->GetConcurrentMarker()->ParallelMarkStack(workList_, 0); + heap_->WaitRunningTaskFinished(); } } // namespace panda::ecmascript diff --git a/ecmascript/mem/old_space_collector.h b/ecmascript/mem/old_space_collector.h index 5da8b520717d0ee6818eb10b411025da10b5c8f2..5916eb895177641fcb2abec451fdd7b238b2bde9 100644 --- a/ecmascript/mem/old_space_collector.h +++ b/ecmascript/mem/old_space_collector.h @@ -21,7 +21,7 @@ #include "ecmascript/mem/allocator.h" #include "ecmascript/mem/mark_stack-inl.h" #include 
"ecmascript/mem/mark_word.h" -#include "ecmascript/mem/semi_space_worker.h" +#include "ecmascript/mem/parallel_work_helper.h" #include "ecmascript/mem/slots.h" #include "ecmascript/mem/heap_roots.h" #include "ecmascript/mem/remembered_set.h" @@ -45,6 +45,11 @@ public: return heap_; } + WorkerHelper *GetWorklist() const + { + return workList_; + } + private: void InitializePhase(); void MarkingPhase(); @@ -59,14 +64,15 @@ private: Heap *heap_; HeapRootManager rootManager_; - bool paralledGC_{false}; - OldGCWorker *workList_{nullptr}; - size_t freeSize_{0}; + bool paralledGC_ {false}; + WorkerHelper *workList_ {nullptr}; + size_t freeSize_ {0}; size_t hugeSpaceFreeSize_ = 0; size_t oldSpaceCommitSize_ = 0; size_t nonMoveSpaceCommitSize_ = 0; - friend class OldGCWorker; + friend class WorkerHelper; + friend class Heap; }; } // namespace ecmascript } // namespace panda diff --git a/ecmascript/mem/semi_space_worker.cpp b/ecmascript/mem/parallel_work_helper.cpp similarity index 63% rename from ecmascript/mem/semi_space_worker.cpp rename to ecmascript/mem/parallel_work_helper.cpp index 0df831f442d5bac5585cb79ecb728c8a322e6c55..a4b216342cd03f5ed4e2ea58923d5811b357f2da 100644 --- a/ecmascript/mem/semi_space_worker.cpp +++ b/ecmascript/mem/parallel_work_helper.cpp @@ -13,7 +13,7 @@ * limitations under the License. 
*/ -#include "ecmascript/mem/semi_space_worker.h" +#include "ecmascript/mem/parallel_work_helper.h" #include "ecmascript/mem/area.h" #include "ecmascript/mem/compress_collector.h" @@ -24,36 +24,26 @@ #include "ecmascript/mem/tlab_allocator-inl.h" namespace panda::ecmascript { -void Worker::Finish(size_t &aliveSize) +WorkerHelper::WorkerHelper(Heap *heap, uint32_t threadNum, ParallelGCTaskPhase parallelTask) + : heap_(heap), threadNum_(threadNum), markSpace_(0), spaceTop_(0), markSpaceEnd_(0), parallelTask_(parallelTask) { for (uint32_t i = 0; i < threadNum_; i++) { - WorkNodeHolder &holder = workList_[i]; - holder.weakQueue_->FinishMarking(continuousQueue_[i]); - delete holder.weakQueue_; - holder.weakQueue_ = nullptr; - delete holder.allocator_; - holder.allocator_ = nullptr; - holder.waitUpdate_.clear(); - aliveSize += holder.aliveSize_; - } - - while (!unuseSpace_.empty()) { - const_cast(heap_->GetRegionFactory())->FreeBuffer(reinterpret_cast( - unuseSpace_.back())); - unuseSpace_.pop_back(); + continuousQueue_[i] = new ProcessQueue(heap); } + markSpace_ = ToUintPtr(const_cast(heap_->GetRegionFactory())->AllocateBuffer(SPACE_SIZE)); } -void Worker::Finish(size_t &aliveSize, size_t &promoteSize) +WorkerHelper::~WorkerHelper() { - Finish(aliveSize); for (uint32_t i = 0; i < threadNum_; i++) { - WorkNodeHolder &holder = workList_[i]; - promoteSize += holder.aliveSize_; + continuousQueue_[i]->Destroy(); + delete continuousQueue_[i]; + continuousQueue_[i] = nullptr; } + const_cast(heap_->GetRegionFactory())->FreeBuffer(reinterpret_cast(markSpace_)); } -bool Worker::Push(uint32_t threadId, TaggedObject *object) +bool WorkerHelper::Push(uint32_t threadId, TaggedObject *object) { WorkNode *&pushNode = workList_[threadId].pushNode_; if (!pushNode->Push(ToUintPtr(object))) { @@ -63,7 +53,19 @@ bool Worker::Push(uint32_t threadId, TaggedObject *object) return true; } -bool Worker::Pop(uint32_t threadId, TaggedObject **object) +void 
WorkerHelper::PushWorkNodeToGlobal(uint32_t threadId) +{ + WorkNode *&pushNode = workList_[threadId].pushNode_; + if (!pushNode->IsEmpty()) { + globalWork_.Push(pushNode); + pushNode = AllocalWorkNode(); + if (heap_->IsEnableParallelGC() && heap_->CheckCanDistributeTask()) { + heap_->PostParallelGCTask(parallelTask_); + } + } +} + +bool WorkerHelper::Pop(uint32_t threadId, TaggedObject **object) { WorkNode *&popNode = workList_[threadId].popNode_; WorkNode *&pushNode = workList_[threadId].pushNode_; @@ -80,126 +82,41 @@ bool Worker::Pop(uint32_t threadId, TaggedObject **object) return true; } -bool Worker::PopWorkNodeFromGlobal(uint32_t threadId) +bool WorkerHelper::PopWorkNodeFromGlobal(uint32_t threadId) { return globalWork_.Pop(&workList_[threadId].popNode_); } -WorkNode *Worker::AllocalWorkNode() -{ - size_t totalSize = sizeof(WorkNode) + sizeof(Stack) + STACK_AREA_SIZE; - // CAS - volatile auto atomicField = reinterpret_cast *>(&spaceTop_); - bool result = false; - uintptr_t begin = 0; - do { - begin = atomicField->load(std::memory_order_acquire); - if (begin + totalSize >= markSpaceEnd_) { - os::memory::LockHolder lock(mtx_); - begin = atomicField->load(std::memory_order_acquire); - if (begin + totalSize >= markSpaceEnd_) { - unuseSpace_.emplace_back(markSpace_); - markSpace_ = - ToUintPtr(const_cast(heap_->GetRegionFactory())->AllocateBuffer(SPACE_SIZE)); - spaceTop_ = markSpace_; - markSpaceEnd_ = markSpace_ + SPACE_SIZE; - begin = spaceTop_; - } - } - result = std::atomic_compare_exchange_strong_explicit(atomicField, &begin, begin + totalSize, - std::memory_order_release, std::memory_order_relaxed); - } while (!result); - Stack *stack = reinterpret_cast(begin + sizeof(WorkNode)); - stack->ResetBegin(begin + sizeof(WorkNode) + sizeof(Stack), begin + totalSize); - WorkNode *work = reinterpret_cast(begin); - return new (work) WorkNode(stack); -} - -Worker::Worker(Heap *heap, uint32_t threadNum) - : heap_(heap), threadNum_(threadNum), markSpace_(0), 
spaceTop_(0), markSpaceEnd_(0) -{ - for (uint32_t i = 0; i < threadNum_; i++) { - continuousQueue_[i] = new ProcessQueue(heap); - } - markSpace_ = ToUintPtr(const_cast(heap_->GetRegionFactory())->AllocateBuffer(SPACE_SIZE)); -} - -Worker::~Worker() +void WorkerHelper::Finish(size_t &aliveSize) { - for (uint32_t i = 0; i < threadNum_; i++) { - continuousQueue_[i]->Destroy(); - delete continuousQueue_[i]; - continuousQueue_[i] = nullptr; - } - const_cast(heap_->GetRegionFactory())->FreeBuffer(reinterpret_cast(markSpace_)); -} - -SemiSpaceWorker::~SemiSpaceWorker() = default; - -CompressGCWorker::~CompressGCWorker() = default; - -void SemiSpaceWorker::PushWorkNodeToGlobal(uint32_t threadId) -{ - WorkNode *&pushNode = workList_[threadId].pushNode_; - if (!pushNode->IsEmpty()) { - globalWork_.Push(pushNode); - pushNode = AllocalWorkNode(); - - auto pool = heap_->GetThreadPool(); - if (pool->GetTaskCount() < pool->GetThreadNum() - 1) { - pool->Submit(std::bind(&SemiSpaceCollector::ParallelHandleGlobalPool, heap_->GetSemiSpaceCollector(), - std::placeholders::_1)); - } - } -} - -void SemiSpaceWorker::Initialize() -{ - spaceTop_ = markSpace_; - markSpaceEnd_ = markSpace_ + SPACE_SIZE; for (uint32_t i = 0; i < threadNum_; i++) { WorkNodeHolder &holder = workList_[i]; - holder.pushNode_ = AllocalWorkNode(); - holder.popNode_ = AllocalWorkNode(); - holder.weakQueue_ = new ProcessQueue(); - holder.weakQueue_->BeginMarking(heap_, continuousQueue_[i]); - holder.allocator_ = new TlabAllocator(heap_, TriggerGCType::SEMI_GC); - holder.aliveSize_ = 0; - holder.promoteSize_ = 0; + holder.weakQueue_->FinishMarking(continuousQueue_[i]); + delete holder.weakQueue_; + holder.weakQueue_ = nullptr; + delete holder.allocator_; + holder.allocator_ = nullptr; + holder.waitUpdate_.clear(); + aliveSize += holder.aliveSize_; } -} - -void CompressGCWorker::PushWorkNodeToGlobal(uint32_t threadId) -{ - WorkNode *&pushNode = workList_[threadId].pushNode_; - if (!pushNode->IsEmpty()) { - 
globalWork_.Push(pushNode); - pushNode = AllocalWorkNode(); - auto pool = heap_->GetThreadPool(); - if (pool->GetTaskCount() < pool->GetThreadNum() - 1) { - pool->Submit( - std::bind(&CompressCollector::ProcessMarkStack, heap_->GetCompressCollector(), std::placeholders::_1)); - } + while (!unuseSpace_.empty()) { + const_cast(heap_->GetRegionFactory())->FreeBuffer(reinterpret_cast( + unuseSpace_.back())); + unuseSpace_.pop_back(); } } -void CompressGCWorker::Initialize() +void WorkerHelper::Finish(size_t &aliveSize, size_t &promoteSize) { - spaceTop_ = markSpace_; - markSpaceEnd_ = markSpace_ + SPACE_SIZE; + Finish(aliveSize); for (uint32_t i = 0; i < threadNum_; i++) { WorkNodeHolder &holder = workList_[i]; - holder.pushNode_ = AllocalWorkNode(); - holder.popNode_ = AllocalWorkNode(); - holder.weakQueue_ = new ProcessQueue(); - holder.weakQueue_->BeginMarking(heap_, continuousQueue_[i]); - holder.allocator_ = new TlabAllocator(heap_, TriggerGCType::COMPRESS_FULL_GC); - holder.aliveSize_ = 0; + promoteSize += holder.aliveSize_; } } -void OldGCWorker::Initialize() +void WorkerHelper::Initialize(TriggerGCType gcType) { spaceTop_ = markSpace_; markSpaceEnd_ = markSpace_ + SPACE_SIZE; @@ -209,21 +126,43 @@ void OldGCWorker::Initialize() holder.popNode_ = AllocalWorkNode(); holder.weakQueue_ = new ProcessQueue(); holder.weakQueue_->BeginMarking(heap_, continuousQueue_[i]); + holder.aliveSize_ = 0; + holder.promoteSize_ = 0; + if (gcType == TriggerGCType::SEMI_GC) { + holder.allocator_ = new TlabAllocator(heap_, TriggerGCType::SEMI_GC); + } else if (gcType == TriggerGCType::COMPRESS_FULL_GC) { + holder.allocator_ = new TlabAllocator(heap_, TriggerGCType::COMPRESS_FULL_GC); + } } } -void OldGCWorker::PushWorkNodeToGlobal(uint32_t threadId) +WorkNode *WorkerHelper::AllocalWorkNode() { - WorkNode *&pushNode = workList_[threadId].pushNode_; - if (!pushNode->IsEmpty()) { - globalWork_.Push(pushNode); - pushNode = AllocalWorkNode(); - - auto pool = heap_->GetThreadPool(); - if 
(pool->GetTaskCount() < pool->GetThreadNum() - 1) { - pool->Submit(std::bind(&OldSpaceCollector::ProcessMarkStack, heap_->GetOldSpaceCollector(), - std::placeholders::_1)); + size_t totalSize = sizeof(WorkNode) + sizeof(Stack) + STACK_AREA_SIZE; + // CAS + volatile auto atomicField = reinterpret_cast *>(&spaceTop_); + bool result = false; + uintptr_t begin = 0; + do { + begin = atomicField->load(std::memory_order_acquire); + if (begin + totalSize >= markSpaceEnd_) { + os::memory::LockHolder lock(mtx_); + begin = atomicField->load(std::memory_order_acquire); + if (begin + totalSize >= markSpaceEnd_) { + unuseSpace_.emplace_back(markSpace_); + markSpace_ = + ToUintPtr(const_cast(heap_->GetRegionFactory())->AllocateBuffer(SPACE_SIZE)); + spaceTop_ = markSpace_; + markSpaceEnd_ = markSpace_ + SPACE_SIZE; + begin = spaceTop_; + } } - } + result = std::atomic_compare_exchange_strong_explicit(atomicField, &begin, begin + totalSize, + std::memory_order_release, std::memory_order_relaxed); + } while (!result); + Stack *stack = reinterpret_cast(begin + sizeof(WorkNode)); + stack->ResetBegin(begin + sizeof(WorkNode) + sizeof(Stack), begin + totalSize); + WorkNode *work = reinterpret_cast(begin); + return new (work) WorkNode(stack); } } // namespace panda::ecmascript diff --git a/ecmascript/mem/semi_space_worker.h b/ecmascript/mem/parallel_work_helper.h similarity index 55% rename from ecmascript/mem/semi_space_worker.h rename to ecmascript/mem/parallel_work_helper.h index 0a88f31f0e07a0bfb273c4ed5da607778a92fa37..90244e2274c3127783ed03337909d84baa697337 100644 --- a/ecmascript/mem/semi_space_worker.h +++ b/ecmascript/mem/parallel_work_helper.h @@ -13,19 +13,17 @@ * limitations under the License. 
*/ -#ifndef ECMASCRIPT_MEM_SEMI_SPACE_WORKER_H -#define ECMASCRIPT_MEM_SEMI_SPACE_WORKER_H +#ifndef ECMASCRIPT_MEM_PARALLEL_WORK_HELPER_H +#define ECMASCRIPT_MEM_PARALLEL_WORK_HELPER_H -#include "ecmascript/mem/mark_stack.h" +#include "ecmascript/mem/mark_stack-inl.h" #include "ecmascript/mem/slots.h" namespace panda::ecmascript { using SlotNeedUpdate = std::pair; static constexpr uint32_t MARKSTACK_MAX_SIZE = 100; -static constexpr uint32_t THREAD_NUM_FOR_YOUNG_GC = 6; -static constexpr uint32_t STACK_AREA_SIZE = sizeof(uintptr_t *) * MARKSTACK_MAX_SIZE; - +static constexpr uint32_t STACK_AREA_SIZE = sizeof(uintptr_t) * MARKSTACK_MAX_SIZE; static constexpr uint32_t SPACE_SIZE = 8 * 1024; class Heap; @@ -118,24 +116,22 @@ private: }; struct WorkNodeHolder { - WorkNode *pushNode_{nullptr}; - WorkNode *popNode_{nullptr}; - ProcessQueue *weakQueue_{nullptr}; + WorkNode *pushNode_ {nullptr}; + WorkNode *popNode_ {nullptr}; + ProcessQueue *weakQueue_ {nullptr}; std::vector waitUpdate_; - TlabAllocator *allocator_{nullptr}; + TlabAllocator *allocator_ {nullptr}; size_t aliveSize_ = 0; size_t promoteSize_ = 0; }; -class Worker { +class WorkerHelper final { public: - Worker() = delete; - explicit Worker(Heap *heap, uint32_t threadNum); - - virtual ~Worker() = 0; - virtual void PushWorkNodeToGlobal(uint32_t threadId) = 0; - virtual void Initialize() = 0; + WorkerHelper() = delete; + explicit WorkerHelper(Heap *heap, uint32_t threadNum, ParallelGCTaskPhase parallelTask); + ~WorkerHelper(); + void Initialize(TriggerGCType gcType); void Finish(size_t &aliveSize); void Finish(size_t &aliveSize, size_t &promoteSize); @@ -143,64 +139,33 @@ public: bool Pop(uint32_t threadId, TaggedObject **object); bool PopWorkNodeFromGlobal(uint32_t threadId); + void PushWorkNodeToGlobal(uint32_t threadId); - void PushWeakReference(uint32_t threadId, JSTaggedType *weak) + inline void PushWeakReference(uint32_t threadId, JSTaggedType *weak) { workList_[threadId].weakQueue_->PushBack(weak); } - 
void AddAliveSize(uint32_t threadId, size_t size) + inline void AddAliveSize(uint32_t threadId, size_t size) { workList_[threadId].aliveSize_ += size; } - void AddPromoteSize(uint32_t threadId, size_t size) + inline void AddPromoteSize(uint32_t threadId, size_t size) { workList_[threadId].promoteSize_ += size; } - ProcessQueue *GetWeakReferenceQueue(uint32_t threadId) const + inline ProcessQueue *GetWeakReferenceQueue(uint32_t threadId) const { return workList_[threadId].weakQueue_; } - TlabAllocator *GetTlabAllocator(uint32_t threadId) const + inline TlabAllocator *GetTlabAllocator(uint32_t threadId) const { return workList_[threadId].allocator_; } - NO_COPY_SEMANTIC(Worker); - NO_MOVE_SEMANTIC(Worker); - -protected: - WorkNode *AllocalWorkNode(); - - Heap *heap_; // NOLINT(misc-non-private-member-variables-in-classes) - uint32_t threadNum_; // NOLINT(misc-non-private-member-variables-in-classes) - // NOLINTNEXTLINE(misc-non-private-member-variables-in-classes, modernize-avoid-c-arrays) - WorkNodeHolder workList_[THREAD_NUM_FOR_YOUNG_GC]; - // NOLINTNEXTLINE(misc-non-private-member-variables-in-classes, modernize-avoid-c-arrays) - ContinuousStack *continuousQueue_[THREAD_NUM_FOR_YOUNG_GC]; - GlobalWorkList globalWork_; // NOLINT(misc-non-private-member-variables-in-classes) - - uintptr_t markSpace_; // NOLINT(misc-non-private-member-variables-in-classes) - uintptr_t spaceTop_; // NOLINT(misc-non-private-member-variables-in-classes) - uintptr_t markSpaceEnd_; // NOLINT(misc-non-private-member-variables-in-classes) - -private: - std::vector unuseSpace_; - os::memory::Mutex mtx_; -}; - -class SemiSpaceWorker : public Worker { -public: - SemiSpaceWorker() = delete; - explicit SemiSpaceWorker(Heap *heap, uint32_t threadNum) : Worker(heap, threadNum) {} - - ~SemiSpaceWorker() override; - void PushWorkNodeToGlobal(uint32_t threadId) override; - void Initialize() override; - inline void PushWaitUpdateSlot(uint32_t threadId, SlotNeedUpdate slot) { 
workList_[threadId].waitUpdate_.emplace_back(slot); @@ -217,35 +182,23 @@ public: return true; } - NO_COPY_SEMANTIC(SemiSpaceWorker); - NO_MOVE_SEMANTIC(SemiSpaceWorker); -}; - -class CompressGCWorker : public Worker { -public: - CompressGCWorker() = delete; - explicit CompressGCWorker(Heap *heap, uint32_t threadNum) : Worker(heap, threadNum) {} - - ~CompressGCWorker() override; - void PushWorkNodeToGlobal(uint32_t threadId) override; - void Initialize() override; - - NO_COPY_SEMANTIC(CompressGCWorker); - NO_MOVE_SEMANTIC(CompressGCWorker); -}; - -class OldGCWorker : public Worker { -public: - OldGCWorker() = delete; - OldGCWorker(Heap *heap, uint32_t threadNum) : Worker(heap, threadNum) {} - - ~OldGCWorker() override = default; +private: + NO_COPY_SEMANTIC(WorkerHelper); + NO_MOVE_SEMANTIC(WorkerHelper); - void PushWorkNodeToGlobal(uint32_t threadId) override; - void Initialize() override; + WorkNode *AllocalWorkNode(); - NO_COPY_SEMANTIC(OldGCWorker); - NO_MOVE_SEMANTIC(OldGCWorker); + Heap *heap_; + uint32_t threadNum_; + WorkNodeHolder workList_[Platform::MAX_PLATFORM_THREAD_NUM + 1]; + ContinuousStack *continuousQueue_[Platform::MAX_PLATFORM_THREAD_NUM + 1]; + GlobalWorkList globalWork_; + uintptr_t markSpace_; + uintptr_t spaceTop_; + uintptr_t markSpaceEnd_; + std::vector unuseSpace_; + os::memory::Mutex mtx_; + ParallelGCTaskPhase parallelTask_; }; } // namespace panda::ecmascript -#endif // ECMASCRIPT_MEM_SEMI_SPACE_WORKER_H +#endif // ECMASCRIPT_MEM_PARALLEL_WORK_HELPER_H diff --git a/ecmascript/mem/region.h b/ecmascript/mem/region.h index 207abda5fdf0cab3ae03e6c2502b4cf7c979f39d..7eab5776933fac5fd3eb1f32173070b9da172504 100644 --- a/ecmascript/mem/region.h +++ b/ecmascript/mem/region.h @@ -139,6 +139,11 @@ public: return markBitmap_; } + RememberedSet *GetReferenceSet() + { + return referenceSet_; + } + RememberedSet *GetCrossRegionRememberedSet() { return crossRegionSet_; @@ -180,10 +185,13 @@ public: } RangeBitmap *CreateMarkBitmap(); + inline 
RememberedSet *CreateReferenceSet(); + inline RememberedSet *GetOrCreateReferenceSet(); inline RememberedSet *CreateRememberedSet(); inline RememberedSet *GetOrCreateCrossRegionRememberedSet(); inline RememberedSet *GetOrCreateOldToNewRememberedSet(); inline void InsertCrossRegionRememberedSet(uintptr_t addr); + inline void InsertReferenceSet(uintptr_t addr); inline void InsertOldToNewRememberedSet(uintptr_t addr); uintptr_t GetAllocateBase() const @@ -239,6 +247,15 @@ public: } } + bool IsConcurrentMarking() + { + return concurrentMarking_; + } + + void SetConcurrentMarking(bool isMarking) + { + concurrentMarking_ = isMarking; + } private: Space *space_; uintptr_t flags_; // Memory alignment, only low 32bits are used now @@ -246,11 +263,13 @@ private: uintptr_t begin_; uintptr_t end_; uintptr_t highWaterMark_; - Region *next_{nullptr}; - Region *prev_{nullptr}; - RangeBitmap *markBitmap_{nullptr}; - RememberedSet *crossRegionSet_{nullptr}; - RememberedSet *oldToNewSet_{nullptr}; + bool concurrentMarking_ {false}; + Region *next_ {nullptr}; + Region *prev_ {nullptr}; + RangeBitmap *markBitmap_ {nullptr}; + RememberedSet *referenceSet_ {nullptr}; + RememberedSet *crossRegionSet_ {nullptr}; + RememberedSet *oldToNewSet_ {nullptr}; Span kinds_; friend class SnapShot; }; diff --git a/ecmascript/mem/semi_space_collector-inl.h b/ecmascript/mem/semi_space_collector-inl.h index 806393c760fa513be0cbe975e20eb8e2dcdca34b..522a3c7c8c091ef896a92eb80248757cd174d5d2 100644 --- a/ecmascript/mem/semi_space_collector-inl.h +++ b/ecmascript/mem/semi_space_collector-inl.h @@ -17,12 +17,14 @@ #define ECMASCRIPT_MEM_SEMI_SAPACE_COLLECTOR_INL_H #include "ecmascript/mem/semi_space_collector.h" -#include "ecmascript/mem/mem.h" + +#include "ecmascript/js_hclass-inl.h" #include "ecmascript/mem/heap.h" -#include "ecmascript/mem/region.h" #include "ecmascript/mem/mark_word.h" -#include "ecmascript/js_hclass-inl.h" -#include "ecmascript/mem/semi_space_worker.h" +#include "ecmascript/mem/mem.h" 
+#include "ecmascript/mem/parallel_work_helper.h" +#include "ecmascript/mem/region.h" + namespace panda::ecmascript { void SemiSpaceCollector::UpdatePromotedSlot(TaggedObject *object, ObjectSlot slot) diff --git a/ecmascript/mem/semi_space_collector.cpp b/ecmascript/mem/semi_space_collector.cpp index 0cc31fd8f1a21f1d3e0b579c66ab187d32c8464a..61b4a66adf2e87933e0b6d05856c307cd9577177 100644 --- a/ecmascript/mem/semi_space_collector.cpp +++ b/ecmascript/mem/semi_space_collector.cpp @@ -31,7 +31,8 @@ namespace panda::ecmascript { SemiSpaceCollector::SemiSpaceCollector(Heap *heap, bool parallelGc) : heap_(heap), rootManager_(heap->GetEcmaVM()), paralledGC_(parallelGc), markObject_(this) { - workList_ = new SemiSpaceWorker(heap_, heap_->GetThreadPool()->GetThreadNum()); + workList_ = new WorkerHelper(heap_, Platform::GetCurrentPlatform()->GetTotalThreadNum() + 1, + ParallelGCTaskPhase::SEMI_HANDLE_GLOBAL_POOL_TASK); } SemiSpaceCollector::~SemiSpaceCollector() @@ -44,10 +45,14 @@ SemiSpaceCollector::~SemiSpaceCollector() void SemiSpaceCollector::RunPhases() { - [[maybe_unused]] ecmascript::JSThread *thread = heap_->GetEcmaVM()->GetJSThread(); + ecmascript::JSThread *thread = heap_->GetEcmaVM()->GetJSThread(); INTERPRETER_TRACE(thread, SemiSpaceCollector_RunPhases); trace::ScopedTrace scoped_trace("SemiSpaceCollector::RunPhases"); [[maybe_unused]] ClockScope clock("SemiSpaceCollector::RunPhases"); + if (heap_->ConcurrentMarkingEnable() && thread->IsConcurrentMarking()) { + heap_->WaitConcurrentMarkingFinished(); + heap_->CollectGarbage(TriggerGCType::OLD_GC); + } InitializePhase(); ParallelMarkingPhase(); SweepPhases(); @@ -58,7 +63,7 @@ void SemiSpaceCollector::RunPhases() void SemiSpaceCollector::InitializePhase() { - heap_->GetThreadPool()->WaitTaskFinish(); + heap_->WaitRunningTaskFinished(); heap_->GetSweeper()->EnsureAllTaskFinish(); auto fromSpace = heap_->GetFromSpace(); if (fromSpace->GetCommittedSize() == 0) { @@ -69,7 +74,7 @@ void 
SemiSpaceCollector::InitializePhase() oldSpaceAllocator_.Swap(heapManager->GetOldSpaceAllocator()); ageMark_ = heap_->GetNewSpace()->GetAgeMark(); heap_->FlipNewSpace(); - workList_->Initialize(); + workList_->Initialize(TriggerGCType::SEMI_GC); promotedSize_ = 0; semiCopiedSize_ = 0; commitSize_ = heap_->GetFromSpace()->GetCommittedSize(); @@ -80,10 +85,7 @@ void SemiSpaceCollector::FinishPhase() // swap const_cast(heap_->GetNewSpace())->Swap(const_cast(heap_->GetFromSpace())); if (paralledGC_) { - heap_->GetThreadPool()->Submit([this]([[maybe_unused]] uint32_t threadId) -> bool { - const_cast(heap_->GetFromSpace())->ReclaimRegions(); - return true; - }); + heap_->PostParallelGCTask(ParallelGCTaskPhase::SEMI_HANDLE_RECLIAM_REGION_TASK); } else { const_cast(heap_->GetFromSpace())->ReclaimRegions(); } @@ -193,19 +195,18 @@ void SemiSpaceCollector::ParallelMarkingPhase() auto region = oldSpace->GetCurrentRegion(); if (paralledGC_) { - heap_->GetThreadPool()->Submit( - std::bind(&SemiSpaceCollector::ParallelHandleThreadRoots, this, std::placeholders::_1)); - heap_->GetThreadPool()->Submit( - std::bind(&SemiSpaceCollector::ParallelHandleSnapShot, this, std::placeholders::_1)); + heap_->PostParallelGCTask(ParallelGCTaskPhase::SEMI_HANDLE_THREAD_ROOTS_TASK); + heap_->PostParallelGCTask(ParallelGCTaskPhase::SEMI_HANDLE_SNAPSHOT_TASK); ParallelHandleOldToNew(0, region); - heap_->GetThreadPool()->WaitTaskFinish(); } else { ParallelHandleOldToNew(0, region); ParallelHandleSnapShot(0); ParallelHandleThreadRoots(0); } + heap_->WaitRunningTaskFinished(); - for (uint32_t i = 0; i < heap_->GetThreadPool()->GetThreadNum(); i++) { + auto totalThreadCount = Platform::GetCurrentPlatform()->GetTotalThreadNum() + 1; // gc thread and main thread + for (uint32_t i = 0; i < totalThreadCount; i++) { SlotNeedUpdate needUpdate(nullptr, ObjectSlot(0)); while (workList_->GetSlotNeedUpdate(i, &needUpdate)) { UpdatePromotedSlot(needUpdate.first, needUpdate.second); @@ -248,7 +249,8 @@ void 
SemiSpaceCollector::ProcessMarkStack(uint64_t threadId) void SemiSpaceCollector::SweepPhases() { trace::ScopedTrace scoped_trace("SemiSpaceCollector::SweepPhases"); - for (uint32_t i = 0; i < heap_->GetThreadPool()->GetThreadNum(); i++) { + auto totalThreadCount = Platform::GetCurrentPlatform()->GetTotalThreadNum() + 1; // gc thread and main thread + for (uint32_t i = 0; i < totalThreadCount; i++) { ProcessQueue *queue = workList_->GetWeakReferenceQueue(i); while (true) { auto obj = queue->PopBack(); diff --git a/ecmascript/mem/semi_space_collector.h b/ecmascript/mem/semi_space_collector.h index 3a76149e5c181dabf648422ac28ecca3f1f6721f..ca8fe63ef89d2482f1062290abbb1252ac5e7f7d 100644 --- a/ecmascript/mem/semi_space_collector.h +++ b/ecmascript/mem/semi_space_collector.h @@ -29,7 +29,6 @@ #include "ecmascript/mem/chunk_containers.h" #include "ecmascript/mem/tlab_allocator.h" -#include "ecmascript/thread/thread_pool.h" #include "ecmascript/mem/semi_space_marker.h" #include "os/mutex.h" @@ -38,7 +37,7 @@ namespace panda { namespace ecmascript { class Heap; class JSHClass; -class SemiSpaceWorker; +class WorkerHelper; class GarbageCollector { public: @@ -61,7 +60,7 @@ public: { return heap_; } - + private: bool ParallelHandleOldToNew(uint32_t threadId, Region *region); bool ParallelHandleThreadRoots(uint32_t threadId); @@ -82,17 +81,18 @@ private: Heap *heap_; HeapRootManager rootManager_; os::memory::Mutex allocatorLock_; - BumpPointerAllocator fromSpaceAllocator_{}; - FreeListAllocator oldSpaceAllocator_{}; - bool paralledGC_{false}; - SemiSpaceWorker *workList_{nullptr}; + BumpPointerAllocator fromSpaceAllocator_ {}; + FreeListAllocator oldSpaceAllocator_ {}; + bool paralledGC_ {false}; + WorkerHelper *workList_ {nullptr}; SemiSpaceMarker markObject_; - size_t promotedSize_{0}; - size_t semiCopiedSize_{0}; + size_t promotedSize_ {0}; + size_t semiCopiedSize_ {0}; size_t commitSize_ = 0; - uintptr_t ageMark_{0}; + uintptr_t ageMark_ {0}; + friend class TlabAllocator; 
- friend class SemiSpaceWorker; + friend class WorkerHelper; friend class SemiSpaceMarker; friend class Heap; }; diff --git a/ecmascript/mem/semi_space_marker.cpp b/ecmascript/mem/semi_space_marker.cpp index d4ca483813349e67596c0695b56f0d5b2324511c..7605e94e8ec6f02550f366bedc3a27286f311148 100644 --- a/ecmascript/mem/semi_space_marker.cpp +++ b/ecmascript/mem/semi_space_marker.cpp @@ -17,9 +17,9 @@ #include "ecmascript/free_object.h" #include "ecmascript/js_hclass-inl.h" +#include "ecmascript/mem/parallel_work_helper.h" #include "ecmascript/mem/region.h" #include "ecmascript/mem/semi_space_collector-inl.h" -#include "ecmascript/mem/semi_space_worker.h" #include "ecmascript/mem/tlab_allocator-inl.h" namespace panda::ecmascript { diff --git a/ecmascript/mem/space-inl.h b/ecmascript/mem/space-inl.h index a37e5c7d04ea9b93a8314bfe9250c41af73fda9d..8a355556ce23f64f42eaf9788f4bd920f9c17a0e 100644 --- a/ecmascript/mem/space-inl.h +++ b/ecmascript/mem/space-inl.h @@ -38,6 +38,19 @@ void Space::EnumerateRegions(const Callback &cb, Region *region) const } } +RememberedSet *Region::CreateReferenceSet() +{ + return CreateRememberedSet(); +} + +RememberedSet *Region::GetOrCreateReferenceSet() +{ + if (UNLIKELY(referenceSet_ == nullptr)) { + referenceSet_ = CreateReferenceSet(); + } + return referenceSet_; +} + RememberedSet *Region::CreateRememberedSet() { auto setSize = RememberedSet::GetSizeInByte(GetCapacity()); @@ -70,6 +83,12 @@ void Region::InsertCrossRegionRememberedSet(uintptr_t addr) set->Insert(addr); } +void Region::InsertReferenceSet(uintptr_t addr) +{ + auto set = GetOrCreateReferenceSet(); + set->Insert(addr); +} + void Region::InsertOldToNewRememberedSet(uintptr_t addr) { auto set = GetOrCreateOldToNewRememberedSet(); diff --git a/ecmascript/mem/space.cpp b/ecmascript/mem/space.cpp index c1156bc0eed7de9e7972d1f5e46bb0d01ba3c77f..a8deb8bb5adad7777f4426a5ea689822a433236e 100644 --- a/ecmascript/mem/space.cpp +++ b/ecmascript/mem/space.cpp @@ -67,6 +67,12 @@ void 
Space::ClearAndFreeRegion(Region *region) const_cast(heap_->GetRegionFactory())->Free(bitmap->GetBitMap().Data(), size); delete bitmap; } + if (region->GetReferenceSet() != nullptr) { + auto referenceSet = region->GetReferenceSet(); + auto size = RememberedSet::GetSizeInByte(region->GetCapacity()); + const_cast(heap_->GetRegionFactory())->Free(referenceSet->GetBitMap().Data(), size); + delete referenceSet; + } if (region->GetCrossRegionRememberedSet() != nullptr) { auto rememberedSet = region->GetCrossRegionRememberedSet(); auto size = RememberedSet::GetSizeInByte(region->GetCapacity()); diff --git a/ecmascript/mem/tagged_object-inl.h b/ecmascript/mem/tagged_object-inl.h index d993a5aed4cdc31eb92628da620cb6be10f83b50..42b9fde4a8efe9309d446e927711086d97ace5f1 100644 --- a/ecmascript/mem/tagged_object-inl.h +++ b/ecmascript/mem/tagged_object-inl.h @@ -26,7 +26,11 @@ namespace panda::ecmascript { inline void TaggedObject::SetClass(JSHClass *hclass) { - *reinterpret_cast(ToUintPtr(this)) = reinterpret_cast(hclass); + if (hclass == nullptr) { + *reinterpret_cast(ToUintPtr(this)) = reinterpret_cast(hclass); + } else { + Barriers::SetDynObject(GetJSThread(), this, 0, JSTaggedValue(hclass).GetRawData()); + } } inline void TaggedObject::SetClass(JSHandle hclass) diff --git a/ecmascript/platform/platform.cpp b/ecmascript/platform/platform.cpp index f589f3807e9a1f370137a6b60e6c52deba0057db..01a76cf85afb0a2e92f4167c52574c27223d783e 100644 --- a/ecmascript/platform/platform.cpp +++ b/ecmascript/platform/platform.cpp @@ -34,12 +34,12 @@ void Platform::Destory() } } -int Platform::TheMostSuitableThreadNum(int threadNum) const +uint32_t Platform::TheMostSuitableThreadNum(uint32_t threadNum) const { if (threadNum > 0) { - return std::min(threadNum, MAX_PLATFORM_THREAD_NUM); + return std::min(threadNum, MAX_PLATFORM_THREAD_NUM); } - int numOfCpuCore = get_nprocs() - 1; - return std::min(numOfCpuCore, MAX_PLATFORM_THREAD_NUM); + uint32_t numOfCpuCore = get_nprocs() - 1; + return 
std::min(numOfCpuCore, MAX_PLATFORM_THREAD_NUM); } } // namespace panda::ecmascript diff --git a/ecmascript/platform/platform.h b/ecmascript/platform/platform.h index d62af797eeac0c26e5578b02f264aafa0debcf76..2b2fc0377e56c5ee5d5079db30173a75dd5bb62a 100644 --- a/ecmascript/platform/platform.h +++ b/ecmascript/platform/platform.h @@ -45,15 +45,22 @@ public: runner_->PostTask(std::move(task)); } + uint32_t GetTotalThreadNum() const + { + return runner_->GetTotalThreadNum(); + } + private: static constexpr uint32_t MAX_PLATFORM_THREAD_NUM = 7; static constexpr uint32_t DEFAULT_PLATFORM_THREAD_NUM = 0; - int TheMostSuitableThreadNum(int threadNum) const; + uint32_t TheMostSuitableThreadNum(uint32_t threadNum) const; std::unique_ptr runner_; int isInitialized_ = 0; os::memory::Mutex mutex_; + + friend class WorkerHelper; }; } // namespace panda::ecmascript #endif // ECMASCRIPT_PALTFORM_PLATFORM_H diff --git a/ecmascript/platform/runner.cpp b/ecmascript/platform/runner.cpp index 949607932c47888207d2a1457e4394163990f81c..6de2e51fa8d597315f9bba36bf186a7f6174975b 100644 --- a/ecmascript/platform/runner.cpp +++ b/ecmascript/platform/runner.cpp @@ -18,10 +18,11 @@ #include "os/thread.h" namespace panda::ecmascript { -Runner::Runner(int threadNum) +Runner::Runner(uint32_t threadNum) : totalThreadNum_(threadNum) { - for (int i = 0; i < threadNum; i++) { - std::unique_ptr thread = std::make_unique(&Runner::Run, this); + for (uint32_t i = 0; i < threadNum; i++) { + // main thread is 0; + std::unique_ptr thread = std::make_unique(&Runner::Run, this, i + 1); os::thread::SetThreadName(thread->native_handle(), "GC_WorkerThread"); threadPool_.emplace_back(std::move(thread)); } @@ -37,10 +38,10 @@ void Runner::Terminate() threadPool_.clear(); } -void Runner::Run() +void Runner::Run(uint32_t threadId) { while (std::unique_ptr task = taskQueue_.PopTask()) { - task->Run(); + task->Run(threadId); } } } // namespace panda::ecmascript diff --git a/ecmascript/platform/runner.h 
b/ecmascript/platform/runner.h index be0ada02795dae94bd2a6aae7301676b3cdae6e0..77be324f4eea36551c3e82499a701bf51749266f 100644 --- a/ecmascript/platform/runner.h +++ b/ecmascript/platform/runner.h @@ -25,7 +25,7 @@ namespace panda::ecmascript { class Runner { public: - explicit Runner(int threadNum); + explicit Runner(uint32_t threadNum); ~Runner() = default; NO_COPY_SEMANTIC(Runner); @@ -38,11 +38,18 @@ public: void Terminate(); + uint32_t GetTotalThreadNum() const + { + return totalThreadNum_; + } + private: - void Run(); + void Run(uint32_t threadId); std::vector> threadPool_ {}; TaskQueue taskQueue_ {}; + uint32_t totalThreadNum_ {0}; + std::vector threadIdToIndexList_; }; } // namespace panda::ecmascript #endif // ECMASCRIPT_PLATFORM_RUNNER_H diff --git a/ecmascript/platform/task.h b/ecmascript/platform/task.h index 71eff10111df1d949f1c4521acdfb63f55c0951a..7520e80417353091faefe8447cbc5ebde9db38b5 100644 --- a/ecmascript/platform/task.h +++ b/ecmascript/platform/task.h @@ -23,7 +23,7 @@ class Task { public: Task() = default; virtual ~Task() = default; - virtual bool Run() = 0; + virtual bool Run(uint32_t threadIndex) = 0; NO_COPY_SEMANTIC(Task); NO_MOVE_SEMANTIC(Task); diff --git a/ecmascript/tests/BUILD.gn b/ecmascript/tests/BUILD.gn index ce5365165d4d12eedf6c709ed65cd34fce35ec7f..8f17505eb1e2a9e547162c41b2bb72a9bb780f69 100644 --- a/ecmascript/tests/BUILD.gn +++ b/ecmascript/tests/BUILD.gn @@ -962,6 +962,33 @@ host_unittest_action("GcTest") { } } +host_unittest_action("ConcurrentMarkingTest") { + module_out_path = module_output_path + + sources = [ + # test file + "concurrent_marking_test.cpp", + ] + + configs = [ + "//ark/js_runtime:ecma_test_config", + "//ark/js_runtime:ark_jsruntime_public_config", # should add before + # arkruntime_public_config + "//ark/js_runtime:ark_jsruntime_common_config", + "$ark_root/runtime:arkruntime_public_config", + ] + + deps = [ + "$ark_root/libpandabase:libarkbase", + "//ark/js_runtime:libark_jsruntime_test", + 
sdk_libc_secshared_dep, + ] + + if (!is_standard_system) { + deps += [ "$ark_root/runtime:libarkruntime" ] + } +} + group("unittest") { testonly = true @@ -969,6 +996,7 @@ group("unittest") { deps = [ ":AssertScopeTest", ":BuiltinsTest", + ":ConcurrentMarkingTest", ":DumpTest", ":EcmaModuleTest", ":EcmaStringTest", @@ -1012,6 +1040,7 @@ group("host_unittest") { deps = [ ":AssertScopeTestAction", ":BuiltinsTestAction", + ":ConcurrentMarkingTestAction", ":DumpTestAction", ":EcmaModuleTestAction", ":EcmaStringTestAction", diff --git a/ecmascript/tests/concurrent_marking_test.cpp b/ecmascript/tests/concurrent_marking_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b7f2ed148e6063ace069a5ea1adc321e92e29dd2 --- /dev/null +++ b/ecmascript/tests/concurrent_marking_test.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2021 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ecmascript/tests/test_helper.h" + +#include "ecmascript/ecma_vm.h" +#include "ecmascript/global_env.h" +#include "ecmascript/js_handle.h" +#include "ecmascript/mem/clock_scope.h" +#include "ecmascript/mem/verification.h" + +using namespace panda::ecmascript; + +namespace panda::test { +class ConcurrentMarkingTest : public testing::Test { +public: + static void SetUpTestCase() + { + GTEST_LOG_(INFO) << "SetUpTestCase"; + } + + static void TearDownTestCase() + { + GTEST_LOG_(INFO) << "TearDownCase"; + } + + void SetUp() override + { + RuntimeOptions options; + options.SetShouldLoadBootPandaFiles(false); + options.SetShouldInitializeIntrinsics(false); + options.SetBootClassSpaces( {"ecmascript"} ); + options.SetRuntimeType("ecmascript"); + options.SetPreGcHeapVerifyEnabled(true); + static EcmaLanguageContext lcEcma; + [[maybe_unused]] bool success = Runtime::Create(options, {&lcEcma}); + ASSERT_TRUE(success) << "Cannot create Runtime"; + instance = Runtime::GetCurrent()->GetPandaVM(); + ASSERT_TRUE(instance != nullptr) << "Cannot create EcmaVM"; + thread = EcmaVM::Cast(instance)->GetJSThread(); + scope = new EcmaHandleScope(thread); + thread->SetIsEcmaInterpreter(true); + EcmaVM::Cast(instance)->GetFactory()->SetTriggerGc(false); + auto heap = const_cast(thread->GetEcmaVM()->GetHeap()); + heap->SetConcurrentMarkingEnable(true); + } + + void TearDown() override + { + TestHelper::DestroyEcmaVMWithScope(instance, scope); + } + + JSHandle CreateTaggedArray(array_size_t length, JSTaggedValue initVal, MemSpaceType spaceType) + { + ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); + return factory->NewTaggedArray(length, initVal, spaceType); + } + + PandaVM *instance {nullptr}; + ecmascript::EcmaHandleScope *scope {nullptr}; + JSThread *thread {nullptr}; +}; + +HWTEST_F_L0(ConcurrentMarkingTest, PerformanceWithConcurrentMarking) +{ + array_size_t rootLength = 1024; + JSHandle rootArray = + CreateTaggedArray(rootLength, JSTaggedValue::Undefined(), 
MemSpaceType::OLD_SPACE); + for (array_size_t i = 0; i < rootLength; i++) { + array_size_t subArrayLength = 1024; + auto array = CreateTaggedArray(subArrayLength, JSTaggedValue::Undefined(), MemSpaceType::OLD_SPACE); + rootArray->Set(thread, i, array); + } + auto heap = const_cast(thread->GetEcmaVM()->GetHeap()); + heap->TriggerConcurrentMarking(); // concurrent mark + for (array_size_t i = 0; i < rootLength; i++) { + array_size_t subArrayLength = 1024; + auto array = CreateTaggedArray(subArrayLength, JSTaggedValue::Undefined(), MemSpaceType::OLD_SPACE); + rootArray->Set(thread, i, array); + } + heap->CollectGarbage(TriggerGCType::OLD_GC); +} + +HWTEST_F_L0(ConcurrentMarkingTest, PerformanceWithoutConcurrentMarking) +{ + array_size_t rootLength = 1024; + JSHandle rootArray = + CreateTaggedArray(rootLength, JSTaggedValue::Undefined(), MemSpaceType::OLD_SPACE); + for (array_size_t i = 0; i < rootLength; i++) { + array_size_t subArrayLength = 1024; + auto array = CreateTaggedArray(subArrayLength, JSTaggedValue::Undefined(), MemSpaceType::OLD_SPACE); + rootArray->Set(thread, i, array); + } + auto heap = const_cast(thread->GetEcmaVM()->GetHeap()); + for (array_size_t i = 0; i < rootLength; i++) { + array_size_t subArrayLength = 1024; + auto array = CreateTaggedArray(subArrayLength, JSTaggedValue::Undefined(), MemSpaceType::OLD_SPACE); + rootArray->Set(thread, i, array); + } + heap->CollectGarbage(TriggerGCType::OLD_GC); +} +} // namespace panda::test diff --git a/ecmascript/thread/thread_safe_queue.h b/ecmascript/thread/thread_safe_queue.h deleted file mode 100644 index 27d8b2cca29fc94c530c4683efd1107aeafaee65..0000000000000000000000000000000000000000 --- a/ecmascript/thread/thread_safe_queue.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2021 Huawei Device Co., Ltd. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ECMASCRIPT_THREAD_THREAD_SAFE_QUEUE_H -#define ECMASCRIPT_THREAD_THREAD_SAFE_QUEUE_H - -#include "os/mutex.h" -#include - -namespace panda::ecmascript { -template -class ThreadSafeQueue { -public: - explicit ThreadSafeQueue() = default; - ~ThreadSafeQueue() = default; - - bool empty() - { - os::memory::LockHolder lock(mutex_); - return queue_.empty(); - } - - int size() - { - os::memory::LockHolder lock(mutex_); - return queue_.size(); - } - - void enqueue(T &t) - { - os::memory::LockHolder lock(mutex_); - queue_.push(t); - } - - bool dequeue(T &t) - { - os::memory::LockHolder lock(mutex_); - - if (queue_.empty()) { - return false; - } - t = std::move(queue_.front()); - queue_.pop(); - return true; - } - -private: - NO_COPY_SEMANTIC(ThreadSafeQueue); - NO_MOVE_SEMANTIC(ThreadSafeQueue); - - std::queue queue_; - os::memory::Mutex mutex_; -}; -} // namespace panda::ecmascript - -#endif // ECMASCRIPT_THREAD_THREAD_SAFE_QUEUE_H