From 837d6e9d1a8abff6d8dbbc7f01ce230158847233 Mon Sep 17 00:00:00 2001 From: linxiang Date: Fri, 22 Oct 2021 16:11:21 +0800 Subject: [PATCH 1/2] concurrent marking Signed-off-by: linxiang Change-Id: I8d786ae31619e1a47c718c2b2ac14d6846856405 --- BUILD.gn | 3 +- ecmascript/ecma_vm.cpp | 1 + ecmascript/interpreter/interpreter-inl.h | 6 + ecmascript/mem/barriers-inl.h | 16 +- ecmascript/mem/compress_collector.cpp | 23 +- ecmascript/mem/compress_collector.h | 7 +- ecmascript/mem/concurrent_marker.cpp | 236 ++++++++++++++++++ ecmascript/mem/concurrent_marker.h | 117 +++++++++ ecmascript/mem/concurrent_sweeper.cpp | 2 +- ecmascript/mem/concurrent_sweeper.h | 2 +- ecmascript/mem/ecma_heap_manager.h | 1 - ecmascript/mem/heap-inl.h | 1 + ecmascript/mem/heap.cpp | 126 ++++++++-- ecmascript/mem/heap.h | 62 ++++- ecmascript/mem/old_space_collector.cpp | 126 ++-------- ecmascript/mem/old_space_collector.h | 16 +- ...ce_worker.cpp => parallel_work_helper.cpp} | 209 ++++++---------- ..._space_worker.h => parallel_work_helper.h} | 117 +++------ ecmascript/mem/region.h | 19 +- ecmascript/mem/semi_space_collector-inl.h | 10 +- ecmascript/mem/semi_space_collector.cpp | 29 +-- ecmascript/mem/semi_space_collector.h | 22 +- ecmascript/mem/semi_space_marker.cpp | 2 +- ecmascript/mem/space-inl.h | 19 ++ ecmascript/mem/space.cpp | 6 + ecmascript/mem/tagged_object-inl.h | 6 +- ecmascript/platform/platform.cpp | 8 +- ecmascript/platform/platform.h | 9 +- ecmascript/platform/runner.cpp | 11 +- ecmascript/platform/runner.h | 11 +- ecmascript/platform/task.h | 2 +- ecmascript/tests/BUILD.gn | 29 +++ ecmascript/tests/concurrent_marking_test.cpp | 112 +++++++++ ecmascript/thread/thread_safe_queue.h | 68 ----- 34 files changed, 941 insertions(+), 493 deletions(-) create mode 100644 ecmascript/mem/concurrent_marker.cpp create mode 100644 ecmascript/mem/concurrent_marker.h rename ecmascript/mem/{semi_space_worker.cpp => parallel_work_helper.cpp} (63%) rename ecmascript/mem/{semi_space_worker.h => 
parallel_work_helper.h} (55%) create mode 100644 ecmascript/tests/concurrent_marking_test.cpp delete mode 100644 ecmascript/thread/thread_safe_queue.h diff --git a/BUILD.gn b/BUILD.gn index 042bdd5acf..caae009e7d 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -339,6 +339,7 @@ ecma_source = [ "ecmascript/mem/c_string.cpp", "ecmascript/mem/chunk.cpp", "ecmascript/mem/compress_collector.cpp", + "ecmascript/mem/concurrent_marker.cpp", "ecmascript/mem/concurrent_sweeper.cpp", "ecmascript/mem/ecma_heap_manager.cpp", "ecmascript/mem/free_object_kind.cpp", @@ -347,10 +348,10 @@ ecma_source = [ "ecmascript/mem/heap.cpp", "ecmascript/mem/mem_controller.cpp", "ecmascript/mem/old_space_collector.cpp", + "ecmascript/mem/parallel_work_helper.cpp", "ecmascript/mem/region_factory.cpp", "ecmascript/mem/semi_space_collector.cpp", "ecmascript/mem/semi_space_marker.cpp", - "ecmascript/mem/semi_space_worker.cpp", "ecmascript/mem/space.cpp", "ecmascript/mem/tagged_object.cpp", "ecmascript/mem/verification.cpp", diff --git a/ecmascript/ecma_vm.cpp b/ecmascript/ecma_vm.cpp index 7d5de2b91a..782d068a6e 100644 --- a/ecmascript/ecma_vm.cpp +++ b/ecmascript/ecma_vm.cpp @@ -33,6 +33,7 @@ #include "ecmascript/js_for_in_iterator.h" #include "ecmascript/js_invoker.h" #include "ecmascript/js_thread.h" +#include "ecmascript/mem/concurrent_marker.h" #include "ecmascript/mem/heap.h" #include "ecmascript/tagged_dictionary.h" #include "ecmascript/object_factory.h" diff --git a/ecmascript/interpreter/interpreter-inl.h b/ecmascript/interpreter/interpreter-inl.h index ad41ab5377..cd1f2fd99f 100644 --- a/ecmascript/interpreter/interpreter-inl.h +++ b/ecmascript/interpreter/interpreter-inl.h @@ -27,6 +27,7 @@ #include "ecmascript/js_generator_object.h" #include "ecmascript/js_tagged_value.h" #include "ecmascript/literal_data_extractor.h" +#include "ecmascript/mem/concurrent_marker.h" #include "ecmascript/runtime_call_id.h" #include "ecmascript/template_string.h" #include "ecmascript/vmstat/runtime_stat.h" @@ 
-3308,6 +3309,11 @@ JSTaggedValue EcmaInterpreter::GetRuntimeProfileTypeInfo(TaggedType *sp) bool EcmaInterpreter::UpdateHotnessCounter(JSThread* thread, TaggedType *sp, JSTaggedValue acc, int32_t offset) { + auto marker = thread->GetEcmaVM()->GetHeap()->GetConcurrentMarker(); + if (marker->IsFinished()) { + marker->CheckAndSweep(); + } + FrameState *state = GET_FRAME(sp); auto method = state->method; auto hotnessCounter = static_cast(method->GetHotnessCounter()); diff --git a/ecmascript/mem/barriers-inl.h b/ecmascript/mem/barriers-inl.h index 494474386f..c3568db594 100644 --- a/ecmascript/mem/barriers-inl.h +++ b/ecmascript/mem/barriers-inl.h @@ -24,14 +24,16 @@ namespace panda::ecmascript { static inline void MarkingBarrier(void *obj, size_t offset, JSTaggedType value) { - ASSERT(value != JSTaggedValue::VALUE_UNDEFINED); - Region *object_region = Region::ObjectAddressToRange(static_cast(obj)); - Region *value_region = Region::ObjectAddressToRange(reinterpret_cast(value)); - if (!object_region->InYoungGeneration() && value_region->InYoungGeneration()) { - [[maybe_unused]] uintptr_t slot_addr = ToUintPtr(obj) + offset; + Region *objectRegion = Region::ObjectAddressToRange(static_cast(obj)); + Region *valueRegion = Region::ObjectAddressToRange(reinterpret_cast(value)); + if (!objectRegion->InYoungGeneration() && valueRegion->InYoungGeneration()) { + uintptr_t slotAddr = ToUintPtr(obj) + offset; // Should align with '8' in 64 and 32 bit platform - ASSERT((slot_addr % static_cast(MemAlignment::MEM_ALIGN_OBJECT)) == 0); - object_region->InsertOldToNewRememberedSet(slot_addr); + objectRegion->InsertOldToNewRememberedSet(slotAddr); + } + if (objectRegion->GetSpace()->GetHeap()->IsConcurrentMarking()) { + uintptr_t slotAddr = ToUintPtr(obj) + offset; + objectRegion->InsertReferenceSet(slotAddr); } } diff --git a/ecmascript/mem/compress_collector.cpp b/ecmascript/mem/compress_collector.cpp index 8fd14bbb39..e3dd08946d 100644 --- a/ecmascript/mem/compress_collector.cpp +++ 
b/ecmascript/mem/compress_collector.cpp @@ -30,7 +30,8 @@ namespace panda::ecmascript { CompressCollector::CompressCollector(Heap *heap, bool parallelGc) : heap_(heap), paralledGC_(parallelGc), marker_(this), rootManager_(heap->GetEcmaVM()) { - workList_ = new CompressGCWorker(heap_, heap_->GetThreadPool()->GetThreadNum()); + workList_ = new WorkerHelper(heap_, Platform::GetCurrentPlatform()->GetTotalThreadNum() + 1, + ParallelGCTaskPhase::COMPRESS_HANDLE_GLOBAL_POOL_TASK); } CompressCollector::~CompressCollector() @@ -58,7 +59,7 @@ void CompressCollector::RunPhases() void CompressCollector::InitializePhase() { - heap_->GetThreadPool()->WaitTaskFinish(); + heap_->WaitRunningTaskFinished(); heap_->GetSweeper()->EnsureAllTaskFinish(); auto compressSpace = const_cast(heap_->GetCompressSpace()); if (compressSpace->GetCommittedSize() == 0) { @@ -93,7 +94,7 @@ void CompressCollector::InitializePhase() heap_->FlipCompressSpace(); heap_->FlipNewSpace(); - workList_->Initialize(); + workList_->Initialize(TriggerGCType::COMPRESS_FULL_GC); youngAndOldAliveSize_ = 0; nonMoveSpaceFreeSize_ = 0; youngSpaceCommitSize_ = heap_->GetFromSpace()->GetCommittedSize(); @@ -105,11 +106,7 @@ void CompressCollector::FinishPhase() { // swap if (paralledGC_) { - heap_->GetThreadPool()->Submit([this]([[maybe_unused]] uint32_t threadId) -> bool { - const_cast(heap_->GetCompressSpace())->ReclaimRegions(); - const_cast(heap_->GetFromSpace())->ReclaimRegions(); - return true; - }); + heap_->PostParallelGCTask(ParallelGCTaskPhase::COMPRESS_HANDLE_RECLIAM_REGION_TASK); } else { const_cast(heap_->GetCompressSpace())->ReclaimRegions(); const_cast(heap_->GetFromSpace())->ReclaimRegions(); @@ -140,9 +137,7 @@ void CompressCollector::MarkingPhase() rootManager_.VisitVMRoots(gcMarkYoung, gcMarkRangeYoung); ProcessMarkStack(0); - if (paralledGC_) { - heap_->GetThreadPool()->WaitTaskFinish(); - } + heap_->WaitRunningTaskFinished(); } void CompressCollector::ProcessMarkStack(uint32_t threadId) @@ -177,7 
+172,8 @@ void CompressCollector::SweepPhases() { trace::ScopedTrace scoped_trace("CompressCollector::SweepPhases"); // process weak reference - for (uint32_t i = 0; i < heap_->GetThreadPool()->GetThreadNum(); i++) { + auto totalThreadCount = Platform::GetCurrentPlatform()->GetTotalThreadNum() + 1; // gc thread and main thread + for (uint32_t i = 0; i < totalThreadCount; i++) { ProcessQueue *queue = workList_->GetWeakReferenceQueue(i); while (true) { @@ -221,8 +217,7 @@ void CompressCollector::SweepPhases() MarkWord markWord(header); if (markWord.IsForwardingAddress()) { - TaggedObject *dst = markWord.ToForwardingAddress(); - return dst; + return markWord.ToForwardingAddress(); } return reinterpret_cast(ToUintPtr(nullptr)); }; diff --git a/ecmascript/mem/compress_collector.h b/ecmascript/mem/compress_collector.h index 8cfb405ed1..6e6c8c79db 100644 --- a/ecmascript/mem/compress_collector.h +++ b/ecmascript/mem/compress_collector.h @@ -17,8 +17,8 @@ #define ECMASCRIPT_MEM_COMPRESS_COLLECTOR_H #include "ecmascript/mem/compress_gc_marker.h" +#include "ecmascript/mem/parallel_work_helper.h" #include "ecmascript/mem/semi_space_collector.h" -#include "ecmascript/mem/semi_space_worker.h" namespace panda { namespace ecmascript { @@ -57,7 +57,7 @@ private: bool paralledGC_; CompressGCMarker marker_; HeapRootManager rootManager_; - CompressGCWorker *workList_; + WorkerHelper *workList_; os::memory::Mutex mtx_; BumpPointerAllocator fromSpaceAllocator_{}; FreeListAllocator oldSpaceAllocator_{}; @@ -70,7 +70,8 @@ private: friend class TlabAllocator; friend class CompressGCMarker; - friend class CompressGCWorker; + friend class WorkerHelper; + friend class Heap; }; } // namespace ecmascript } // namespace panda diff --git a/ecmascript/mem/concurrent_marker.cpp b/ecmascript/mem/concurrent_marker.cpp new file mode 100644 index 0000000000..2d1068821e --- /dev/null +++ b/ecmascript/mem/concurrent_marker.cpp @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2021 Huawei Device Co., Ltd. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ecmascript/mem/concurrent_marker.h" + +#include "ecmascript/mem/allocator-inl.h" +#include "ecmascript/mem/heap-inl.h" +#include "ecmascript/mem/heap_roots-inl.h" +#include "ecmascript/platform/platform.h" + +#include "ecmascript/mem/verification.h" + +#include "os/mutex.h" + +namespace panda::ecmascript { +ConcurrentMarker::ConcurrentMarker(Heap *heap) : heap_(heap), rootManager_(heap_->GetEcmaVM()) +{ + workList_ = new WorkerHelper(heap_, Platform::GetCurrentPlatform()->GetTotalThreadNum() + 1, + ParallelGCTaskPhase::CONCURRENT_HANDLE_GLOBAL_POOL_TASK); +} + +void ConcurrentMarker::ConcurrentMarking() +{ + InitializeMarking(); + SetMarkingStatus(MarkingStatus::CONCURRENT_MARKING); + Platform::GetCurrentPlatform()->PostTask(std::make_unique(this)); +} + +void ConcurrentMarker::SweepPhases(WorkerHelper *worklist) +{ + auto totalThreadCount = Platform::GetCurrentPlatform()->GetTotalThreadNum() + 1; // gc thread and main thread + for (uint32_t i = 0; i < totalThreadCount; i++) { + ProcessQueue *queue = worklist->GetWeakReferenceQueue(i); + while (true) { + auto obj = queue->PopBack(); + if (UNLIKELY(obj == nullptr)) { + break; + } + ObjectSlot slot(ToUintPtr(obj)); + JSTaggedValue value(slot.GetTaggedType()); + + if (value.IsHole()) { + continue; + } + auto header = value.GetTaggedWeakRef(); + + Region *objectRegion = Region::ObjectAddressToRange(header); + auto markBitmap = 
objectRegion->GetMarkBitmap(); + if (!markBitmap->Test(header)) { + slot.Update(static_cast(JSTaggedValue::Undefined().GetRawData())); + } + } + } + + auto stringTable = heap_->GetEcmaVM()->GetEcmaStringTable(); + WeakRootVisitor gcUpdateWeak = [](TaggedObject *header) { + Region *objectRegion = Region::ObjectAddressToRange(reinterpret_cast(header)); + if (objectRegion->InYoungGeneration()) { + return header; + } + + auto markBitmap = objectRegion->GetMarkBitmap(); + if (markBitmap->Test(header)) { + return header; + } + return reinterpret_cast(ToUintPtr(nullptr)); + }; + stringTable->SweepWeakReference(gcUpdateWeak); + heap_->GetEcmaVM()->GetJSThread()->IterateWeakEcmaGlobalStorage(gcUpdateWeak); + heap_->GetEcmaVM()->ProcessReferences(gcUpdateWeak); + + heap_->GetSweeper()->SweepPhases(); +} + +void ConcurrentMarker::FinishPhase(WorkerHelper *worklist) +{ + size_t aliveSize = 0; + worklist->Finish(aliveSize); +} + +void ConcurrentMarker::ReMarking(WorkerHelper *worklist) +{ + MarkRoots(worklist); + heap_->EnumerateRegions([this, &worklist](Region *current) { + auto referenceSet = current->GetReferenceSet(); + if (referenceSet == nullptr) { + return; + } + referenceSet->IterateOverMarkedChunks([this, &worklist](void *mem) -> bool { + ObjectSlot slot(ToUintPtr(mem)); + JSTaggedValue value(slot.GetTaggedType()); + if (value.IsWeak()) { + RecordWeakReference(worklist, 0, reinterpret_cast(slot.SlotAddress())); + } else if (value.IsHeapObject()) { + MarkObject(worklist, 0, value.GetTaggedObject()); + } + return true; + }); + }); + ParallelMarkStack(worklist, 0); +} + +void ConcurrentMarker::MarkRoots(WorkerHelper *worklist) +{ + RootVisitor gcMark = [this, &worklist]([[maybe_unused]] Root type, ObjectSlot slot) { + JSTaggedValue value(slot.GetTaggedType()); + if (value.IsHeapObject()) { + MarkObject(worklist, 0, value.GetTaggedObject()); + } + }; + RootRangeVisitor gcMarkRange = [this, &worklist]([[maybe_unused]] Root type, ObjectSlot start, ObjectSlot end) { + for 
(ObjectSlot slot = start; slot < end; slot++) { + JSTaggedValue value(slot.GetTaggedType()); + if (value.IsHeapObject()) { + MarkObject(worklist, 0, value.GetTaggedObject()); + } + } + }; + HeapRootManager rootManager(heap_->GetEcmaVM()); + rootManager.VisitVMRoots(gcMark, gcMarkRange); +} + +void ConcurrentMarker::CheckAndSweep() // js-thread wait for sweep +{ + os::memory::LockHolder lock(waitMarkingFinishedMutex_); // dont need lock? => atomic? + if (vmThreadNeedSweep_) { + heap_->CollectGarbage(TriggerGCType::OLD_GC); + } +} + +void ConcurrentMarker::WaitConcurrentMarkingFinished() // call in EcmaVm thread, wait for mark finished +{ + os::memory::LockHolder lock(waitMarkingFinishedMutex_); + vmThreadWaitMarkingFinished_ = true; + waitMarkingFinishedCV_.Wait(&waitMarkingFinishedMutex_); +} + +// -------------------- privete method ------------------------------------------ +void ConcurrentMarker::InitializeMarking() +{ + heap_->WaitRunningTaskFinished(); + heap_->GetSweeper()->EnsureAllTaskFinish(); + heap_->EnumerateRegions([](Region *current) { + // ensure mark bitmap + auto bitmap = current->GetMarkBitmap(); + if (bitmap == nullptr) { + current->GetOrCreateMarkBitmap(); + } else { + bitmap->ClearAllBits(); + } + auto referenceSet = current->GetReferenceSet(); + if (referenceSet == nullptr) { + current->GetOrCreateReferenceSet(); + } else { + referenceSet->ClearAllBits(); + } + }); + workList_->Initialize(TriggerGCType::OLD_GC); + MarkRoots(workList_); +} + +void ConcurrentMarker::MarkObject(WorkerHelper *worklist, uint32_t threadId, TaggedObject *object) +{ + Region *objectRegion = Region::ObjectAddressToRange(object); + + auto markBitmap = objectRegion->GetOrCreateMarkBitmap(); + if (!markBitmap->AtomicTestAndSet(object)) { + worklist->Push(threadId, object); + } +} + +void ConcurrentMarker::RecordWeakReference(WorkerHelper *worklist, uint32_t threadId, JSTaggedType *ref) +{ + worklist->PushWeakReference(threadId, ref); +} + +void 
ConcurrentMarker::ParallelMarkStack(WorkerHelper *worklist, uint32_t threadId) +{ + while (true) { + TaggedObject *obj = nullptr; + if (!worklist->Pop(threadId, &obj)) { + break; + } + auto jsHclass = obj->GetClass(); + // mark dynClass + MarkObject(worklist, threadId, jsHclass); + + HeapRootManager rootManager(heap_->GetEcmaVM()); + rootManager.MarkObjectBody(obj, jsHclass, + [this, &threadId, &worklist]([[maybe_unused]] TaggedObject *root, ObjectSlot start, ObjectSlot end) { + for (ObjectSlot slot = start; slot < end; slot++) { + JSTaggedValue value(slot.GetTaggedType()); + if (value.IsWeak()) { + RecordWeakReference(worklist, threadId, reinterpret_cast(slot.SlotAddress())); + continue; + } + if (value.IsHeapObject()) { + MarkObject(worklist, threadId, value.GetTaggedObject()); + } + } + }); + } +} + +bool ConcurrentMarker::ConcurrentMarkerTask::Run(uint32_t threadId) +{ + marker_->ParallelMarkStack(marker_->workList_, 0); + marker_->heap_->WaitRunningTaskFinished(); + marker_->MarkingFinished(); + return true; +} + +void ConcurrentMarker::MarkingFinished() +{ + SetMarkingStatus(MarkingStatus::FINISHED); + os::memory::LockHolder lock(waitMarkingFinishedMutex_); + if (vmThreadWaitMarkingFinished_) { + waitMarkingFinishedCV_.Signal(); + vmThreadWaitMarkingFinished_ = false; + } else { + vmThreadNeedSweep_ = true; + } +} +} // namespace panda::ecmascript diff --git a/ecmascript/mem/concurrent_marker.h b/ecmascript/mem/concurrent_marker.h new file mode 100644 index 0000000000..9fc79aad4e --- /dev/null +++ b/ecmascript/mem/concurrent_marker.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2021 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ECMASCRIPT_MEM_CONCURRENT_MARKER_H +#define ECMASCRIPT_MEM_CONCURRENT_MARKER_H + +#include +#include + +#include "ecmascript/mem/heap_roots.h" +#include "ecmascript/mem/parallel_work_helper.h" +#include "ecmascript/mem/space.h" +#include "ecmascript/platform/task.h" + +#include "os/mutex.h" + +namespace panda::ecmascript { +class Heap; + +class ConcurrentMarker { +public: + ConcurrentMarker(Heap *heap); + ~ConcurrentMarker() = default; + + NO_COPY_SEMANTIC(ConcurrentMarker); + NO_MOVE_SEMANTIC(ConcurrentMarker); + + void ConcurrentMarking(); + void SweepPhases(WorkerHelper *worklist); + void FinishPhase(WorkerHelper *worklist); + void ReMarking(WorkerHelper *worklist); + void MarkRoots(WorkerHelper *worklist); + // Parallel GC threadID starts with 1. 0 is main threadId + void ParallelMarkStack(WorkerHelper *worklist, uint32_t threadId); + + void CheckAndSweep(); // call in vm thread. 
+ void WaitConcurrentMarkingFinished(); // call in main thread + + void Reset() + { + SetMarkingStatus(MarkingStatus::INITIALIZED); + vmThreadNeedSweep_ = false; + } + + WorkerHelper *GetWorklist() const + { + return workList_; + } + + bool IsInitialized() const + { + return markingStatus_.load(std::memory_order_acquire) == MarkingStatus::INITIALIZED; + } + + bool IsConcurrentMarking() const + { + return markingStatus_.load(std::memory_order_acquire) == MarkingStatus::CONCURRENT_MARKING; + } + + bool IsFinished() const + { + return markingStatus_.load(std::memory_order_acquire) == MarkingStatus::FINISHED; + } +private: + enum class MarkingStatus { + INITIALIZED, + CONCURRENT_MARKING, + FINISHED + }; + + class ConcurrentMarkerTask : public Task { + public: + ConcurrentMarkerTask(ConcurrentMarker *marker) : marker_(marker) {} + ~ConcurrentMarkerTask() override = default; + bool Run(uint32_t threadId) override; + + NO_COPY_SEMANTIC(ConcurrentMarkerTask); + NO_MOVE_SEMANTIC(ConcurrentMarkerTask); + + private: + ConcurrentMarker *marker_ {nullptr}; + }; + + void SetMarkingStatus(MarkingStatus status) + { + markingStatus_.store(status, std::memory_order_release); + } + + void InitializeMarking(); + void MarkingFinished(); + void MarkObject(WorkerHelper *worklist, uint32_t threadId, TaggedObject *object); + void RecordWeakReference(WorkerHelper *worklist, uint32_t threadId, JSTaggedType *ref); + + Heap *heap_ {nullptr}; + WorkerHelper *workList_ {nullptr}; + HeapRootManager rootManager_; + + std::atomic markingStatus_ {MarkingStatus::INITIALIZED}; + bool vmThreadNeedSweep_ {false}; // notify js-thread that marking is finished and need sweep + bool vmThreadWaitMarkingFinished_ {false}; // jsMainThread waiting for concurrentGC FINISHED + os::memory::Mutex waitMarkingFinishedMutex_; + os::memory::ConditionVariable waitMarkingFinishedCV_; +}; +} // namespace panda::ecmascript +#endif // ECMASCRIPT_MEM_CONCURRENT_MARKER_H \ No newline at end of file diff --git 
a/ecmascript/mem/concurrent_sweeper.cpp b/ecmascript/mem/concurrent_sweeper.cpp index 59068857bb..a24bf6f740 100644 --- a/ecmascript/mem/concurrent_sweeper.cpp +++ b/ecmascript/mem/concurrent_sweeper.cpp @@ -235,7 +235,7 @@ void ConcurrentSweeper::FinishSweeping(MemSpaceType type) } } -bool ConcurrentSweeper::SweeperTask::Run() +bool ConcurrentSweeper::SweeperTask::Run(uint32_t threadIndex) { int sweepTypeNum = FREE_LIST_NUM - sweeper_->startSpaceType_; for (size_t i = sweeper_->startSpaceType_; i < FREE_LIST_NUM; i++) { diff --git a/ecmascript/mem/concurrent_sweeper.h b/ecmascript/mem/concurrent_sweeper.h index b24b5a65f5..b52ae85a5d 100644 --- a/ecmascript/mem/concurrent_sweeper.h +++ b/ecmascript/mem/concurrent_sweeper.h @@ -52,7 +52,7 @@ private: public: SweeperTask(ConcurrentSweeper *sweeper, MemSpaceType type) : sweeper_(sweeper), type_(type) {}; ~SweeperTask() override = default; - bool Run() override; + bool Run(uint32_t threadIndex) override; NO_COPY_SEMANTIC(SweeperTask); NO_MOVE_SEMANTIC(SweeperTask); diff --git a/ecmascript/mem/ecma_heap_manager.h b/ecmascript/mem/ecma_heap_manager.h index 82ec7bc71d..a8b2c8c245 100644 --- a/ecmascript/mem/ecma_heap_manager.h +++ b/ecmascript/mem/ecma_heap_manager.h @@ -33,7 +33,6 @@ public: inline TaggedObject *AllocateYoungGenerationOrHugeObject(JSHClass *hclass); inline TaggedObject *TryAllocateYoungGeneration(size_t size); inline TaggedObject *AllocateYoungGenerationOrHugeObject(JSHClass *hclass, size_t size); - inline TaggedObject *AllocateNonMovableOrHugeObject(JSHClass *hclass, size_t size); inline TaggedObject *AllocateNonMovableOrHugeObject(JSHClass *hclass); inline TaggedObject *AllocateHugeObject(JSHClass *hclass, size_t size); diff --git a/ecmascript/mem/heap-inl.h b/ecmascript/mem/heap-inl.h index a459e3e372..40b91abc95 100644 --- a/ecmascript/mem/heap-inl.h +++ b/ecmascript/mem/heap-inl.h @@ -17,6 +17,7 @@ #define ECMASCRIPT_MEM_HEAP_INL_H #include "ecmascript/mem/heap.h" + #include 
"ecmascript/mem/mem_controller.h" #include "ecmascript/mem/space.h" #include "ecmascript/hprof/heap_tracker.h" diff --git a/ecmascript/mem/heap.cpp b/ecmascript/mem/heap.cpp index 76bfc94b4c..440e059b4b 100644 --- a/ecmascript/mem/heap.cpp +++ b/ecmascript/mem/heap.cpp @@ -20,13 +20,14 @@ #include "ecmascript/ecma_vm.h" #include "ecmascript/mem/assert_scope-inl.h" #include "ecmascript/mem/compress_collector.h" +#include "ecmascript/mem/concurrent_marker.h" #include "ecmascript/mem/concurrent_sweeper.h" #include "ecmascript/mem/ecma_heap_manager.h" #include "ecmascript/mem/mark_stack.h" #include "ecmascript/mem/mem_controller.h" #include "ecmascript/mem/old_space_collector.h" +#include "ecmascript/mem/parallel_work_helper.h" #include "ecmascript/mem/semi_space_collector.h" -#include "ecmascript/mem/semi_space_worker.h" #include "ecmascript/mem/verification.h" namespace panda::ecmascript { @@ -56,21 +57,12 @@ void Heap::Initialize() machineCodeSpace_->Initialize(); hugeObjectSpace_ = new HugeObjectSpace(this); bool paralledGc = ecmaVm_->GetOptions().IsEnableParalledYoungGc(); - if (paralledGc) { - int numOfCpuCore = get_nprocs(); - int numThread = std::min(numOfCpuCore, THREAD_NUM_FOR_YOUNG_GC); - pool_ = new ThreadPool(numThread); - semiSpaceCollector_ = new SemiSpaceCollector(this, true); - compressCollector_ = new CompressCollector(this, true); - oldSpaceCollector_ = new OldSpaceCollector(this, true); - } else { - pool_ = new ThreadPool(1); - semiSpaceCollector_ = new SemiSpaceCollector(this, false); - compressCollector_ = new CompressCollector(this, false); - oldSpaceCollector_ = new OldSpaceCollector(this, false); - } + semiSpaceCollector_ = new SemiSpaceCollector(this, paralledGc); + compressCollector_ = new CompressCollector(this, paralledGc); + oldSpaceCollector_ = new OldSpaceCollector(this, paralledGc); // After EcmaOptions merged, it will modified to EcmaOptions configuration sweeper_ = new ConcurrentSweeper(this, true); + concurrentMarker_ = new 
ConcurrentMarker(this); } void Heap::FlipNewSpace() @@ -88,7 +80,7 @@ void Heap::FlipCompressSpace() } void Heap::Destroy() { - pool_->WaitTaskFinish(); + WaitRunningTaskFinished(); sweeper_->EnsureAllTaskFinish(); toSpace_->Destroy(); delete toSpace_; @@ -125,10 +117,15 @@ void Heap::Destroy() regionFactory_ = nullptr; delete memController_; memController_ = nullptr; - delete pool_; - pool_ = nullptr; delete sweeper_; sweeper_ = nullptr; + delete concurrentMarker_; + concurrentMarker_ = nullptr; +} + +void Heap::TriggerConcurrentMarking() +{ + concurrentMarker_->ConcurrentMarking(); } void Heap::CollectGarbage(TriggerGCType gcType) @@ -144,6 +141,7 @@ void Heap::CollectGarbage(TriggerGCType gcType) } } } + switch (gcType) { case TriggerGCType::SEMI_GC: if (GetMemController()->IsInAppStartup()) { @@ -255,4 +253,98 @@ bool Heap::CheckAndTriggerNonMovableGC() CollectGarbage(TriggerGCType::NON_MOVE_GC); return true; } + +bool Heap::IsConcurrentMarking() const +{ + return concurrentMarker_->IsConcurrentMarking(); +} + +bool Heap::IsConcurrentMarkingFinished() const +{ + return concurrentMarker_->IsFinished(); +} + +bool Heap::IsConcurrentMarkingInitialized() const +{ + return concurrentMarker_->IsInitialized(); +} + +void Heap::WaitRunningTaskFinished() +{ + os::memory::LockHolder holder(waitTashFinishedMutex_); + while (runningTastCount_ > 0) { + waitTashFinishedCV_.Wait(&waitTashFinishedMutex_); + } +} + +void Heap::WaitConcurrentMarkingFinished() +{ + concurrentMarker_->WaitConcurrentMarkingFinished(); +} + +bool Heap::ConcurrentMarkingEnable() const +{ + return concurrentMarkingEnable_; +} + +void Heap::PostParallelGCTask(ParallelGCTaskPhase gcTask) +{ + IncreaseTaskCount(); + Platform::GetCurrentPlatform()->PostTask(std::make_unique(this, gcTask)); +} + +void Heap::IncreaseTaskCount() +{ + os::memory::LockHolder holder(waitTashFinishedMutex_); + runningTastCount_++; +} + +bool Heap::CheckCanDistributeTask() +{ + return runningTastCount_ < 
Platform::GetCurrentPlatform()->GetTotalThreadNum() - 1; +} + +void Heap::ReduceTaskCount() +{ + os::memory::LockHolder holder(waitTashFinishedMutex_); + runningTastCount_--; + if (runningTastCount_ == 0) { + waitTashFinishedCV_.SignalAll(); + } +} + +bool Heap::ParallelGCTask::Run(uint32_t threadIndex) +{ + switch (taskPhase_) { + case ParallelGCTaskPhase::SEMI_HANDLE_THREAD_ROOTS_TASK: + heap_->GetSemiSpaceCollector()->ParallelHandleThreadRoots(threadIndex); + break; + case ParallelGCTaskPhase::SEMI_HANDLE_SNAPSHOT_TASK: + heap_->GetSemiSpaceCollector()->ParallelHandleSnapShot(threadIndex); + break; + case ParallelGCTaskPhase::SEMI_HANDLE_GLOBAL_POOL_TASK: + heap_->GetSemiSpaceCollector()->ParallelHandleGlobalPool(threadIndex); + break; + case ParallelGCTaskPhase::SEMI_HANDLE_RECLIAM_REGION_TASK: + const_cast(heap_->GetFromSpace())->ReclaimRegions(); + break; + case ParallelGCTaskPhase::OLD_HANDLE_GLOBAL_POOL_TASK: + heap_->GetConcurrentMarker()->ParallelMarkStack(heap_->GetOldSpaceCollector()->GetWorklist(), threadIndex); + break; + case ParallelGCTaskPhase::COMPRESS_HANDLE_GLOBAL_POOL_TASK: + heap_->GetCompressCollector()->ProcessMarkStack(threadIndex); + break; + case ParallelGCTaskPhase::COMPRESS_HANDLE_RECLIAM_REGION_TASK: + const_cast(heap_->GetCompressSpace())->ReclaimRegions(); + const_cast(heap_->GetFromSpace())->ReclaimRegions(); + break; + case ParallelGCTaskPhase::CONCURRENT_HANDLE_GLOBAL_POOL_TASK: + heap_->GetConcurrentMarker()->ParallelMarkStack(heap_->GetConcurrentMarker()->GetWorklist(), threadIndex); + break; + default: + break; + } + heap_->ReduceTaskCount(); + return true; +} } // namespace panda::ecmascript diff --git a/ecmascript/mem/heap.h b/ecmascript/mem/heap.h index 55ce680c72..ec083189f2 100644 --- a/ecmascript/mem/heap.h +++ b/ecmascript/mem/heap.h @@ -16,9 +16,9 @@ #ifndef ECMASCRIPT_MEM_HEAP_H #define ECMASCRIPT_MEM_HEAP_H -#include "ecmascript/thread/thread_pool.h" #include "ecmascript/mem/mark_stack.h" #include 
"ecmascript/mem/space.h" +#include "ecmascript/platform/platform.h" namespace panda::ecmascript { class EcmaVM; @@ -32,6 +32,19 @@ class RegionFactory; class HeapTracker; class MemController; class ConcurrentSweeper; +class ConcurrentMarker; + +enum ParallelGCTaskPhase { + SEMI_HANDLE_THREAD_ROOTS_TASK, + SEMI_HANDLE_SNAPSHOT_TASK, + SEMI_HANDLE_GLOBAL_POOL_TASK, + SEMI_HANDLE_RECLIAM_REGION_TASK, + OLD_HANDLE_GLOBAL_POOL_TASK, + COMPRESS_HANDLE_GLOBAL_POOL_TASK, + COMPRESS_HANDLE_RECLIAM_REGION_TASK, + CONCURRENT_HANDLE_GLOBAL_POOL_TASK, + TASK_LAST // Count of different Task phase +}; class Heap { public: @@ -114,14 +127,14 @@ public: return sweeper_; } - EcmaVM *GetEcmaVM() const + ConcurrentMarker *GetConcurrentMarker() const { - return ecmaVm_; + return concurrentMarker_; } - ThreadPool *GetThreadPool() const + EcmaVM *GetEcmaVM() const { - return pool_; + return ecmaVm_; } void FlipNewSpace(); @@ -257,7 +270,38 @@ public: inline void ClearSlotsRange(Region *current, uintptr_t freeStart, uintptr_t freeEnd); + void WaitRunningTaskFinished(); + bool CheckCanDistributeTask(); + void PostParallelGCTask(ParallelGCTaskPhase gcTask); + bool IsEnableParallelGC() const + { + return paralledGc_; + } + void WaitConcurrentMarkingFinished(); + bool IsConcurrentMarkingInitialized() const; + bool IsConcurrentMarkingFinished() const; + bool IsConcurrentMarking() const; + bool ConcurrentMarkingEnable() const; + + void TriggerConcurrentMarking(); private: + void IncreaseTaskCount(); + void ReduceTaskCount(); + + class ParallelGCTask : public Task { + public: + ParallelGCTask(Heap *heap, ParallelGCTaskPhase taskPhase) : heap_(heap), taskPhase_(taskPhase) {}; + ~ParallelGCTask() override = default; + bool Run(uint32_t threadIndex) override; + + NO_COPY_SEMANTIC(ParallelGCTask); + NO_MOVE_SEMANTIC(ParallelGCTask); + + private: + Heap *heap_ {nullptr}; + ParallelGCTaskPhase taskPhase_; + }; + EcmaVM *ecmaVm_ {nullptr}; SemiSpace *fromSpace_ {nullptr}; SemiSpace *toSpace_ {nullptr}; 
@@ -275,9 +319,15 @@ private: RegionFactory *regionFactory_ {nullptr}; HeapTracker *tracker_ {nullptr}; MemController *memController_ {nullptr}; - ThreadPool *pool_ {nullptr}; size_t oldSpaceAllocLimit_ {OLD_SPACE_LIMIT_BEGIN}; + ConcurrentMarker *concurrentMarker_ {nullptr}; + uint32_t runningTastCount_ {0}; + os::memory::Mutex waitTashFinishedMutex_; + os::memory::ConditionVariable waitTashFinishedCV_; + bool paralledGc_ {false}; + + bool concurrentMarkingEnable_ {false}; inline void SetMaximumCapacity(SemiSpace *space, size_t maximumCapacity); }; } // namespace panda::ecmascript diff --git a/ecmascript/mem/old_space_collector.cpp b/ecmascript/mem/old_space_collector.cpp index 343f7c7e03..a8fedd7438 100644 --- a/ecmascript/mem/old_space_collector.cpp +++ b/ecmascript/mem/old_space_collector.cpp @@ -17,6 +17,7 @@ #include "ecmascript/ecma_vm.h" #include "ecmascript/mem/clock_scope.h" +#include "ecmascript/mem/concurrent_marker.h" #include "ecmascript/mem/ecma_heap_manager.h" #include "ecmascript/mem/heap-inl.h" #include "ecmascript/mem/heap_roots-inl.h" @@ -30,7 +31,8 @@ namespace panda::ecmascript { OldSpaceCollector::OldSpaceCollector(Heap *heap, bool parallelGc) : heap_(heap), rootManager_(heap->GetEcmaVM()), paralledGC_(parallelGc) { - workList_ = new OldGCWorker(heap_, heap_->GetThreadPool()->GetThreadNum()); + workList_ = new WorkerHelper(heap_, Platform::GetCurrentPlatform()->GetTotalThreadNum() + 1, + ParallelGCTaskPhase::OLD_HANDLE_GLOBAL_POOL_TASK); } void OldSpaceCollector::RunPhases() @@ -39,17 +41,28 @@ void OldSpaceCollector::RunPhases() INTERPRETER_TRACE(thread, OldSpaceCollector_RunPhases); trace::ScopedTrace scoped_trace("OldSpaceCollector::RunPhases"); [[maybe_unused]] ClockScope clock("OldSpaceCollector::RunPhases"); - InitializePhase(); - MarkingPhase(); - SweepPhases(); - FinishPhase(); + if (heap_->ConcurrentMarkingEnable() && !heap_->IsConcurrentMarkingInitialized()) { + if (heap_->IsConcurrentMarking()) { + heap_->WaitConcurrentMarkingFinished(); + }
+ auto worklist = heap_->GetConcurrentMarker()->GetWorklist(); + heap_->GetConcurrentMarker()->ReMarking(worklist); + heap_->GetConcurrentMarker()->SweepPhases(worklist); + heap_->GetConcurrentMarker()->FinishPhase(worklist); + heap_->GetConcurrentMarker()->Reset(); + } else { + InitializePhase(); + MarkingPhase(); + heap_->GetConcurrentMarker()->SweepPhases(workList_); + heap_->GetConcurrentMarker()->FinishPhase(workList_); + } heap_->GetEcmaVM()->GetEcmaGCStats()->StatisticOldCollector(clock.GetPauseTime(), freeSize_, oldSpaceCommitSize_, nonMoveSpaceCommitSize_); } void OldSpaceCollector::InitializePhase() { - heap_->GetThreadPool()->WaitTaskFinish(); + heap_->WaitRunningTaskFinished(); heap_->GetSweeper()->EnsureAllTaskFinish(); heap_->EnumerateRegions([](Region *current) { // ensure mark bitmap @@ -60,111 +73,18 @@ void OldSpaceCollector::InitializePhase() bitmap->ClearAllBits(); } }); - workList_->Initialize(); + workList_->Initialize(TriggerGCType::OLD_GC); freeSize_ = 0; hugeSpaceFreeSize_ = 0; oldSpaceCommitSize_ = heap_->GetOldSpace()->GetCommittedSize(); nonMoveSpaceCommitSize_ = heap_->GetNonMovableSpace()->GetCommittedSize(); } -void OldSpaceCollector::FinishPhase() -{ - size_t aliveSize = 0; - workList_->Finish(aliveSize); -} - void OldSpaceCollector::MarkingPhase() { trace::ScopedTrace scoped_trace("OldSpaceCollector::MarkingPhase"); - RootVisitor gcMarkYoung = [this]([[maybe_unused]] Root type, ObjectSlot slot) { - JSTaggedValue value(slot.GetTaggedType()); - if (value.IsHeapObject()) { - MarkObject(0, value.GetTaggedObject()); - } - }; - RootRangeVisitor gcMarkRangeYoung = [this]([[maybe_unused]] Root type, ObjectSlot start, ObjectSlot end) { - for (ObjectSlot slot = start; slot < end; slot++) { - JSTaggedValue value(slot.GetTaggedType()); - if (value.IsHeapObject()) { - MarkObject(0, value.GetTaggedObject()); - } - } - }; - rootManager_.VisitVMRoots(gcMarkYoung, gcMarkRangeYoung); - - ProcessMarkStack(0); - if (paralledGC_) { - 
heap_->GetThreadPool()->WaitTaskFinish(); - } -} - -void OldSpaceCollector::ProcessMarkStack(uint64_t threadId) -{ - while (true) { - TaggedObject *obj = nullptr; - if (!workList_->Pop(threadId, &obj)) { - break; - } - auto jsHclass = obj->GetClass(); - // mark dynClass - MarkObject(threadId, jsHclass); - - rootManager_.MarkObjectBody( - obj, jsHclass, [this, &threadId]([[maybe_unused]] TaggedObject *root, ObjectSlot start, ObjectSlot end) { - for (ObjectSlot slot = start; slot < end; slot++) { - JSTaggedValue value(slot.GetTaggedType()); - if (value.IsWeak()) { - RecordWeakReference(threadId, reinterpret_cast(slot.SlotAddress())); - continue; - } - if (value.IsHeapObject()) { - MarkObject(threadId, value.GetTaggedObject()); - } - } - }); - } -} - -void OldSpaceCollector::SweepPhases() -{ - trace::ScopedTrace scoped_trace("OldSpaceCollector::SweepPhases"); - // process weak reference - for (uint32_t i = 0; i < heap_->GetThreadPool()->GetThreadNum(); i++) { - ProcessQueue *queue = workList_->GetWeakReferenceQueue(i); - while (true) { - auto obj = queue->PopBack(); - if (UNLIKELY(obj == nullptr)) { - break; - } - ObjectSlot slot(ToUintPtr(obj)); - JSTaggedValue value(slot.GetTaggedType()); - auto header = value.GetTaggedWeakRef(); - - Region *objectRegion = Region::ObjectAddressToRange(header); - auto markBitmap = objectRegion->GetMarkBitmap(); - if (!markBitmap->Test(header)) { - slot.Update(static_cast(JSTaggedValue::Undefined().GetRawData())); - } - } - } - - auto stringTable = heap_->GetEcmaVM()->GetEcmaStringTable(); - WeakRootVisitor gcUpdateWeak = [](TaggedObject *header) { - Region *objectRegion = Region::ObjectAddressToRange(reinterpret_cast(header)); - if (objectRegion->InYoungGeneration()) { - return header; - } - - auto markBitmap = objectRegion->GetMarkBitmap(); - if (markBitmap->Test(header)) { - return header; - } - return reinterpret_cast(ToUintPtr(nullptr)); - }; - stringTable->SweepWeakReference(gcUpdateWeak); - 
heap_->GetEcmaVM()->GetJSThread()->IterateWeakEcmaGlobalStorage(gcUpdateWeak); - heap_->GetEcmaVM()->ProcessReferences(gcUpdateWeak); - - heap_->GetSweeper()->SweepPhases(); + heap_->GetConcurrentMarker()->MarkRoots(workList_); + heap_->GetConcurrentMarker()->ParallelMarkStack(workList_, 0); + heap_->WaitRunningTaskFinished(); } } // namespace panda::ecmascript diff --git a/ecmascript/mem/old_space_collector.h b/ecmascript/mem/old_space_collector.h index 5da8b52071..5916eb8951 100644 --- a/ecmascript/mem/old_space_collector.h +++ b/ecmascript/mem/old_space_collector.h @@ -21,7 +21,7 @@ #include "ecmascript/mem/allocator.h" #include "ecmascript/mem/mark_stack-inl.h" #include "ecmascript/mem/mark_word.h" -#include "ecmascript/mem/semi_space_worker.h" +#include "ecmascript/mem/parallel_work_helper.h" #include "ecmascript/mem/slots.h" #include "ecmascript/mem/heap_roots.h" #include "ecmascript/mem/remembered_set.h" @@ -45,6 +45,11 @@ public: return heap_; } + WorkerHelper *GetWorklist() const + { + return workList_; + } + private: void InitializePhase(); void MarkingPhase(); @@ -59,14 +64,15 @@ private: Heap *heap_; HeapRootManager rootManager_; - bool paralledGC_{false}; - OldGCWorker *workList_{nullptr}; - size_t freeSize_{0}; + bool paralledGC_ {false}; + WorkerHelper *workList_ {nullptr}; + size_t freeSize_ {0}; size_t hugeSpaceFreeSize_ = 0; size_t oldSpaceCommitSize_ = 0; size_t nonMoveSpaceCommitSize_ = 0; - friend class OldGCWorker; + friend class WorkerHelper; + friend class Heap; }; } // namespace ecmascript } // namespace panda diff --git a/ecmascript/mem/semi_space_worker.cpp b/ecmascript/mem/parallel_work_helper.cpp similarity index 63% rename from ecmascript/mem/semi_space_worker.cpp rename to ecmascript/mem/parallel_work_helper.cpp index 0df831f442..a4b216342c 100644 --- a/ecmascript/mem/semi_space_worker.cpp +++ b/ecmascript/mem/parallel_work_helper.cpp @@ -13,7 +13,7 @@ * limitations under the License. 
*/ -#include "ecmascript/mem/semi_space_worker.h" +#include "ecmascript/mem/parallel_work_helper.h" #include "ecmascript/mem/area.h" #include "ecmascript/mem/compress_collector.h" @@ -24,36 +24,26 @@ #include "ecmascript/mem/tlab_allocator-inl.h" namespace panda::ecmascript { -void Worker::Finish(size_t &aliveSize) +WorkerHelper::WorkerHelper(Heap *heap, uint32_t threadNum, ParallelGCTaskPhase parallelTask) + : heap_(heap), threadNum_(threadNum), markSpace_(0), spaceTop_(0), markSpaceEnd_(0), parallelTask_(parallelTask) { for (uint32_t i = 0; i < threadNum_; i++) { - WorkNodeHolder &holder = workList_[i]; - holder.weakQueue_->FinishMarking(continuousQueue_[i]); - delete holder.weakQueue_; - holder.weakQueue_ = nullptr; - delete holder.allocator_; - holder.allocator_ = nullptr; - holder.waitUpdate_.clear(); - aliveSize += holder.aliveSize_; - } - - while (!unuseSpace_.empty()) { - const_cast(heap_->GetRegionFactory())->FreeBuffer(reinterpret_cast( - unuseSpace_.back())); - unuseSpace_.pop_back(); + continuousQueue_[i] = new ProcessQueue(heap); } + markSpace_ = ToUintPtr(const_cast(heap_->GetRegionFactory())->AllocateBuffer(SPACE_SIZE)); } -void Worker::Finish(size_t &aliveSize, size_t &promoteSize) +WorkerHelper::~WorkerHelper() { - Finish(aliveSize); for (uint32_t i = 0; i < threadNum_; i++) { - WorkNodeHolder &holder = workList_[i]; - promoteSize += holder.aliveSize_; + continuousQueue_[i]->Destroy(); + delete continuousQueue_[i]; + continuousQueue_[i] = nullptr; } + const_cast(heap_->GetRegionFactory())->FreeBuffer(reinterpret_cast(markSpace_)); } -bool Worker::Push(uint32_t threadId, TaggedObject *object) +bool WorkerHelper::Push(uint32_t threadId, TaggedObject *object) { WorkNode *&pushNode = workList_[threadId].pushNode_; if (!pushNode->Push(ToUintPtr(object))) { @@ -63,7 +53,19 @@ bool Worker::Push(uint32_t threadId, TaggedObject *object) return true; } -bool Worker::Pop(uint32_t threadId, TaggedObject **object) +void 
WorkerHelper::PushWorkNodeToGlobal(uint32_t threadId) +{ + WorkNode *&pushNode = workList_[threadId].pushNode_; + if (!pushNode->IsEmpty()) { + globalWork_.Push(pushNode); + pushNode = AllocalWorkNode(); + if (heap_->IsEnableParallelGC() && heap_->CheckCanDistributeTask()) { + heap_->PostParallelGCTask(parallelTask_); + } + } +} + +bool WorkerHelper::Pop(uint32_t threadId, TaggedObject **object) { WorkNode *&popNode = workList_[threadId].popNode_; WorkNode *&pushNode = workList_[threadId].pushNode_; @@ -80,126 +82,41 @@ bool Worker::Pop(uint32_t threadId, TaggedObject **object) return true; } -bool Worker::PopWorkNodeFromGlobal(uint32_t threadId) +bool WorkerHelper::PopWorkNodeFromGlobal(uint32_t threadId) { return globalWork_.Pop(&workList_[threadId].popNode_); } -WorkNode *Worker::AllocalWorkNode() -{ - size_t totalSize = sizeof(WorkNode) + sizeof(Stack) + STACK_AREA_SIZE; - // CAS - volatile auto atomicField = reinterpret_cast *>(&spaceTop_); - bool result = false; - uintptr_t begin = 0; - do { - begin = atomicField->load(std::memory_order_acquire); - if (begin + totalSize >= markSpaceEnd_) { - os::memory::LockHolder lock(mtx_); - begin = atomicField->load(std::memory_order_acquire); - if (begin + totalSize >= markSpaceEnd_) { - unuseSpace_.emplace_back(markSpace_); - markSpace_ = - ToUintPtr(const_cast(heap_->GetRegionFactory())->AllocateBuffer(SPACE_SIZE)); - spaceTop_ = markSpace_; - markSpaceEnd_ = markSpace_ + SPACE_SIZE; - begin = spaceTop_; - } - } - result = std::atomic_compare_exchange_strong_explicit(atomicField, &begin, begin + totalSize, - std::memory_order_release, std::memory_order_relaxed); - } while (!result); - Stack *stack = reinterpret_cast(begin + sizeof(WorkNode)); - stack->ResetBegin(begin + sizeof(WorkNode) + sizeof(Stack), begin + totalSize); - WorkNode *work = reinterpret_cast(begin); - return new (work) WorkNode(stack); -} - -Worker::Worker(Heap *heap, uint32_t threadNum) - : heap_(heap), threadNum_(threadNum), markSpace_(0), 
spaceTop_(0), markSpaceEnd_(0) -{ - for (uint32_t i = 0; i < threadNum_; i++) { - continuousQueue_[i] = new ProcessQueue(heap); - } - markSpace_ = ToUintPtr(const_cast(heap_->GetRegionFactory())->AllocateBuffer(SPACE_SIZE)); -} - -Worker::~Worker() +void WorkerHelper::Finish(size_t &aliveSize) { - for (uint32_t i = 0; i < threadNum_; i++) { - continuousQueue_[i]->Destroy(); - delete continuousQueue_[i]; - continuousQueue_[i] = nullptr; - } - const_cast(heap_->GetRegionFactory())->FreeBuffer(reinterpret_cast(markSpace_)); -} - -SemiSpaceWorker::~SemiSpaceWorker() = default; - -CompressGCWorker::~CompressGCWorker() = default; - -void SemiSpaceWorker::PushWorkNodeToGlobal(uint32_t threadId) -{ - WorkNode *&pushNode = workList_[threadId].pushNode_; - if (!pushNode->IsEmpty()) { - globalWork_.Push(pushNode); - pushNode = AllocalWorkNode(); - - auto pool = heap_->GetThreadPool(); - if (pool->GetTaskCount() < pool->GetThreadNum() - 1) { - pool->Submit(std::bind(&SemiSpaceCollector::ParallelHandleGlobalPool, heap_->GetSemiSpaceCollector(), - std::placeholders::_1)); - } - } -} - -void SemiSpaceWorker::Initialize() -{ - spaceTop_ = markSpace_; - markSpaceEnd_ = markSpace_ + SPACE_SIZE; for (uint32_t i = 0; i < threadNum_; i++) { WorkNodeHolder &holder = workList_[i]; - holder.pushNode_ = AllocalWorkNode(); - holder.popNode_ = AllocalWorkNode(); - holder.weakQueue_ = new ProcessQueue(); - holder.weakQueue_->BeginMarking(heap_, continuousQueue_[i]); - holder.allocator_ = new TlabAllocator(heap_, TriggerGCType::SEMI_GC); - holder.aliveSize_ = 0; - holder.promoteSize_ = 0; + holder.weakQueue_->FinishMarking(continuousQueue_[i]); + delete holder.weakQueue_; + holder.weakQueue_ = nullptr; + delete holder.allocator_; + holder.allocator_ = nullptr; + holder.waitUpdate_.clear(); + aliveSize += holder.aliveSize_; } -} - -void CompressGCWorker::PushWorkNodeToGlobal(uint32_t threadId) -{ - WorkNode *&pushNode = workList_[threadId].pushNode_; - if (!pushNode->IsEmpty()) { - 
globalWork_.Push(pushNode); - pushNode = AllocalWorkNode(); - auto pool = heap_->GetThreadPool(); - if (pool->GetTaskCount() < pool->GetThreadNum() - 1) { - pool->Submit( - std::bind(&CompressCollector::ProcessMarkStack, heap_->GetCompressCollector(), std::placeholders::_1)); - } + while (!unuseSpace_.empty()) { + const_cast(heap_->GetRegionFactory())->FreeBuffer(reinterpret_cast( + unuseSpace_.back())); + unuseSpace_.pop_back(); } } -void CompressGCWorker::Initialize() +void WorkerHelper::Finish(size_t &aliveSize, size_t &promoteSize) { - spaceTop_ = markSpace_; - markSpaceEnd_ = markSpace_ + SPACE_SIZE; + Finish(aliveSize); for (uint32_t i = 0; i < threadNum_; i++) { WorkNodeHolder &holder = workList_[i]; - holder.pushNode_ = AllocalWorkNode(); - holder.popNode_ = AllocalWorkNode(); - holder.weakQueue_ = new ProcessQueue(); - holder.weakQueue_->BeginMarking(heap_, continuousQueue_[i]); - holder.allocator_ = new TlabAllocator(heap_, TriggerGCType::COMPRESS_FULL_GC); - holder.aliveSize_ = 0; + promoteSize += holder.promoteSize_; } } -void OldGCWorker::Initialize() +void WorkerHelper::Initialize(TriggerGCType gcType) { spaceTop_ = markSpace_; markSpaceEnd_ = markSpace_ + SPACE_SIZE; @@ -209,21 +126,43 @@ void OldGCWorker::Initialize() holder.popNode_ = AllocalWorkNode(); holder.weakQueue_ = new ProcessQueue(); holder.weakQueue_->BeginMarking(heap_, continuousQueue_[i]); + holder.aliveSize_ = 0; + holder.promoteSize_ = 0; + if (gcType == TriggerGCType::SEMI_GC) { + holder.allocator_ = new TlabAllocator(heap_, TriggerGCType::SEMI_GC); + } else if (gcType == TriggerGCType::COMPRESS_FULL_GC) { + holder.allocator_ = new TlabAllocator(heap_, TriggerGCType::COMPRESS_FULL_GC); + } } } -void OldGCWorker::PushWorkNodeToGlobal(uint32_t threadId) +WorkNode *WorkerHelper::AllocalWorkNode() { - WorkNode *&pushNode = workList_[threadId].pushNode_; - if (!pushNode->IsEmpty()) { - globalWork_.Push(pushNode); - pushNode = AllocalWorkNode(); - - auto pool = heap_->GetThreadPool(); - if
(pool->GetTaskCount() < pool->GetThreadNum() - 1) { - pool->Submit(std::bind(&OldSpaceCollector::ProcessMarkStack, heap_->GetOldSpaceCollector(), - std::placeholders::_1)); + size_t totalSize = sizeof(WorkNode) + sizeof(Stack) + STACK_AREA_SIZE; + // CAS + volatile auto atomicField = reinterpret_cast *>(&spaceTop_); + bool result = false; + uintptr_t begin = 0; + do { + begin = atomicField->load(std::memory_order_acquire); + if (begin + totalSize >= markSpaceEnd_) { + os::memory::LockHolder lock(mtx_); + begin = atomicField->load(std::memory_order_acquire); + if (begin + totalSize >= markSpaceEnd_) { + unuseSpace_.emplace_back(markSpace_); + markSpace_ = + ToUintPtr(const_cast(heap_->GetRegionFactory())->AllocateBuffer(SPACE_SIZE)); + spaceTop_ = markSpace_; + markSpaceEnd_ = markSpace_ + SPACE_SIZE; + begin = spaceTop_; + } } - } + result = std::atomic_compare_exchange_strong_explicit(atomicField, &begin, begin + totalSize, + std::memory_order_release, std::memory_order_relaxed); + } while (!result); + Stack *stack = reinterpret_cast(begin + sizeof(WorkNode)); + stack->ResetBegin(begin + sizeof(WorkNode) + sizeof(Stack), begin + totalSize); + WorkNode *work = reinterpret_cast(begin); + return new (work) WorkNode(stack); } } // namespace panda::ecmascript diff --git a/ecmascript/mem/semi_space_worker.h b/ecmascript/mem/parallel_work_helper.h similarity index 55% rename from ecmascript/mem/semi_space_worker.h rename to ecmascript/mem/parallel_work_helper.h index 0a88f31f0e..90244e2274 100644 --- a/ecmascript/mem/semi_space_worker.h +++ b/ecmascript/mem/parallel_work_helper.h @@ -13,19 +13,17 @@ * limitations under the License. 
*/ -#ifndef ECMASCRIPT_MEM_SEMI_SPACE_WORKER_H -#define ECMASCRIPT_MEM_SEMI_SPACE_WORKER_H +#ifndef ECMASCRIPT_MEM_PARALLEL_WORK_HELPER_H +#define ECMASCRIPT_MEM_PARALLEL_WORK_HELPER_H -#include "ecmascript/mem/mark_stack.h" +#include "ecmascript/mem/mark_stack-inl.h" #include "ecmascript/mem/slots.h" namespace panda::ecmascript { using SlotNeedUpdate = std::pair; static constexpr uint32_t MARKSTACK_MAX_SIZE = 100; -static constexpr uint32_t THREAD_NUM_FOR_YOUNG_GC = 6; -static constexpr uint32_t STACK_AREA_SIZE = sizeof(uintptr_t *) * MARKSTACK_MAX_SIZE; - +static constexpr uint32_t STACK_AREA_SIZE = sizeof(uintptr_t) * MARKSTACK_MAX_SIZE; static constexpr uint32_t SPACE_SIZE = 8 * 1024; class Heap; @@ -118,24 +116,22 @@ private: }; struct WorkNodeHolder { - WorkNode *pushNode_{nullptr}; - WorkNode *popNode_{nullptr}; - ProcessQueue *weakQueue_{nullptr}; + WorkNode *pushNode_ {nullptr}; + WorkNode *popNode_ {nullptr}; + ProcessQueue *weakQueue_ {nullptr}; std::vector waitUpdate_; - TlabAllocator *allocator_{nullptr}; + TlabAllocator *allocator_ {nullptr}; size_t aliveSize_ = 0; size_t promoteSize_ = 0; }; -class Worker { +class WorkerHelper final { public: - Worker() = delete; - explicit Worker(Heap *heap, uint32_t threadNum); - - virtual ~Worker() = 0; - virtual void PushWorkNodeToGlobal(uint32_t threadId) = 0; - virtual void Initialize() = 0; + WorkerHelper() = delete; + explicit WorkerHelper(Heap *heap, uint32_t threadNum, ParallelGCTaskPhase parallelTask); + ~WorkerHelper(); + void Initialize(TriggerGCType gcType); void Finish(size_t &aliveSize); void Finish(size_t &aliveSize, size_t &promoteSize); @@ -143,64 +139,33 @@ public: bool Pop(uint32_t threadId, TaggedObject **object); bool PopWorkNodeFromGlobal(uint32_t threadId); + void PushWorkNodeToGlobal(uint32_t threadId); - void PushWeakReference(uint32_t threadId, JSTaggedType *weak) + inline void PushWeakReference(uint32_t threadId, JSTaggedType *weak) { workList_[threadId].weakQueue_->PushBack(weak); } - 
void AddAliveSize(uint32_t threadId, size_t size) + inline void AddAliveSize(uint32_t threadId, size_t size) { workList_[threadId].aliveSize_ += size; } - void AddPromoteSize(uint32_t threadId, size_t size) + inline void AddPromoteSize(uint32_t threadId, size_t size) { workList_[threadId].promoteSize_ += size; } - ProcessQueue *GetWeakReferenceQueue(uint32_t threadId) const + inline ProcessQueue *GetWeakReferenceQueue(uint32_t threadId) const { return workList_[threadId].weakQueue_; } - TlabAllocator *GetTlabAllocator(uint32_t threadId) const + inline TlabAllocator *GetTlabAllocator(uint32_t threadId) const { return workList_[threadId].allocator_; } - NO_COPY_SEMANTIC(Worker); - NO_MOVE_SEMANTIC(Worker); - -protected: - WorkNode *AllocalWorkNode(); - - Heap *heap_; // NOLINT(misc-non-private-member-variables-in-classes) - uint32_t threadNum_; // NOLINT(misc-non-private-member-variables-in-classes) - // NOLINTNEXTLINE(misc-non-private-member-variables-in-classes, modernize-avoid-c-arrays) - WorkNodeHolder workList_[THREAD_NUM_FOR_YOUNG_GC]; - // NOLINTNEXTLINE(misc-non-private-member-variables-in-classes, modernize-avoid-c-arrays) - ContinuousStack *continuousQueue_[THREAD_NUM_FOR_YOUNG_GC]; - GlobalWorkList globalWork_; // NOLINT(misc-non-private-member-variables-in-classes) - - uintptr_t markSpace_; // NOLINT(misc-non-private-member-variables-in-classes) - uintptr_t spaceTop_; // NOLINT(misc-non-private-member-variables-in-classes) - uintptr_t markSpaceEnd_; // NOLINT(misc-non-private-member-variables-in-classes) - -private: - std::vector unuseSpace_; - os::memory::Mutex mtx_; -}; - -class SemiSpaceWorker : public Worker { -public: - SemiSpaceWorker() = delete; - explicit SemiSpaceWorker(Heap *heap, uint32_t threadNum) : Worker(heap, threadNum) {} - - ~SemiSpaceWorker() override; - void PushWorkNodeToGlobal(uint32_t threadId) override; - void Initialize() override; - inline void PushWaitUpdateSlot(uint32_t threadId, SlotNeedUpdate slot) { 
workList_[threadId].waitUpdate_.emplace_back(slot); @@ -217,35 +182,23 @@ public: return true; } - NO_COPY_SEMANTIC(SemiSpaceWorker); - NO_MOVE_SEMANTIC(SemiSpaceWorker); -}; - -class CompressGCWorker : public Worker { -public: - CompressGCWorker() = delete; - explicit CompressGCWorker(Heap *heap, uint32_t threadNum) : Worker(heap, threadNum) {} - - ~CompressGCWorker() override; - void PushWorkNodeToGlobal(uint32_t threadId) override; - void Initialize() override; - - NO_COPY_SEMANTIC(CompressGCWorker); - NO_MOVE_SEMANTIC(CompressGCWorker); -}; - -class OldGCWorker : public Worker { -public: - OldGCWorker() = delete; - OldGCWorker(Heap *heap, uint32_t threadNum) : Worker(heap, threadNum) {} - - ~OldGCWorker() override = default; +private: + NO_COPY_SEMANTIC(WorkerHelper); + NO_MOVE_SEMANTIC(WorkerHelper); - void PushWorkNodeToGlobal(uint32_t threadId) override; - void Initialize() override; + WorkNode *AllocalWorkNode(); - NO_COPY_SEMANTIC(OldGCWorker); - NO_MOVE_SEMANTIC(OldGCWorker); + Heap *heap_; + uint32_t threadNum_; + WorkNodeHolder workList_[Platform::MAX_PLATFORM_THREAD_NUM + 1]; + ContinuousStack *continuousQueue_[Platform::MAX_PLATFORM_THREAD_NUM + 1]; + GlobalWorkList globalWork_; + uintptr_t markSpace_; + uintptr_t spaceTop_; + uintptr_t markSpaceEnd_; + std::vector unuseSpace_; + os::memory::Mutex mtx_; + ParallelGCTaskPhase parallelTask_; }; } // namespace panda::ecmascript -#endif // ECMASCRIPT_MEM_SEMI_SPACE_WORKER_H +#endif // ECMASCRIPT_MEM_PARALLEL_WORK_HELPER_H diff --git a/ecmascript/mem/region.h b/ecmascript/mem/region.h index 207abda5fd..814d12e5e0 100644 --- a/ecmascript/mem/region.h +++ b/ecmascript/mem/region.h @@ -139,6 +139,11 @@ public: return markBitmap_; } + RememberedSet *GetReferenceSet() + { + return referenceSet_; + } + RememberedSet *GetCrossRegionRememberedSet() { return crossRegionSet_; @@ -180,10 +185,13 @@ public: } RangeBitmap *CreateMarkBitmap(); + inline RememberedSet *CreateReferenceSet(); + inline RememberedSet 
*GetOrCreateReferenceSet(); inline RememberedSet *CreateRememberedSet(); inline RememberedSet *GetOrCreateCrossRegionRememberedSet(); inline RememberedSet *GetOrCreateOldToNewRememberedSet(); inline void InsertCrossRegionRememberedSet(uintptr_t addr); + inline void InsertReferenceSet(uintptr_t addr); inline void InsertOldToNewRememberedSet(uintptr_t addr); uintptr_t GetAllocateBase() const @@ -246,11 +254,12 @@ private: uintptr_t begin_; uintptr_t end_; uintptr_t highWaterMark_; - Region *next_{nullptr}; - Region *prev_{nullptr}; - RangeBitmap *markBitmap_{nullptr}; - RememberedSet *crossRegionSet_{nullptr}; - RememberedSet *oldToNewSet_{nullptr}; + Region *next_ {nullptr}; + Region *prev_ {nullptr}; + RangeBitmap *markBitmap_ {nullptr}; + RememberedSet *referenceSet_ {nullptr}; + RememberedSet *crossRegionSet_ {nullptr}; + RememberedSet *oldToNewSet_ {nullptr}; Span kinds_; friend class SnapShot; }; diff --git a/ecmascript/mem/semi_space_collector-inl.h b/ecmascript/mem/semi_space_collector-inl.h index 806393c760..522a3c7c8c 100644 --- a/ecmascript/mem/semi_space_collector-inl.h +++ b/ecmascript/mem/semi_space_collector-inl.h @@ -17,12 +17,14 @@ #define ECMASCRIPT_MEM_SEMI_SAPACE_COLLECTOR_INL_H #include "ecmascript/mem/semi_space_collector.h" -#include "ecmascript/mem/mem.h" + +#include "ecmascript/js_hclass-inl.h" #include "ecmascript/mem/heap.h" -#include "ecmascript/mem/region.h" #include "ecmascript/mem/mark_word.h" -#include "ecmascript/js_hclass-inl.h" -#include "ecmascript/mem/semi_space_worker.h" +#include "ecmascript/mem/mem.h" +#include "ecmascript/mem/parallel_work_helper.h" +#include "ecmascript/mem/region.h" + namespace panda::ecmascript { void SemiSpaceCollector::UpdatePromotedSlot(TaggedObject *object, ObjectSlot slot) diff --git a/ecmascript/mem/semi_space_collector.cpp b/ecmascript/mem/semi_space_collector.cpp index 0cc31fd8f1..65d8685528 100644 --- a/ecmascript/mem/semi_space_collector.cpp +++ b/ecmascript/mem/semi_space_collector.cpp @@ -31,7 
+31,8 @@ namespace panda::ecmascript { SemiSpaceCollector::SemiSpaceCollector(Heap *heap, bool parallelGc) : heap_(heap), rootManager_(heap->GetEcmaVM()), paralledGC_(parallelGc), markObject_(this) { - workList_ = new SemiSpaceWorker(heap_, heap_->GetThreadPool()->GetThreadNum()); + workList_ = new WorkerHelper(heap_, Platform::GetCurrentPlatform()->GetTotalThreadNum() + 1, + ParallelGCTaskPhase::SEMI_HANDLE_GLOBAL_POOL_TASK); } SemiSpaceCollector::~SemiSpaceCollector() @@ -48,6 +49,9 @@ void SemiSpaceCollector::RunPhases() INTERPRETER_TRACE(thread, SemiSpaceCollector_RunPhases); trace::ScopedTrace scoped_trace("SemiSpaceCollector::RunPhases"); [[maybe_unused]] ClockScope clock("SemiSpaceCollector::RunPhases"); + if (heap_->ConcurrentMarkingEnable() && heap_->IsConcurrentMarking()) { + heap_->WaitConcurrentMarkingFinished(); + } InitializePhase(); ParallelMarkingPhase(); SweepPhases(); @@ -58,7 +62,7 @@ void SemiSpaceCollector::RunPhases() void SemiSpaceCollector::InitializePhase() { - heap_->GetThreadPool()->WaitTaskFinish(); + heap_->WaitRunningTaskFinished(); heap_->GetSweeper()->EnsureAllTaskFinish(); auto fromSpace = heap_->GetFromSpace(); if (fromSpace->GetCommittedSize() == 0) { @@ -69,7 +73,7 @@ void SemiSpaceCollector::InitializePhase() oldSpaceAllocator_.Swap(heapManager->GetOldSpaceAllocator()); ageMark_ = heap_->GetNewSpace()->GetAgeMark(); heap_->FlipNewSpace(); - workList_->Initialize(); + workList_->Initialize(TriggerGCType::SEMI_GC); promotedSize_ = 0; semiCopiedSize_ = 0; commitSize_ = heap_->GetFromSpace()->GetCommittedSize(); @@ -80,10 +84,7 @@ void SemiSpaceCollector::FinishPhase() // swap const_cast(heap_->GetNewSpace())->Swap(const_cast(heap_->GetFromSpace())); if (paralledGC_) { - heap_->GetThreadPool()->Submit([this]([[maybe_unused]] uint32_t threadId) -> bool { - const_cast(heap_->GetFromSpace())->ReclaimRegions(); - return true; - }); + heap_->PostParallelGCTask(ParallelGCTaskPhase::SEMI_HANDLE_RECLIAM_REGION_TASK); } else { 
const_cast(heap_->GetFromSpace())->ReclaimRegions(); } @@ -193,19 +194,18 @@ void SemiSpaceCollector::ParallelMarkingPhase() auto region = oldSpace->GetCurrentRegion(); if (paralledGC_) { - heap_->GetThreadPool()->Submit( - std::bind(&SemiSpaceCollector::ParallelHandleThreadRoots, this, std::placeholders::_1)); - heap_->GetThreadPool()->Submit( - std::bind(&SemiSpaceCollector::ParallelHandleSnapShot, this, std::placeholders::_1)); + heap_->PostParallelGCTask(ParallelGCTaskPhase::SEMI_HANDLE_THREAD_ROOTS_TASK); + heap_->PostParallelGCTask(ParallelGCTaskPhase::SEMI_HANDLE_SNAPSHOT_TASK); ParallelHandleOldToNew(0, region); - heap_->GetThreadPool()->WaitTaskFinish(); } else { ParallelHandleOldToNew(0, region); ParallelHandleSnapShot(0); ParallelHandleThreadRoots(0); } + heap_->WaitRunningTaskFinished(); - for (uint32_t i = 0; i < heap_->GetThreadPool()->GetThreadNum(); i++) { + auto totalThreadCount = Platform::GetCurrentPlatform()->GetTotalThreadNum() + 1; // gc thread and main thread + for (uint32_t i = 0; i < totalThreadCount; i++) { SlotNeedUpdate needUpdate(nullptr, ObjectSlot(0)); while (workList_->GetSlotNeedUpdate(i, &needUpdate)) { UpdatePromotedSlot(needUpdate.first, needUpdate.second); @@ -248,7 +248,8 @@ void SemiSpaceCollector::ProcessMarkStack(uint64_t threadId) void SemiSpaceCollector::SweepPhases() { trace::ScopedTrace scoped_trace("SemiSpaceCollector::SweepPhases"); - for (uint32_t i = 0; i < heap_->GetThreadPool()->GetThreadNum(); i++) { + auto totalThreadCount = Platform::GetCurrentPlatform()->GetTotalThreadNum() + 1; // gc thread and main thread + for (uint32_t i = 0; i < totalThreadCount; i++) { ProcessQueue *queue = workList_->GetWeakReferenceQueue(i); while (true) { auto obj = queue->PopBack(); diff --git a/ecmascript/mem/semi_space_collector.h b/ecmascript/mem/semi_space_collector.h index 3a76149e5c..ca8fe63ef8 100644 --- a/ecmascript/mem/semi_space_collector.h +++ b/ecmascript/mem/semi_space_collector.h @@ -29,7 +29,6 @@ #include 
"ecmascript/mem/chunk_containers.h" #include "ecmascript/mem/tlab_allocator.h" -#include "ecmascript/thread/thread_pool.h" #include "ecmascript/mem/semi_space_marker.h" #include "os/mutex.h" @@ -38,7 +37,7 @@ namespace panda { namespace ecmascript { class Heap; class JSHClass; -class SemiSpaceWorker; +class WorkerHelper; class GarbageCollector { public: @@ -61,7 +60,7 @@ public: { return heap_; } - + private: bool ParallelHandleOldToNew(uint32_t threadId, Region *region); bool ParallelHandleThreadRoots(uint32_t threadId); @@ -82,17 +81,18 @@ private: Heap *heap_; HeapRootManager rootManager_; os::memory::Mutex allocatorLock_; - BumpPointerAllocator fromSpaceAllocator_{}; - FreeListAllocator oldSpaceAllocator_{}; - bool paralledGC_{false}; - SemiSpaceWorker *workList_{nullptr}; + BumpPointerAllocator fromSpaceAllocator_ {}; + FreeListAllocator oldSpaceAllocator_ {}; + bool paralledGC_ {false}; + WorkerHelper *workList_ {nullptr}; SemiSpaceMarker markObject_; - size_t promotedSize_{0}; - size_t semiCopiedSize_{0}; + size_t promotedSize_ {0}; + size_t semiCopiedSize_ {0}; size_t commitSize_ = 0; - uintptr_t ageMark_{0}; + uintptr_t ageMark_ {0}; + friend class TlabAllocator; - friend class SemiSpaceWorker; + friend class WorkerHelper; friend class SemiSpaceMarker; friend class Heap; }; diff --git a/ecmascript/mem/semi_space_marker.cpp b/ecmascript/mem/semi_space_marker.cpp index d4ca483813..7605e94e8e 100644 --- a/ecmascript/mem/semi_space_marker.cpp +++ b/ecmascript/mem/semi_space_marker.cpp @@ -17,9 +17,9 @@ #include "ecmascript/free_object.h" #include "ecmascript/js_hclass-inl.h" +#include "ecmascript/mem/parallel_work_helper.h" #include "ecmascript/mem/region.h" #include "ecmascript/mem/semi_space_collector-inl.h" -#include "ecmascript/mem/semi_space_worker.h" #include "ecmascript/mem/tlab_allocator-inl.h" namespace panda::ecmascript { diff --git a/ecmascript/mem/space-inl.h b/ecmascript/mem/space-inl.h index a37e5c7d04..8a355556ce 100644 --- 
a/ecmascript/mem/space-inl.h +++ b/ecmascript/mem/space-inl.h @@ -38,6 +38,19 @@ void Space::EnumerateRegions(const Callback &cb, Region *region) const } } +RememberedSet *Region::CreateReferenceSet() +{ + return CreateRememberedSet(); +} + +RememberedSet *Region::GetOrCreateReferenceSet() +{ + if (UNLIKELY(referenceSet_ == nullptr)) { + referenceSet_ = CreateReferenceSet(); + } + return referenceSet_; +} + RememberedSet *Region::CreateRememberedSet() { auto setSize = RememberedSet::GetSizeInByte(GetCapacity()); @@ -70,6 +83,12 @@ void Region::InsertCrossRegionRememberedSet(uintptr_t addr) set->Insert(addr); } +void Region::InsertReferenceSet(uintptr_t addr) +{ + auto set = GetOrCreateReferenceSet(); + set->Insert(addr); +} + void Region::InsertOldToNewRememberedSet(uintptr_t addr) { auto set = GetOrCreateOldToNewRememberedSet(); diff --git a/ecmascript/mem/space.cpp b/ecmascript/mem/space.cpp index c1156bc0ee..a8deb8bb5a 100644 --- a/ecmascript/mem/space.cpp +++ b/ecmascript/mem/space.cpp @@ -67,6 +67,12 @@ void Space::ClearAndFreeRegion(Region *region) const_cast(heap_->GetRegionFactory())->Free(bitmap->GetBitMap().Data(), size); delete bitmap; } + if (region->GetReferenceSet() != nullptr) { + auto referenceSet = region->GetReferenceSet(); + auto size = RememberedSet::GetSizeInByte(region->GetCapacity()); + const_cast(heap_->GetRegionFactory())->Free(referenceSet->GetBitMap().Data(), size); + delete referenceSet; + } if (region->GetCrossRegionRememberedSet() != nullptr) { auto rememberedSet = region->GetCrossRegionRememberedSet(); auto size = RememberedSet::GetSizeInByte(region->GetCapacity()); diff --git a/ecmascript/mem/tagged_object-inl.h b/ecmascript/mem/tagged_object-inl.h index d993a5aed4..42b9fde4a8 100644 --- a/ecmascript/mem/tagged_object-inl.h +++ b/ecmascript/mem/tagged_object-inl.h @@ -26,7 +26,11 @@ namespace panda::ecmascript { inline void TaggedObject::SetClass(JSHClass *hclass) { - *reinterpret_cast(ToUintPtr(this)) = reinterpret_cast(hclass); + 
if (hclass == nullptr) { + *reinterpret_cast(ToUintPtr(this)) = reinterpret_cast(hclass); + } else { + Barriers::SetDynObject(GetJSThread(), this, 0, JSTaggedValue(hclass).GetRawData()); + } } inline void TaggedObject::SetClass(JSHandle hclass) diff --git a/ecmascript/platform/platform.cpp b/ecmascript/platform/platform.cpp index f589f3807e..01a76cf85a 100644 --- a/ecmascript/platform/platform.cpp +++ b/ecmascript/platform/platform.cpp @@ -34,12 +34,12 @@ void Platform::Destory() } } -int Platform::TheMostSuitableThreadNum(int threadNum) const +uint32_t Platform::TheMostSuitableThreadNum(uint32_t threadNum) const { if (threadNum > 0) { - return std::min(threadNum, MAX_PLATFORM_THREAD_NUM); + return std::min(threadNum, MAX_PLATFORM_THREAD_NUM); } - int numOfCpuCore = get_nprocs() - 1; - return std::min(numOfCpuCore, MAX_PLATFORM_THREAD_NUM); + uint32_t numOfCpuCore = get_nprocs() - 1; + return std::min(numOfCpuCore, MAX_PLATFORM_THREAD_NUM); } } // namespace panda::ecmascript diff --git a/ecmascript/platform/platform.h b/ecmascript/platform/platform.h index d62af797ee..2b2fc0377e 100644 --- a/ecmascript/platform/platform.h +++ b/ecmascript/platform/platform.h @@ -45,15 +45,22 @@ public: runner_->PostTask(std::move(task)); } + uint32_t GetTotalThreadNum() const + { + return runner_->GetTotalThreadNum(); + } + private: static constexpr uint32_t MAX_PLATFORM_THREAD_NUM = 7; static constexpr uint32_t DEFAULT_PLATFORM_THREAD_NUM = 0; - int TheMostSuitableThreadNum(int threadNum) const; + uint32_t TheMostSuitableThreadNum(uint32_t threadNum) const; std::unique_ptr runner_; int isInitialized_ = 0; os::memory::Mutex mutex_; + + friend class WorkerHelper; }; } // namespace panda::ecmascript #endif // ECMASCRIPT_PALTFORM_PLATFORM_H diff --git a/ecmascript/platform/runner.cpp b/ecmascript/platform/runner.cpp index 949607932c..6de2e51fa8 100644 --- a/ecmascript/platform/runner.cpp +++ b/ecmascript/platform/runner.cpp @@ -18,10 +18,11 @@ #include "os/thread.h" namespace 
panda::ecmascript { -Runner::Runner(int threadNum) +Runner::Runner(uint32_t threadNum) : totalThreadNum_(threadNum) { - for (int i = 0; i < threadNum; i++) { - std::unique_ptr thread = std::make_unique(&Runner::Run, this); + for (uint32_t i = 0; i < threadNum; i++) { + // main thread is 0; + std::unique_ptr thread = std::make_unique(&Runner::Run, this, i + 1); os::thread::SetThreadName(thread->native_handle(), "GC_WorkerThread"); threadPool_.emplace_back(std::move(thread)); } @@ -37,10 +38,10 @@ void Runner::Terminate() threadPool_.clear(); } -void Runner::Run() +void Runner::Run(uint32_t threadId) { while (std::unique_ptr task = taskQueue_.PopTask()) { - task->Run(); + task->Run(threadId); } } } // namespace panda::ecmascript diff --git a/ecmascript/platform/runner.h b/ecmascript/platform/runner.h index be0ada0279..77be324f4e 100644 --- a/ecmascript/platform/runner.h +++ b/ecmascript/platform/runner.h @@ -25,7 +25,7 @@ namespace panda::ecmascript { class Runner { public: - explicit Runner(int threadNum); + explicit Runner(uint32_t threadNum); ~Runner() = default; NO_COPY_SEMANTIC(Runner); @@ -38,11 +38,18 @@ public: void Terminate(); + uint32_t GetTotalThreadNum() const + { + return totalThreadNum_; + } + private: - void Run(); + void Run(uint32_t threadId); std::vector> threadPool_ {}; TaskQueue taskQueue_ {}; + uint32_t totalThreadNum_ {0}; + std::vector threadIdToIndexList_; }; } // namespace panda::ecmascript #endif // ECMASCRIPT_PLATFORM_RUNNER_H diff --git a/ecmascript/platform/task.h b/ecmascript/platform/task.h index 71eff10111..7520e80417 100644 --- a/ecmascript/platform/task.h +++ b/ecmascript/platform/task.h @@ -23,7 +23,7 @@ class Task { public: Task() = default; virtual ~Task() = default; - virtual bool Run() = 0; + virtual bool Run(uint32_t threadIndex) = 0; NO_COPY_SEMANTIC(Task); NO_MOVE_SEMANTIC(Task); diff --git a/ecmascript/tests/BUILD.gn b/ecmascript/tests/BUILD.gn index ce5365165d..8f17505eb1 100644 --- a/ecmascript/tests/BUILD.gn +++ 
b/ecmascript/tests/BUILD.gn @@ -962,6 +962,33 @@ host_unittest_action("GcTest") { } } +host_unittest_action("ConcurrentMarkingTest") { + module_out_path = module_output_path + + sources = [ + # test file + "concurrent_marking_test.cpp", + ] + + configs = [ + "//ark/js_runtime:ecma_test_config", + "//ark/js_runtime:ark_jsruntime_public_config", # should add before + # arkruntime_public_config + "//ark/js_runtime:ark_jsruntime_common_config", + "$ark_root/runtime:arkruntime_public_config", + ] + + deps = [ + "$ark_root/libpandabase:libarkbase", + "//ark/js_runtime:libark_jsruntime_test", + sdk_libc_secshared_dep, + ] + + if (!is_standard_system) { + deps += [ "$ark_root/runtime:libarkruntime" ] + } +} + group("unittest") { testonly = true @@ -969,6 +996,7 @@ group("unittest") { deps = [ ":AssertScopeTest", ":BuiltinsTest", + ":ConcurrentMarkingTest", ":DumpTest", ":EcmaModuleTest", ":EcmaStringTest", @@ -1012,6 +1040,7 @@ group("host_unittest") { deps = [ ":AssertScopeTestAction", ":BuiltinsTestAction", + ":ConcurrentMarkingTestAction", ":DumpTestAction", ":EcmaModuleTestAction", ":EcmaStringTestAction", diff --git a/ecmascript/tests/concurrent_marking_test.cpp b/ecmascript/tests/concurrent_marking_test.cpp new file mode 100644 index 0000000000..286aa95744 --- /dev/null +++ b/ecmascript/tests/concurrent_marking_test.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2021 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ecmascript/tests/test_helper.h" + +#include "ecmascript/ecma_vm.h" +#include "ecmascript/global_env.h" +#include "ecmascript/js_handle.h" +#include "ecmascript/mem/clock_scope.h" +#include "ecmascript/mem/verification.h" + +using namespace panda::ecmascript; + +namespace panda::test { +class ConcurrentMarkingTest : public testing::Test { +public: + static void SetUpTestCase() + { + GTEST_LOG_(INFO) << "SetUpTestCase"; + } + + static void TearDownTestCase() + { + GTEST_LOG_(INFO) << "TearDownCase"; + } + + void SetUp() override + { + RuntimeOptions options; + options.SetShouldLoadBootPandaFiles(false); + options.SetShouldInitializeIntrinsics(false); + options.SetBootClassSpaces( {"ecmascript"} ); + options.SetRuntimeType("ecmascript"); + options.SetPreGcHeapVerifyEnabled(true); + static EcmaLanguageContext lcEcma; + [[maybe_unused]] bool success = Runtime::Create(options, {&lcEcma}); + ASSERT_TRUE(success) << "Cannot create Runtime"; + instance = Runtime::GetCurrent()->GetPandaVM(); + ASSERT_TRUE(instance != nullptr) << "Cannot create EcmaVM"; + thread = EcmaVM::Cast(instance)->GetJSThread(); + scope = new EcmaHandleScope(thread); + thread->SetIsEcmaInterpreter(true); + EcmaVM::Cast(instance)->GetFactory()->SetTriggerGc(false); + } + + void TearDown() override + { + TestHelper::DestroyEcmaVMWithScope(instance, scope); + } + + JSHandle CreateTaggedArray(array_size_t length, JSTaggedValue initVal, MemSpaceType spaceType) + { + ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); + return factory->NewTaggedArray(length, initVal, spaceType); + } + + PandaVM *instance {nullptr}; + ecmascript::EcmaHandleScope *scope {nullptr}; + JSThread *thread {nullptr}; +}; + +HWTEST_F_L0(ConcurrentMarkingTest, PerformanceWithConcurrentMarking) +{ + array_size_t rootLength = 1024; + JSHandle rootArray = + CreateTaggedArray(rootLength, JSTaggedValue::Undefined(), MemSpaceType::OLD_SPACE); + for (array_size_t i = 0; i < rootLength; i++) { + array_size_t 
subArrayLength = 1024; + auto array = CreateTaggedArray(subArrayLength, JSTaggedValue::Undefined(), MemSpaceType::OLD_SPACE); + rootArray->Set(thread, i, array); + } + auto heap = const_cast(thread->GetEcmaVM()->GetHeap()); + heap->TriggerConcurrentMarking(); // concurrent mark + for (array_size_t i = 0; i < rootLength; i++) { + array_size_t subArrayLength = 1024; + auto array = CreateTaggedArray(subArrayLength, JSTaggedValue::Undefined(), MemSpaceType::OLD_SPACE); + rootArray->Set(thread, i, array); + } + heap->CollectGarbage(TriggerGCType::OLD_GC); +} + +HWTEST_F_L0(ConcurrentMarkingTest, PerformanceWithoutConcurrentMarking) +{ + array_size_t rootLength = 1024; + JSHandle rootArray = + CreateTaggedArray(rootLength, JSTaggedValue::Undefined(), MemSpaceType::OLD_SPACE); + for (array_size_t i = 0; i < rootLength; i++) { + array_size_t subArrayLength = 1024; + auto array = CreateTaggedArray(subArrayLength, JSTaggedValue::Undefined(), MemSpaceType::OLD_SPACE); + rootArray->Set(thread, i, array); + } + auto heap = const_cast(thread->GetEcmaVM()->GetHeap()); + for (array_size_t i = 0; i < rootLength; i++) { + array_size_t subArrayLength = 1024; + auto array = CreateTaggedArray(subArrayLength, JSTaggedValue::Undefined(), MemSpaceType::OLD_SPACE); + rootArray->Set(thread, i, array); + } + heap->CollectGarbage(TriggerGCType::OLD_GC); +} +} // namespace panda::test diff --git a/ecmascript/thread/thread_safe_queue.h b/ecmascript/thread/thread_safe_queue.h deleted file mode 100644 index 27d8b2cca2..0000000000 --- a/ecmascript/thread/thread_safe_queue.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2021 Huawei Device Co., Ltd. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ECMASCRIPT_THREAD_THREAD_SAFE_QUEUE_H -#define ECMASCRIPT_THREAD_THREAD_SAFE_QUEUE_H - -#include "os/mutex.h" -#include - -namespace panda::ecmascript { -template -class ThreadSafeQueue { -public: - explicit ThreadSafeQueue() = default; - ~ThreadSafeQueue() = default; - - bool empty() - { - os::memory::LockHolder lock(mutex_); - return queue_.empty(); - } - - int size() - { - os::memory::LockHolder lock(mutex_); - return queue_.size(); - } - - void enqueue(T &t) - { - os::memory::LockHolder lock(mutex_); - queue_.push(t); - } - - bool dequeue(T &t) - { - os::memory::LockHolder lock(mutex_); - - if (queue_.empty()) { - return false; - } - t = std::move(queue_.front()); - queue_.pop(); - return true; - } - -private: - NO_COPY_SEMANTIC(ThreadSafeQueue); - NO_MOVE_SEMANTIC(ThreadSafeQueue); - - std::queue queue_; - os::memory::Mutex mutex_; -}; -} // namespace panda::ecmascript - -#endif // ECMASCRIPT_THREAD_THREAD_SAFE_QUEUE_H -- Gitee From e3a197be17ec56e7fa796050556ce04a3b2bfbdd Mon Sep 17 00:00:00 2001 From: lukai Date: Tue, 26 Oct 2021 16:40:37 +0800 Subject: [PATCH 2/2] concurrent marking modification Signed-off-by: lukai --- ecmascript/interpreter/interpreter-inl.h | 9 ++-- ecmascript/js_thread.h | 33 +++++++++++++ ecmascript/mem/barriers-inl.h | 2 +- ecmascript/mem/concurrent_marker.cpp | 25 +++++++--- ecmascript/mem/concurrent_marker.h | 39 ++------------- ecmascript/mem/concurrent_sweeper.cpp | 6 ++- ecmascript/mem/heap.cpp | 51 ++++++++++---------- ecmascript/mem/heap.h | 21 +++++--- ecmascript/mem/old_space_collector.cpp | 6
+-- ecmascript/mem/region.h | 10 ++++ ecmascript/mem/semi_space_collector.cpp | 5 +- ecmascript/tests/concurrent_marking_test.cpp | 2 + 12 files changed, 122 insertions(+), 87 deletions(-) diff --git a/ecmascript/interpreter/interpreter-inl.h b/ecmascript/interpreter/interpreter-inl.h index cd1f2fd99f..de0aec2049 100644 --- a/ecmascript/interpreter/interpreter-inl.h +++ b/ecmascript/interpreter/interpreter-inl.h @@ -3309,17 +3309,16 @@ JSTaggedValue EcmaInterpreter::GetRuntimeProfileTypeInfo(TaggedType *sp) bool EcmaInterpreter::UpdateHotnessCounter(JSThread* thread, TaggedType *sp, JSTaggedValue acc, int32_t offset) { - auto marker = thread->GetEcmaVM()->GetHeap()->GetConcurrentMarker(); - if (marker->IsFinished()) { - marker->CheckAndSweep(); - } - FrameState *state = GET_FRAME(sp); auto method = state->method; auto hotnessCounter = static_cast(method->GetHotnessCounter()); hotnessCounter += offset; if (UNLIKELY(hotnessCounter <= 0)) { + if (thread->IsConcurrentMarkingFinished()) { + auto marker = thread->GetEcmaVM()->GetHeap()->GetConcurrentMarker(); + marker->HandleGCRequest(); + } if (state->profileTypeInfo == JSTaggedValue::Undefined()) { state->acc = acc; auto numVregs = method->GetNumVregs(); diff --git a/ecmascript/js_thread.h b/ecmascript/js_thread.h index 9edc94579a..29e7b03ac1 100644 --- a/ecmascript/js_thread.h +++ b/ecmascript/js_thread.h @@ -29,8 +29,17 @@ class EcmaVM; class RegionFactory; class InternalCallParams; +enum class ConcurentMarkingStatus : uint8_t +{ + INITIALIZED, + CONCURRENT_MARKING, + FINISHED +}; + class JSThread : public ManagedThread { public: + static constexpr int CONCURRENT_MARKING_BITFIELD_NUM = 2; + using ConcurrentMarkingStatusBits = BitField; using Address = uintptr_t; static JSThread *Cast(ManagedThread *thread) { @@ -199,6 +208,29 @@ public: void IterateWeakEcmaGlobalStorage(const WeakRootVisitor &visitor); + void SetConcurrentMarkingStatus(ConcurentMarkingStatus status) + { + uint64_t newVal = 
ConcurrentMarkingStatusBits::Update(threadStatusBitField_.load(std::memory_order_relaxed), status); + threadStatusBitField_.store(newVal, std::memory_order_release); + } + + bool IsConcurrentMarkingInitialized() const + { + auto status = ConcurrentMarkingStatusBits::Decode(threadStatusBitField_.load(std::memory_order_acquire)); + return status == ConcurentMarkingStatus::INITIALIZED; + } + + bool IsConcurrentMarking() const + { + auto status = ConcurrentMarkingStatusBits::Decode(threadStatusBitField_.load(std::memory_order_acquire)); + return status == ConcurentMarkingStatus::CONCURRENT_MARKING; + } + + bool IsConcurrentMarkingFinished() const + { + auto status = ConcurrentMarkingStatusBits::Decode(threadStatusBitField_.load(std::memory_order_acquire)); + return status == ConcurentMarkingStatus::FINISHED; + } private: NO_COPY_SEMANTIC(JSThread); NO_MOVE_SEMANTIC(JSThread); @@ -233,6 +265,7 @@ private: bool stableArrayElementsGuardians_ {true}; GlobalEnvConstants globalConst_; // Place-Holder InternalCallParams *internalCallParams_ {nullptr}; + std::atomic threadStatusBitField_ {0ULL}; friend class EcmaHandleScope; friend class GlobalHandleCollection; diff --git a/ecmascript/mem/barriers-inl.h b/ecmascript/mem/barriers-inl.h index c3568db594..2251b268f8 100644 --- a/ecmascript/mem/barriers-inl.h +++ b/ecmascript/mem/barriers-inl.h @@ -31,7 +31,7 @@ static inline void MarkingBarrier(void *obj, size_t offset, JSTaggedType value) // Should align with '8' in 64 and 32 bit platform objectRegion->InsertOldToNewRememberedSet(slotAddr); } - if (objectRegion->GetSpace()->GetHeap()->IsConcurrentMarking()) { + if (objectRegion->IsConcurrentMarking()) { uintptr_t slotAddr = ToUintPtr(obj) + offset; objectRegion->InsertReferenceSet(slotAddr); } diff --git a/ecmascript/mem/concurrent_marker.cpp b/ecmascript/mem/concurrent_marker.cpp index 2d1068821e..3ad4060284 100644 --- a/ecmascript/mem/concurrent_marker.cpp +++ b/ecmascript/mem/concurrent_marker.cpp @@ -34,7 +34,7 @@ 
ConcurrentMarker::ConcurrentMarker(Heap *heap) : heap_(heap), rootManager_(heap_ void ConcurrentMarker::ConcurrentMarking() { InitializeMarking(); - SetMarkingStatus(MarkingStatus::CONCURRENT_MARKING); + heap_->GetEcmaVM()->GetJSThread()->SetConcurrentMarkingStatus(ConcurentMarkingStatus::CONCURRENT_MARKING); Platform::GetCurrentPlatform()->PostTask(std::make_unique(this)); } @@ -132,10 +132,10 @@ void ConcurrentMarker::MarkRoots(WorkerHelper *worklist) rootManager.VisitVMRoots(gcMark, gcMarkRange); } -void ConcurrentMarker::CheckAndSweep() // js-thread wait for sweep +void ConcurrentMarker::HandleGCRequest() // js-thread wait for sweep { - os::memory::LockHolder lock(waitMarkingFinishedMutex_); // dont need lock? => atomic? - if (vmThreadNeedSweep_) { + os::memory::LockHolder lock(waitMarkingFinishedMutex_); + if (notifyMarkingFinished_) { heap_->CollectGarbage(TriggerGCType::OLD_GC); } } @@ -143,8 +143,16 @@ void ConcurrentMarker::CheckAndSweep() // js-thread wait for sweep void ConcurrentMarker::WaitConcurrentMarkingFinished() // call in EcmaVm thread, wait for mark finished { os::memory::LockHolder lock(waitMarkingFinishedMutex_); - vmThreadWaitMarkingFinished_ = true; - waitMarkingFinishedCV_.Wait(&waitMarkingFinishedMutex_); + if (!notifyMarkingFinished_) { + vmThreadWaitMarkingFinished_ = true; + waitMarkingFinishedCV_.Wait(&waitMarkingFinishedMutex_); + } +} + +void ConcurrentMarker::Reset() +{ + heap_->GetEcmaVM()->GetJSThread()->SetConcurrentMarkingStatus(ConcurentMarkingStatus::INITIALIZED); + notifyMarkingFinished_ = false; } // -------------------- privete method ------------------------------------------ @@ -166,6 +174,7 @@ void ConcurrentMarker::InitializeMarking() } else { referenceSet->ClearAllBits(); } + current->SetConcurrentMarking(true); }); workList_->Initialize(TriggerGCType::OLD_GC); MarkRoots(workList_); @@ -224,13 +233,13 @@ bool ConcurrentMarker::ConcurrentMarkerTask::Run(uint32_t threadId) void ConcurrentMarker::MarkingFinished() { - 
SetMarkingStatus(MarkingStatus::FINISHED); + heap_->GetEcmaVM()->GetJSThread()->SetConcurrentMarkingStatus(ConcurentMarkingStatus::FINISHED); os::memory::LockHolder lock(waitMarkingFinishedMutex_); if (vmThreadWaitMarkingFinished_) { waitMarkingFinishedCV_.Signal(); vmThreadWaitMarkingFinished_ = false; } else { - vmThreadNeedSweep_ = true; + notifyMarkingFinished_ = true; } } } // namespace panda::ecmascript diff --git a/ecmascript/mem/concurrent_marker.h b/ecmascript/mem/concurrent_marker.h index 9fc79aad4e..43b0176a7a 100644 --- a/ecmascript/mem/concurrent_marker.h +++ b/ecmascript/mem/concurrent_marker.h @@ -45,40 +45,15 @@ public: // Parallel GC threadID starts with 1. 0 is main threadId void ParallelMarkStack(WorkerHelper *worklist, uint32_t threadId); - void CheckAndSweep(); // call in vm thread. + void HandleGCRequest(); // call in vm thread. void WaitConcurrentMarkingFinished(); // call in main thread - - void Reset() - { - SetMarkingStatus(MarkingStatus::INITIALIZED); - vmThreadNeedSweep_ = false; - } + void Reset(); WorkerHelper *GetWorklist() const { return workList_; } - - bool IsInitialized() const - { - return markingStatus_.load(std::memory_order_acquire) == MarkingStatus::INITIALIZED; - } - - bool IsConcurrentMarking() const - { - return markingStatus_.load(std::memory_order_acquire) == MarkingStatus::CONCURRENT_MARKING; - } - - bool IsFinished() const - { - return markingStatus_.load(std::memory_order_acquire) == MarkingStatus::FINISHED; - } private: - enum class MarkingStatus { - INITIALIZED, - CONCURRENT_MARKING, - FINISHED - }; class ConcurrentMarkerTask : public Task { public: @@ -93,11 +68,6 @@ private: ConcurrentMarker *marker_ {nullptr}; }; - void SetMarkingStatus(MarkingStatus status) - { - markingStatus_.store(status, std::memory_order_release); - } - void InitializeMarking(); void MarkingFinished(); void MarkObject(WorkerHelper *worklist, uint32_t threadId, TaggedObject *object); @@ -106,9 +76,8 @@ private: Heap *heap_ {nullptr}; 
WorkerHelper *workList_ {nullptr}; HeapRootManager rootManager_; - - std::atomic markingStatus_ {MarkingStatus::INITIALIZED}; - bool vmThreadNeedSweep_ {false}; // notify js-thread that marking is finished and need sweep + + bool notifyMarkingFinished_ {false}; // notify js-thread that marking is finished and need sweep bool vmThreadWaitMarkingFinished_ {false}; // jsMainThread waiting for concurrentGC FINISHED os::memory::Mutex waitMarkingFinishedMutex_; os::memory::ConditionVariable waitMarkingFinishedCV_; diff --git a/ecmascript/mem/concurrent_sweeper.cpp b/ecmascript/mem/concurrent_sweeper.cpp index a24bf6f740..0bf00bd61d 100644 --- a/ecmascript/mem/concurrent_sweeper.cpp +++ b/ecmascript/mem/concurrent_sweeper.cpp @@ -95,7 +95,10 @@ void ConcurrentSweeper::SweepSpace(MemSpaceType type, bool isMain) void ConcurrentSweeper::SweepSpace(Space *space, FreeListAllocator &allocator) { allocator.RebuildFreeList(); - space->EnumerateRegions([this, &allocator](Region *current) { FreeRegion(current, allocator); }); + space->EnumerateRegions([this, &allocator](Region *current) { + current->SetConcurrentMarking(false); + FreeRegion(current, allocator); + }); } void ConcurrentSweeper::SweepHugeSpace() @@ -105,6 +108,7 @@ void ConcurrentSweeper::SweepHugeSpace() Region *currentRegion = space->GetRegionList().GetFirst(); while (currentRegion != nullptr) { + currentRegion->SetConcurrentMarking(false); Region *next = currentRegion->GetNext(); auto markBitmap = currentRegion->GetMarkBitmap(); bool isMarked = false; diff --git a/ecmascript/mem/heap.cpp b/ecmascript/mem/heap.cpp index 440e059b4b..55f0991fc4 100644 --- a/ecmascript/mem/heap.cpp +++ b/ecmascript/mem/heap.cpp @@ -123,11 +123,6 @@ void Heap::Destroy() concurrentMarker_ = nullptr; } -void Heap::TriggerConcurrentMarking() -{ - concurrentMarker_->ConcurrentMarking(); -} - void Heap::CollectGarbage(TriggerGCType gcType) { CHECK_NO_GC @@ -154,6 +149,9 @@ void Heap::CollectGarbage(TriggerGCType gcType) 
semiSpaceCollector_->RunPhases(); } } + if (ConcurrentMarkingEnable()) { + TryTriggerConcurrentMarking(); + } break; case TriggerGCType::OLD_GC: oldSpaceCollector_->RunPhases(); @@ -227,6 +225,19 @@ void Heap::RecomputeLimits() oldSpaceAllocLimit_ = newOldSpaceLimit; } +void Heap::TriggerConcurrentMarking() +{ + concurrentMarker_->ConcurrentMarking(); +} + +void Heap::TryTriggerConcurrentMarking() +{ + if (ecmaVm_->GetJSThread()->IsConcurrentMarkingInitialized() && + oldSpace_->GetHeapObjectSize() >= 0.8 * oldSpaceAllocLimit_) { + TriggerConcurrentMarking(); + } +} + bool Heap::CheckAndTriggerOldGC() { if ((oldSpace_->GetCommittedSize() + hugeObjectSpace_->GetCommittedSize()) <= oldSpaceAllocLimit_) { @@ -254,26 +265,11 @@ bool Heap::CheckAndTriggerNonMovableGC() return true; } -bool Heap::IsConcurrentMarking() const -{ - return concurrentMarker_->IsConcurrentMarking(); -} - -bool Heap::IsConcurrentMarkingFinished() const -{ - return concurrentMarker_->IsFinished(); -} - -bool Heap::IsConcurrentMarkingInitialized() const -{ - return concurrentMarker_->IsInitialized(); -} - void Heap::WaitRunningTaskFinished() { - os::memory::LockHolder holder(waitTashFinishedMutex_); + os::memory::LockHolder holder(waitTaskFinishedMutex_); while (runningTastCount_ > 0) { - waitTashFinishedCV_.Wait(&waitTashFinishedMutex_); + waitTaskFinishedCV_.Wait(&waitTaskFinishedMutex_); } } @@ -282,6 +278,11 @@ void Heap::WaitConcurrentMarkingFinished() concurrentMarker_->WaitConcurrentMarkingFinished(); } +void Heap::SetConcurrentMarkingEnable(bool flag) +{ + concurrentMarkingEnable_ = flag; +} + bool Heap::ConcurrentMarkingEnable() const { return concurrentMarkingEnable_; @@ -295,7 +296,7 @@ void Heap::PostParallelGCTask(ParallelGCTaskPhase gcTask) void Heap::IncreaseTaskCount() { - os::memory::LockHolder holder(waitTashFinishedMutex_); + os::memory::LockHolder holder(waitTaskFinishedMutex_); runningTastCount_++; } @@ -306,10 +307,10 @@ bool Heap::CheckCanDistributeTask() void 
Heap::ReduceTaskCount() { - os::memory::LockHolder holder(waitTashFinishedMutex_); + os::memory::LockHolder holder(waitTaskFinishedMutex_); runningTastCount_--; if (runningTastCount_ == 0) { - waitTashFinishedCV_.SignalAll(); + waitTaskFinishedCV_.SignalAll(); } } diff --git a/ecmascript/mem/heap.h b/ecmascript/mem/heap.h index ec083189f2..afe30ae1f6 100644 --- a/ecmascript/mem/heap.h +++ b/ecmascript/mem/heap.h @@ -189,6 +189,10 @@ public: inline void OnAllocateEvent(uintptr_t address); inline void OnMoveEvent(uintptr_t address, uintptr_t forwardAddress); + void TriggerConcurrentMarking(); + + void TryTriggerConcurrentMarking(); + bool CheckAndTriggerOldGC(); bool CheckAndTriggerCompressGC(); @@ -271,21 +275,24 @@ public: inline void ClearSlotsRange(Region *current, uintptr_t freeStart, uintptr_t freeEnd); void WaitRunningTaskFinished(); + bool CheckCanDistributeTask(); + void PostParallelGCTask(ParallelGCTaskPhase gcTask); + bool IsEnableParallelGC() const { return paralledGc_; } + void WaitConcurrentMarkingFinished(); - bool IsConcurrentMarkingInitialized() const; - bool IsConcurrentMarkingFinished() const; - bool IsConcurrentMarking() const; - bool ConcurrentMarkingEnable() const; - void TriggerConcurrentMarking(); + void SetConcurrentMarkingEnable(bool flag); + + bool ConcurrentMarkingEnable() const; private: void IncreaseTaskCount(); + void ReduceTaskCount(); class ParallelGCTask : public Task { @@ -323,8 +330,8 @@ private: ConcurrentMarker *concurrentMarker_; uint32_t runningTastCount_ {0}; - os::memory::Mutex waitTashFinishedMutex_; - os::memory::ConditionVariable waitTashFinishedCV_; + os::memory::Mutex waitTaskFinishedMutex_; + os::memory::ConditionVariable waitTaskFinishedCV_; bool paralledGc_ {false}; bool concurrentMarkingEnable_ {false}; diff --git a/ecmascript/mem/old_space_collector.cpp b/ecmascript/mem/old_space_collector.cpp index a8fedd7438..ed91b3154f 100644 --- a/ecmascript/mem/old_space_collector.cpp +++ 
b/ecmascript/mem/old_space_collector.cpp @@ -37,12 +37,12 @@ OldSpaceCollector::OldSpaceCollector(Heap *heap, bool parallelGc) void OldSpaceCollector::RunPhases() { - [[maybe_unused]] ecmascript::JSThread *thread = heap_->GetEcmaVM()->GetJSThread(); + ecmascript::JSThread *thread = heap_->GetEcmaVM()->GetJSThread(); INTERPRETER_TRACE(thread, OldSpaceCollector_RunPhases); trace::ScopedTrace scoped_trace("OldSpaceCollector::RunPhases"); [[maybe_unused]] ClockScope clock("OldSpaceCollector::RunPhases"); - if (heap_->ConcurrentMarkingEnable() && !heap_->IsConcurrentMarkingInitialized()) { - if (heap_->IsConcurrentMarking()) { + if (heap_->ConcurrentMarkingEnable() && !thread->IsConcurrentMarkingInitialized()) { + if (thread->IsConcurrentMarking()) { heap_->WaitConcurrentMarkingFinished(); } auto worklist = heap_->GetConcurrentMarker()->GetWorklist(); diff --git a/ecmascript/mem/region.h b/ecmascript/mem/region.h index 814d12e5e0..7eab577693 100644 --- a/ecmascript/mem/region.h +++ b/ecmascript/mem/region.h @@ -247,6 +247,15 @@ public: } } + bool IsConcurrentMarking() + { + return concurrentMarking_; + } + + void SetConcurrentMarking(bool isMarking) + { + concurrentMarking_ = isMarking; + } private: Space *space_; uintptr_t flags_; // Memory alignment, only low 32bits are used now @@ -254,6 +263,7 @@ private: uintptr_t begin_; uintptr_t end_; uintptr_t highWaterMark_; + bool concurrentMarking_ {false}; Region *next_ {nullptr}; Region *prev_ {nullptr}; RangeBitmap *markBitmap_ {nullptr}; diff --git a/ecmascript/mem/semi_space_collector.cpp b/ecmascript/mem/semi_space_collector.cpp index 65d8685528..61b4a66adf 100644 --- a/ecmascript/mem/semi_space_collector.cpp +++ b/ecmascript/mem/semi_space_collector.cpp @@ -45,12 +45,13 @@ SemiSpaceCollector::~SemiSpaceCollector() void SemiSpaceCollector::RunPhases() { - [[maybe_unused]] ecmascript::JSThread *thread = heap_->GetEcmaVM()->GetJSThread(); + ecmascript::JSThread *thread = heap_->GetEcmaVM()->GetJSThread(); 
INTERPRETER_TRACE(thread, SemiSpaceCollector_RunPhases); trace::ScopedTrace scoped_trace("SemiSpaceCollector::RunPhases"); [[maybe_unused]] ClockScope clock("SemiSpaceCollector::RunPhases"); - if (heap_->ConcurrentMarkingEnable() && heap_->IsConcurrentMarking()) { + if (heap_->ConcurrentMarkingEnable() && thread->IsConcurrentMarking()) { heap_->WaitConcurrentMarkingFinished(); + heap_->CollectGarbage(TriggerGCType::OLD_GC); } InitializePhase(); ParallelMarkingPhase(); diff --git a/ecmascript/tests/concurrent_marking_test.cpp b/ecmascript/tests/concurrent_marking_test.cpp index 286aa95744..b7f2ed148e 100644 --- a/ecmascript/tests/concurrent_marking_test.cpp +++ b/ecmascript/tests/concurrent_marking_test.cpp @@ -53,6 +53,8 @@ public: scope = new EcmaHandleScope(thread); thread->SetIsEcmaInterpreter(true); EcmaVM::Cast(instance)->GetFactory()->SetTriggerGc(false); + auto heap = const_cast(thread->GetEcmaVM()->GetHeap()); + heap->SetConcurrentMarkingEnable(true); } void TearDown() override -- Gitee