From a2d38a584de2fe3f728654bed96234af5a9855ea Mon Sep 17 00:00:00 2001 From: Panferov Ivan Date: Wed, 8 May 2024 18:12:37 +0800 Subject: [PATCH] Refactoring caching references from RemSets in G1GC Description: Create global bitmap for RemSets to iterate over it instead of using CardTable Issue: #I9NL99 Testing: ninja all tests Signed-off-by: Panferov Ivan --- static_core/runtime/mem/gc/g1/g1-gc.cpp | 100 ++++-------------------- static_core/runtime/mem/gc/g1/g1-gc.h | 10 +-- static_core/runtime/mem/rem_set-inl.h | 71 ++++++++++++++++- static_core/runtime/mem/rem_set.h | 41 +++++++++- 4 files changed, 130 insertions(+), 92 deletions(-) diff --git a/static_core/runtime/mem/gc/g1/g1-gc.cpp b/static_core/runtime/mem/gc/g1/g1-gc.cpp index 19966227e..e61d09297 100644 --- a/static_core/runtime/mem/gc/g1/g1-gc.cpp +++ b/static_core/runtime/mem/gc/g1/g1-gc.cpp @@ -39,21 +39,6 @@ namespace ark::mem { -#ifndef NDEBUG -static bool IsCardTableClear(CardTable *cardTable) -{ - bool clear = true; - cardTable->VisitMarked( - [&clear](const MemRange &range) { - LOG(ERROR, GC) << "Card [" << ToVoidPtr(range.GetStartAddress()) << " - " - << ToVoidPtr(range.GetEndAddress()) << "] is not clear"; - clear = false; - }, - CardTableProcessedFlag::VISIT_MARKED | CardTableProcessedFlag::VISIT_PROCESSED); - return clear; -} -#endif - /* static */ template void G1GC::CalcLiveBytesMarkPreprocess(const ObjectHeader *object, BaseClass *baseKlass) @@ -1092,8 +1077,8 @@ void G1GC::RunGC(GCTask &task, const CollectionSet &collectibleR uint64_t youngPauseTime; { time::Timer timer(&youngPauseTime, true); - MemRange dirtyCardsRange = MixedMarkAndCacheRefs(task, collectibleRegions); - ClearDirtyAndYoungCards(dirtyCardsRange); + MixedMarkAndCacheRefs(task, collectibleRegions); + ClearYoungCards(collectibleRegions); CollectAndMove(collectibleRegions); ClearRefsFromRemsetsCache(); this->GetObjectGenAllocator()->InvalidateSpaceData(); @@ -1105,7 +1090,7 @@ void G1GC::RunGC(GCTask &task, const CollectionSet 
&collectibleR } template -MemRange G1GC::MixedMarkAndCacheRefs(const GCTask &task, const CollectionSet &collectibleRegions) +void G1GC::MixedMarkAndCacheRefs(const GCTask &task, const CollectionSet &collectibleRegions) { GCScope scopedTrace(__FUNCTION__, this, GCPhase::GC_PHASE_MARK_YOUNG); bool useGcWorkers = this->GetSettings()->ParallelMarkingEnabled(); @@ -1126,13 +1111,13 @@ MemRange G1GC::MixedMarkAndCacheRefs(const GCTask &task, const C // since we reach this by graph from tenured roots, // because we will process all young regions at young GC we will find all required references RefCacheBuilder builder(this, &uniqueRefsFromRemsets_, regionSizeBits_, &objectsStack); - auto refsChecker = [this, &builder](const MemRange &memRange, Region *region) { + auto refsChecker = [this, &builder](Region *region, const MemRange &memRange) { IterateOverRefsInMemRange(memRange, region, builder); return builder.AllCrossRegionRefsProcessed(); }; analytics_.ReportMarkingStart(ark::time::GetCurrentTimeInNanos()); - MemRange dirtyCardsRange = CacheRefsFromRemsets(refsChecker); + CacheRefsFromRemsets(refsChecker); auto refPred = [this](const ObjectHeader *obj) { return this->InGCSweepRange(obj); }; GCRootVisitor gcMarkCollectionSet = [&objectsStack, this, &refPred](const GCRoot &gcRoot) { @@ -1180,7 +1165,6 @@ MemRange G1GC::MixedMarkAndCacheRefs(const GCTask &task, const C // HandleReferences could write a new barriers - so we need to handle them before moving ProcessDirtyCards(); - return dirtyCardsRange; } template @@ -2031,102 +2015,60 @@ void G1GC::UpdateRefsFromRemSets(const Visitor &visitor) visitor(object, ObjectAccessor::GetObject(object, offset), offset); return true; }; - auto refsChecker = [this, &fieldVisitor](const MemRange &memRange, Region *region) { + auto refsChecker = [this, &fieldVisitor](Region *region, const MemRange &memRange) { IterateOverRefsInMemRange(memRange, region, fieldVisitor); return true; }; - MemRange dirtyCards = 
CacheRefsFromRemsets(refsChecker); - ClearDirtyAndYoungCards(dirtyCards); + CacheRefsFromRemsets(refsChecker); } template -MemRange G1GC::CacheRefsFromRemsets(const MemRangeRefsChecker &refsChecker) +void G1GC::CacheRefsFromRemsets(const MemRangeRefsChecker &refsChecker) { GCScope cacheRefsFromRemsetScope(__FUNCTION__, this); // Collect only unique objects to not proceed them more than once. ASSERT(!uniqueCardsInitialized_); - CardTable *cardTable = this->GetCardTable(); - uintptr_t minDirtyAddr = cardTable->GetMinAddress() + cardTable->GetCardsCount() * cardTable->GetCardSize(); - uintptr_t maxDirtyAddr = cardTable->GetMinAddress(); size_t remsetSize = 0; - ASSERT(IsCardTableClear(cardTable)); - auto visitor = [cardTable, &minDirtyAddr, &maxDirtyAddr, &remsetSize, &refsChecker](Region *r, - const MemRange &range) { - // The proper DEFAULT_REGION_SIZE value is 256_KB, that value allows card caching of already processed - // memory ranges, the following code will not work with a different DEFAULT_REGION size - constexpr uint64_t EXPECTED_DEFAULT_REGION_SIZE = 256_KB; - static_assert(DEFAULT_REGION_SIZE == EXPECTED_DEFAULT_REGION_SIZE, "Unsupported default region size"); - - // Use the card table to mark the ranges we already processed. - // Each card is uint8_t. Use it as a bitmap. Set bit means the corresponding memory - // range is processed. 
- CardTable::CardPtr card = cardTable->GetCardPtr(range.GetStartAddress()); - uintptr_t cardAddr = cardTable->GetCardStartAddress(card); - constexpr size_t MEM_SIZE = DEFAULT_REGION_SIZE / RemSet<>::Bitmap::GetNumBits(); - size_t bitIdx = (range.GetStartAddress() - cardAddr) / MEM_SIZE; - if ((card->GetCard() & (1U << bitIdx)) == 0) { - card->SetCard(card->GetCard() | (1U << bitIdx)); - if (minDirtyAddr > cardAddr) { - minDirtyAddr = cardAddr; - } - if (maxDirtyAddr < cardAddr + CardTable::GetCardSize()) { - maxDirtyAddr = cardAddr + CardTable::GetCardSize(); - } - remsetSize++; - return refsChecker(range, r); - } - // some cross region refs might be not processed - return false; + auto visitor = [&remsetSize, &refsChecker](Region *r, const MemRange &range) { + remsetSize++; + return refsChecker(r, range); }; - for (auto region : collectionSet_) { - region->GetRemSet()->Iterate(RemsetRegionPredicate, visitor); - } + + GlobalRemSet globalRemSet; + globalRemSet.ProcessRemSets(collectionSet_, RemsetRegionPredicate, visitor); analytics_.ReportRemsetSize(remsetSize, GetUniqueRemsetRefsCount()); if (!this->IsFullGC()) { auto dirtyCardsCount = dirtyCards_.size(); analytics_.ReportScanDirtyCardsStart(ark::time::GetCurrentTimeInNanos()); - CacheRefsFromDirtyCards(visitor); + CacheRefsFromDirtyCards(globalRemSet, refsChecker); analytics_.ReportScanDirtyCardsEnd(ark::time::GetCurrentTimeInNanos(), dirtyCardsCount); #ifndef NDEBUG uniqueCardsInitialized_ = true; #endif // NDEBUG } - - if (minDirtyAddr > maxDirtyAddr) { - minDirtyAddr = maxDirtyAddr; - } - return MemRange(minDirtyAddr, maxDirtyAddr); } template template -void G1GC::CacheRefsFromDirtyCards(Visitor visitor) +void G1GC::CacheRefsFromDirtyCards(GlobalRemSet &globalRemSet, Visitor visitor) { ScopedTiming t(__FUNCTION__, *this->GetTiming()); auto cardTable = this->GetCardTable(); - constexpr size_t MEM_SIZE = DEFAULT_REGION_SIZE / RemSet<>::Bitmap::GetNumBits(); for (auto it = dirtyCards_.cbegin(); it != 
dirtyCards_.cend();) { auto range = cardTable->GetMemoryRange(*it); auto addr = range.GetStartAddress(); ASSERT_DO(IsHeapSpace(PoolManager::GetMmapMemPool()->GetSpaceTypeForAddr(ToVoidPtr(addr))), std::cerr << "Invalid space type for the " << addr << std::endl); - auto endAddr = range.GetEndAddress(); auto region = ark::mem::AddrToRegion(ToVoidPtr(addr)); if (!RemsetRegionPredicate(region)) { it = dirtyCards_.erase(it); continue; } - auto allCrossRegionRefsProcessed = true; - while (addr < endAddr) { - if (!visitor(region, MemRange(addr, addr + MEM_SIZE))) { - allCrossRegionRefsProcessed = false; - } - addr += MEM_SIZE; - } + auto allCrossRegionRefsProcessed = globalRemSet.IterateOverUniqueRange(region, range, visitor); if (allCrossRegionRefsProcessed) { it = dirtyCards_.erase(it); continue; @@ -2153,14 +2095,6 @@ void G1GC::ClearYoungCards(const CollectionSet &collectionSet) } } -template -void G1GC::ClearDirtyAndYoungCards(const MemRange &dirtyCardsRange) -{ - CardTable *cardTable = this->GetCardTable(); - ClearYoungCards(collectionSet_); - cardTable->ClearCardRange(dirtyCardsRange.GetStartAddress(), dirtyCardsRange.GetEndAddress()); -} - template void G1GC::ClearRefsFromRemsetsCache() { diff --git a/static_core/runtime/mem/gc/g1/g1-gc.h b/static_core/runtime/mem/gc/g1/g1-gc.h index 7a81cb7b7..62b156df3 100644 --- a/static_core/runtime/mem/gc/g1/g1-gc.h +++ b/static_core/runtime/mem/gc/g1/g1-gc.h @@ -66,7 +66,7 @@ template class G1GC : public GenerationalGC { using RefVector = PandaVector; using ReferenceCheckPredicateT = typename GC::ReferenceCheckPredicateT; - using MemRangeRefsChecker = std::function; + using MemRangeRefsChecker = std::function; template using MovedObjectsContainer = std::conditional_t *>, PandaVector *>>; @@ -189,7 +189,7 @@ private: void IterateOverRefsInMemRange(const MemRange &memRange, Region *region, Handler &refsHandler); template - void CacheRefsFromDirtyCards(Visitor visitor); + void CacheRefsFromDirtyCards(GlobalRemSet &globalRemSet, 
Visitor visitor); void InitializeImpl() override; @@ -243,7 +243,7 @@ private: static void CalcLiveBytesNotAtomicallyMarkPreprocess(const ObjectHeader *object, BaseClass *baseKlass); /// Caches refs from remset and marks objects in collection set (young-generation + maybe some tenured regions). - MemRange MixedMarkAndCacheRefs(const GCTask &task, const CollectionSet &collectibleRegions); + void MixedMarkAndCacheRefs(const GCTask &task, const CollectionSet &collectibleRegions); /** * Mark roots and add them to the stack @@ -393,7 +393,7 @@ private: template void UpdateRefsFromRemSets(const Visitor &visitor); - MemRange CacheRefsFromRemsets(const MemRangeRefsChecker &refsChecker); + void CacheRefsFromRemsets(const MemRangeRefsChecker &refsChecker); void ClearRefsFromRemsetsCache(); @@ -405,8 +405,6 @@ private: void ClearYoungCards(const CollectionSet &collectionSet); - void ClearDirtyAndYoungCards(const MemRange &dirtyCardsRange); - size_t GetMaxMixedRegionsCount(); void PrepareYoungRegionsForFullGC(const CollectionSet &collectionSet); diff --git a/static_core/runtime/mem/rem_set-inl.h b/static_core/runtime/mem/rem_set-inl.h index fd64d3497..4328555f1 100644 --- a/static_core/runtime/mem/rem_set-inl.h +++ b/static_core/runtime/mem/rem_set-inl.h @@ -196,9 +196,9 @@ void RemSet::RemoveRefRegion(Region *region) template size_t RemSet::GetIdxInBitmap(uintptr_t addr, uintptr_t bitmapBeginAddr) { - size_t memSize = DEFAULT_REGION_SIZE / Bitmap::GetNumBits(); + static constexpr size_t MEM_SIZE = DEFAULT_REGION_SIZE / Bitmap::GetNumBits(); ASSERT(bitmapBeginAddr <= addr && addr < bitmapBeginAddr + DEFAULT_REGION_SIZE); - return (addr - bitmapBeginAddr) / memSize; + return (addr - bitmapBeginAddr) / MEM_SIZE; } template @@ -225,6 +225,73 @@ void RemSet::Dump(std::ostream &out) out << std::dec; } +template +template +void RemSet::VisitBitmaps(const Visitor &visitor) const +{ + for (auto &[bitmapBeginAddr, bitmap] : bitmaps_) { + visitor(bitmapBeginAddr, bitmap); + } +} + 
+template +void GlobalRemSet::ProcessRemSets(const RegionContainer &cont, const RegionPred &regionPred, const MemVisitor &visitor) +{ + for (auto *region : cont) { + FillBitmap(*region->GetRemSet(), regionPred); + } + IterateOverBits(visitor); +} + +template +void GlobalRemSet::FillBitmap(const RemSet<> &remSet, const RegionPred &regionPred) +{ + remSet.VisitBitmaps([this, &regionPred](uintptr_t beginAddr, const RemSet<>::Bitmap &bitmap) { + auto *region = AddrToRegion(ToVoidPtr(beginAddr)); + if (regionPred(region)) { + bitmaps_[beginAddr].AddBits(bitmap); + } + }); +} + +template +void GlobalRemSet::IterateOverBits(const MemVisitor &visitor) const +{ + for (auto &[bitmapBeginAddr, bitmap] : bitmaps_) { + auto *region = AddrToRegion(ToVoidPtr(bitmapBeginAddr)); + MemRange bitmapRange(bitmapBeginAddr, bitmapBeginAddr + DEFAULT_REGION_SIZE); + bitmap.Iterate(bitmapRange, [region, visitor](const MemRange &range) { visitor(region, range); }); + } +} + +template +bool GlobalRemSet::IterateOverUniqueRange(Region *region, MemRange range, const MemVisitor &visitor) +{ + auto addr = range.GetStartAddress(); + auto bitmapBeginAddr = addr & ~DEFAULT_REGION_MASK; + auto bitmapIt = bitmaps_.find(bitmapBeginAddr); + if (bitmapIt == bitmaps_.cend()) { + return visitor(region, range); + } + + auto &bitmap = bitmapIt->second; + auto endAddr = range.GetEndAddress() + 1U; + static constexpr size_t MEM_SIZE = DEFAULT_REGION_SIZE / RemSet<>::Bitmap::GetNumBits(); + ASSERT(((endAddr - addr) % MEM_SIZE) == 0); + bool allRefsProcessed = true; + for (; addr != endAddr; addr += MEM_SIZE) { + auto isMarked = bitmap.Check(RemSet<>::GetIdxInBitmap(addr, bitmapBeginAddr)); + if (isMarked) { + allRefsProcessed = false; + continue; + } + if (!visitor(region, MemRange(addr, addr + MEM_SIZE))) { + allRefsProcessed = false; + } + } + return allRefsProcessed; +} + } // namespace ark::mem #endif // PANDA_MEM_GC_G1_REM_SET_INL_H diff --git a/static_core/runtime/mem/rem_set.h b/static_core/runtime/mem/rem_set.h 
index 98cd332fd..7a4379c75 100644 --- a/static_core/runtime/mem/rem_set.h +++ b/static_core/runtime/mem/rem_set.h @@ -17,6 +17,7 @@ #define PANDA_MEM_GC_G1_REM_SET_H #include +#include namespace ark::mem { @@ -86,6 +87,11 @@ public: void Dump(std::ostream &out); + template + void VisitBitmaps(const Visitor &visitor) const; + + static size_t GetIdxInBitmap(uintptr_t addr, uintptr_t bitmapBeginAddr); + class Bitmap { public: static constexpr size_t GetBitmapSizeInBytes() @@ -106,6 +112,21 @@ public: bitmap_[elemIdx] |= 1ULL << bitOffset; } + bool Check(size_t idx) const + { + size_t elemIdx = idx / ELEM_BITS; + ASSERT(elemIdx < SIZE); + size_t bitOffset = idx - elemIdx * ELEM_BITS; + return (bitmap_[elemIdx] & (1ULL << bitOffset)) != 0; + } + + void AddBits(const Bitmap &other) + { + for (size_t i = 0; i < SIZE; ++i) { + bitmap_[i] |= other.bitmap_[i]; + } + } + template void Iterate(const MemRange &range, const Visitor &visitor) const { @@ -131,7 +152,6 @@ public: }; private: - static size_t GetIdxInBitmap(uintptr_t addr, uintptr_t bitmapBeginAddr); template PandaUnorderedSet *GetRefRegions(); template @@ -148,5 +168,24 @@ private: friend class test::RemSetTest; }; + +class GlobalRemSet { +public: + template + void ProcessRemSets(const RegionContainer &cont, const RegionPred &regionPred, const MemVisitor &visitor); + + template + bool IterateOverUniqueRange(Region *region, MemRange range, const MemVisitor &visitor); + +private: + template + void FillBitmap(const RemSet<> &remSet, const RegionPred &regionPred); + + template + void IterateOverBits(const MemVisitor &visitor) const; + + PandaUnorderedMap::Bitmap> bitmaps_; +}; + } // namespace ark::mem #endif // PANDA_MEM_GC_G1_REM_SET_H -- Gitee