From 09f161d2dfa9a4c1b23c3ae665f86d26063d8698 Mon Sep 17 00:00:00 2001 From: ZhouGuangyuan Date: Sat, 12 Jul 2025 12:41:34 +0800 Subject: [PATCH] asm write barrier for cmcgc Issue: https://gitee.com/openharmony/arkcompiler_ets_runtime/issues/ICL8GT Signed-off-by: ZhouGuangyuan Change-Id: I22ffe82451c4cf4f48019a47a1585f0750c25c3a --- common_components/base/globals.h | 6 + common_components/common_runtime/hooks.h | 2 + .../heap/allocator/region_desc.h | 4 +- .../heap/collector/region_bitmap.h | 3 + .../assembler/aarch64/assembler_aarch64.cpp | 16 ++ .../assembler/aarch64/assembler_aarch64.h | 2 + .../aarch64/assembler_aarch64_constants.h | 5 + .../compiler/assembler/x64/assembler_x64.cpp | 25 +++ .../compiler/assembler/x64/assembler_x64.h | 3 + ecmascript/compiler/barrier_stub_builder.cpp | 32 +--- ecmascript/compiler/call_signature.cpp | 35 +++- ecmascript/compiler/call_signature.h | 6 +- .../compiler/codegen/llvm/llvm_ir_builder.cpp | 9 +- .../compiler/codegen/llvm/llvm_ir_builder.h | 1 + ecmascript/compiler/post_schedule.cpp | 6 +- ecmascript/compiler/stub_builder.cpp | 2 +- .../aarch64/asm_interpreter_call.cpp | 170 ++++++++++++++++++ .../compiler/trampoline/aarch64/common_call.h | 11 ++ .../trampoline/x64/asm_interpreter_call.cpp | 119 +++++++++++- .../compiler/trampoline/x64/common_call.h | 11 ++ ecmascript/ecma_vm.cpp | 5 +- ecmascript/mem/cmc_gc/hooks.cpp | 46 ++++- ecmascript/stubs/runtime_stub_list.h | 7 +- ecmascript/stubs/runtime_stubs.cpp | 8 +- ecmascript/stubs/runtime_stubs.h | 2 +- 25 files changed, 486 insertions(+), 50 deletions(-) diff --git a/common_components/base/globals.h b/common_components/base/globals.h index 6321c95796..ee5b5aacd0 100755 --- a/common_components/base/globals.h +++ b/common_components/base/globals.h @@ -52,6 +52,12 @@ constexpr bool IsPowerOfTwo(T x) return ret; } +template +static constexpr int Log2(T n, T acc = 0) +{ + return (n == 1) ? 
acc : Log2(n >> 1, acc + 1); +} + template T RoundDown(T x, typename Identity::type n) { diff --git a/common_components/common_runtime/hooks.h b/common_components/common_runtime/hooks.h index 4fe9a9487d..fe6183ce43 100644 --- a/common_components/common_runtime/hooks.h +++ b/common_components/common_runtime/hooks.h @@ -20,6 +20,7 @@ #include "common_interfaces/heap/heap_visitor.h" #include "common_interfaces/thread/mutator_base.h" +#include "common_components/heap/collector/gc_request.h" // Visitor that iterate all `RefField`s in a TaggedObject and add them to // `WorkStack` Should be moved to BaseRT and panda namespace later @@ -45,6 +46,7 @@ PUBLIC_API void VisitDynamicThreadPreforwardRoot(const RefFieldVisitor &visitorF PUBLIC_API void VisitJSThread(void *jsThread, CommonRootVisitor visitor); PUBLIC_API void SynchronizeGCPhaseToJSThread(void *jsThread, GCPhase gcPhase); +PUBLIC_API void UpdateCMCWriteBarrierStub(void *jsThread, GCPhase gcPhase, GCReason gcReason); // CMC-GC dependent interface PUBLIC_API void FillFreeObject(void *object, size_t size); diff --git a/common_components/heap/allocator/region_desc.h b/common_components/heap/allocator/region_desc.h index 8fd7191254..6f9f5cf8a9 100755 --- a/common_components/heap/allocator/region_desc.h +++ b/common_components/heap/allocator/region_desc.h @@ -94,13 +94,15 @@ public: // default common region unit size. static constexpr size_t UNIT_SIZE = 256 * KB; + static constexpr int UNIT_SIZE_LOG2 = Log2(UNIT_SIZE); // result == 18 + // threshold for object to unique a region static constexpr size_t LARGE_OBJECT_DEFAULT_THRESHOLD = UNIT_SIZE * 2 / 3; // release a large object when the size is greater than 4096KB. 
static constexpr size_t LARGE_OBJECT_RELEASE_THRESHOLD = 4096 * KB; - static constexpr size_t DEFAULT_REGION_UNIT_MASK = RegionDesc::UNIT_SIZE - 1; + static constexpr uint64_t DEFAULT_REGION_UNIT_MASK = RegionDesc::UNIT_SIZE - 1; RegionDesc() { diff --git a/common_components/heap/collector/region_bitmap.h b/common_components/heap/collector/region_bitmap.h index 0aeac4f6f4..5ac877d171 100755 --- a/common_components/heap/collector/region_bitmap.h +++ b/common_components/heap/collector/region_bitmap.h @@ -26,8 +26,11 @@ namespace common { static constexpr size_t kBitsPerByte = 8; +static constexpr size_t kBitsPerByteLog2 = Log2(kBitsPerByte); static constexpr size_t kMarkedBytesPerBit = 8; +static constexpr size_t kMarkedBytesPerBitLog2 = Log2(kMarkedBytesPerBit); static constexpr size_t kBitsPerWord = sizeof(uint64_t) * kBitsPerByte; +static constexpr size_t kBitsPerWordLog2 = Log2(kBitsPerWord); static constexpr size_t kBytesPerWord = sizeof(uint64_t) / sizeof(uint8_t); struct RegionBitmap { static constexpr uint8_t factor = 16; diff --git a/ecmascript/compiler/assembler/aarch64/assembler_aarch64.cpp b/ecmascript/compiler/assembler/aarch64/assembler_aarch64.cpp index 6bc9cda990..d3a9383975 100644 --- a/ecmascript/compiler/assembler/aarch64/assembler_aarch64.cpp +++ b/ecmascript/compiler/assembler/aarch64/assembler_aarch64.cpp @@ -296,6 +296,22 @@ void AssemblerAarch64::Ldr(const Register &rt, const MemoryOperand &operand, Sca } } +void AssemblerAarch64::Ldxr(const Register &rt, const Register &rn) +{ + bool regX = !rt.IsW(); + uint32_t op = ExclusiveOpCode::LDXR; + uint32_t instructionCode = ((regX << 30) | op | Rn(rn.GetId()) | Rt(rt.GetId())); + EmitU32(instructionCode); +} + +void AssemblerAarch64::Stxr(const Register &rm, const Register &rt, const Register &rn) +{ + bool regX = !rt.IsW(); + uint32_t op = ExclusiveOpCode::STXR; + uint32_t instructionCode = ((regX << 30) | op | Rm(rm.GetId()) | Rn(rn.GetId()) | Rt(rt.GetId())); + EmitU32(instructionCode); +} + 
void AssemblerAarch64::Ldr(const Register &rt, const MemoryOperand &operand) { Ldr(rt, operand, Scale::Q); diff --git a/ecmascript/compiler/assembler/aarch64/assembler_aarch64.h b/ecmascript/compiler/assembler/aarch64/assembler_aarch64.h index 8e84f4e87d..ccc18b1530 100644 --- a/ecmascript/compiler/assembler/aarch64/assembler_aarch64.h +++ b/ecmascript/compiler/assembler/aarch64/assembler_aarch64.h @@ -354,6 +354,8 @@ public: void Ret(const Register &rn); void Brk(const Immediate &imm); void Bind(Label *target); + void Ldxr(const Register &rt, const Register &rn); + void Stxr(const Register &rm, const Register &rt, const Register &rn); private: // common reg field defines inline uint32_t Rd(uint32_t id) diff --git a/ecmascript/compiler/assembler/aarch64/assembler_aarch64_constants.h b/ecmascript/compiler/assembler/aarch64/assembler_aarch64_constants.h index 3145a24ce1..7baf1aefa8 100644 --- a/ecmascript/compiler/assembler/aarch64/assembler_aarch64_constants.h +++ b/ecmascript/compiler/assembler/aarch64/assembler_aarch64_constants.h @@ -116,6 +116,11 @@ enum BitwiseOpCode { ORR_Shift = 0x2a000000, }; +enum ExclusiveOpCode { + LDXR = 0x885F7C00, + STXR = 0x88007C00, +}; + // branch code enum BranchOpCode { BranchFMask = 0x7C000000, diff --git a/ecmascript/compiler/assembler/x64/assembler_x64.cpp b/ecmascript/compiler/assembler/x64/assembler_x64.cpp index db984e9260..709f3e6b87 100644 --- a/ecmascript/compiler/assembler/x64/assembler_x64.cpp +++ b/ecmascript/compiler/assembler/x64/assembler_x64.cpp @@ -1316,6 +1316,16 @@ void AssemblerX64::Btq(Immediate src, Register dst) EmitModrm(4, dst); EmitI8(static_cast(src.Value())); } + +void AssemblerX64::Btq(Register src, const Operand& dst) +{ + EmitRexPrefix(src, dst); + EmitU8(0x0F); + EmitU8(0xA3); + + EmitOperand(src, dst); +} + void AssemblerX64::Btl(Immediate src, Register dst) { EmitRexPrefix(dst); @@ -1365,6 +1375,21 @@ void AssemblerX64::Btsl(Register src, Register dst) EmitModrm(src, dst); } +void 
AssemblerX64::LockPrefix() +{ + EmitU8(0xF0); +} + +void AssemblerX64::Btsq(Register src, const Operand& dst) +{ + EmitRexPrefix(src, dst); + // 0F AB: bts r/m64, r64; + EmitU8(0x0F); + EmitU8(0xAB); + + EmitOperand(src, dst); +} + void AssemblerX64::Int3() { // CC :: INT3 diff --git a/ecmascript/compiler/assembler/x64/assembler_x64.h b/ecmascript/compiler/assembler/x64/assembler_x64.h index aad96bde62..428f44f224 100644 --- a/ecmascript/compiler/assembler/x64/assembler_x64.h +++ b/ecmascript/compiler/assembler/x64/assembler_x64.h @@ -124,6 +124,7 @@ public: void Or(Immediate src, Register dst); void Orq(Register src, Register dst); void Btq(Immediate src, Register dst); + void Btq(Register src, const Operand &dst); void Btl(Immediate src, Register dst); void Cmpl(Register src, Register dst); void CMovbe(Register src, Register dst); @@ -154,6 +155,8 @@ public: void Shll(Immediate src, Register dst); void Shlq(Immediate src, Register dst); void Btsl(Register src, Register dst); + void LockPrefix(); + void Btsq(Register src, const Operand &dst); void Testq(Immediate src, Register dst); void Testb(Immediate src, Register dst); void Int3(); diff --git a/ecmascript/compiler/barrier_stub_builder.cpp b/ecmascript/compiler/barrier_stub_builder.cpp index 3702fbf4c7..43a9fda0ce 100644 --- a/ecmascript/compiler/barrier_stub_builder.cpp +++ b/ecmascript/compiler/barrier_stub_builder.cpp @@ -751,12 +751,8 @@ void BarrierStubBuilder::DoReverseBarrier() Label markInBuffer(env); Label continueProcessing(env); Label isTaggedObject(env); - Label RefisTaggedObject(env); - Label markRSet(env); Label continueLoopHead(env); Label continueLoopEnd(env); - Label notMarkRSetLoopHead(env); - Label notMarkRSetLoopEnd(env); Label iLessLength(env); Label indexLessLength(env); Label notIdlePhase(env); @@ -799,29 +795,10 @@ void BarrierStubBuilder::DoReverseBarrier() } Bind(&notMarkRSet); { - DEFVARIABLE(index, VariableType::INT32(), Int32(0)); - GateRef shouldProcessSATB = ShouldProcessSATB(gcPhase);
- Jump(&notMarkRSetLoopHead); - LoopBegin(&notMarkRSetLoopHead); - { - BRANCH_LIKELY(Int32UnsignedLessThan(*index, slotCount_), &indexLessLength, &exit); - Bind(&indexLessLength); - GateRef offset = PtrMul(ZExtInt32ToPtr(*index), IntPtr(JSTaggedValue::TaggedTypeSize())); - GateRef ref = LoadPrimitive(VariableType::JS_ANY(), dstAddr_, offset); - BRANCH(TaggedIsHeapObject(ref), &RefisTaggedObject, &notMarkRSetLoopEnd); - Bind(&RefisTaggedObject); - BRANCH_UNLIKELY(shouldProcessSATB, &markInBuffer, &exit); - Bind(&markInBuffer); - { - ASSERT(RuntimeStubCSigns::Get(RTSTUB_ID(MarkInBuffer))->IsNoTailCall()); - CallNGCRuntime(glue_, RTSTUB_ID(MarkInBuffer), {ref}); - Jump(&notMarkRSetLoopEnd); - } - - Bind(&notMarkRSetLoopEnd); - index = Int32Add(*index, Int32(1)); - LoopEnd(&notMarkRSetLoopHead); - } + BRANCH_UNLIKELY(shouldProcessSATB, &markInBuffer, &exit); + Bind(&markInBuffer); + CallNGCRuntime(glue_, RTSTUB_ID(BatchMarkInBuffer), {TaggedCastToIntPtr(dstAddr_), slotCount_}); + Jump(&exit); } } } @@ -846,7 +823,6 @@ void BarrierStubBuilder::DoReverseBarrier() env->SubCfgExit(); } - void BarrierStubBuilder::DoReverseBarrierInternal() { auto env = GetEnvironment(); diff --git a/ecmascript/compiler/call_signature.cpp b/ecmascript/compiler/call_signature.cpp index faff89b72d..cf92084880 100644 --- a/ecmascript/compiler/call_signature.cpp +++ b/ecmascript/compiler/call_signature.cpp @@ -970,6 +970,30 @@ DEF_CALL_SIGNATURE(ASMFastWriteBarrier) callSign->SetTargetKind(CallSignature::TargetKind::ASM_CALL_BARRIER_STUB); } +DEF_CALL_SIGNATURE(ASMCMCFastUpdateRSet) +{ + SETVALUEBARRIER_CALL_ARGS_SIGNATURE_COMMON(ASMCMCFastUpdateRSet); + callSign->SetTargetKind(CallSignature::TargetKind::ASM_CALL_BARRIER_STUB); +} + +DEF_CALL_SIGNATURE(ASMCMCFastUpdateRSetAndMarkSatb) +{ + SETVALUEBARRIER_CALL_ARGS_SIGNATURE_COMMON(ASMCMCFastUpdateRSetAndMarkSatb); + callSign->SetTargetKind(CallSignature::TargetKind::ASM_CALL_BARRIER_STUB); +} + +DEF_CALL_SIGNATURE(ASMCMCFastMarkSatb) +{ +
SETVALUEBARRIER_CALL_ARGS_SIGNATURE_COMMON(ASMCMCFastMarkSatb); + callSign->SetTargetKind(CallSignature::TargetKind::ASM_CALL_BARRIER_STUB); +} + +DEF_CALL_SIGNATURE(ASMCMCFastDoNothing) +{ + SETVALUEBARRIER_CALL_ARGS_SIGNATURE_COMMON(ASMCMCFastDoNothing); + callSign->SetTargetKind(CallSignature::TargetKind::ASM_CALL_BARRIER_STUB); +} + DEF_CALL_SIGNATURE(VerifyBarrier) { SETVALUEBARRIER_CALL_ARGS_SIGNATURE_COMMON(VerifyBarrier); @@ -3503,12 +3527,15 @@ DEF_CALL_SIGNATURE(MarkRSetCardTable) DEF_CALL_SIGNATURE(MarkInBuffer) { - // 3 : 3 input parameters - CallSignature MarkInBuffer("MarkInBuffer", 0, 1, ArgumentsOrder::DEFAULT_ORDER, - VariableType::BOOL()); + // 4 : 4 input parameters + CallSignature MarkInBuffer("MarkInBuffer", 0, 4, ArgumentsOrder::DEFAULT_ORDER, + VariableType::VOID()); *callSign = MarkInBuffer; - std::array params = { // 1 : 1 input parameters + std::array params = { // 4 : 4 input parameters + VariableType::NATIVE_POINTER(), VariableType::JS_POINTER(), + VariableType::NATIVE_POINTER(), + VariableType::JS_ANY(), }; callSign->SetParameters(params.data()); callSign->SetGCLeafFunction(true); diff --git a/ecmascript/compiler/call_signature.h b/ecmascript/compiler/call_signature.h index a39c47be8a..8273993cb0 100644 --- a/ecmascript/compiler/call_signature.h +++ b/ecmascript/compiler/call_signature.h @@ -720,7 +720,11 @@ private: V(MarkInBuffer) \ V(BatchMarkInBuffer) \ V(CMCSetValueWithBarrier) \ - V(UpdateSharedModule) + V(UpdateSharedModule) \ + V(ASMCMCFastUpdateRSet) \ + V(ASMCMCFastUpdateRSetAndMarkSatb) \ + V(ASMCMCFastMarkSatb) \ + V(ASMCMCFastDoNothing) #define DECL_CALL_SIGNATURE(name) \ class name##CallSignature final { \ diff --git a/ecmascript/compiler/codegen/llvm/llvm_ir_builder.cpp b/ecmascript/compiler/codegen/llvm/llvm_ir_builder.cpp index 0d9475fbbb..87d305e59d 100644 --- a/ecmascript/compiler/codegen/llvm/llvm_ir_builder.cpp +++ b/ecmascript/compiler/codegen/llvm/llvm_ir_builder.cpp @@ -83,6 +83,7 @@ 
LLVMIRBuilder::LLVMIRBuilder(const std::vector> *schedule, ASSERT(GlobalTargetBuilders().count(triple) && "unsupported target"); targetBuilder_ = GlobalTargetBuilders()[triple](); ASMBarrierCall_ = targetBuilder_->GetASMBarrierCall(module, enableOptDirectCall_); + ASMBarrierIndirectCall_ = targetBuilder_->GetASMBarrierCall(module, false); const char* attrName = "no-builtin-memset"; const char* attrValue = ""; LLVMAddAttributeAtIndex( @@ -1090,7 +1091,7 @@ void LLVMIRBuilder::VisitCall(GateRef gate, const std::vector &inList, } else if (op == OpCode::ASM_CALL_BARRIER) { const size_t index = acc_.GetConstantValue(inList[targetIndex]); calleeDescriptor = RuntimeStubCSigns::Get(index); - if (enableOptDirectCall_) { + if (enableOptDirectCall_ && isStwCopyStub_) { callee = GetOrDeclareFunction(calleeDescriptor); } else { rtoffset = GetRTStubOffset(glue, index); @@ -1165,11 +1166,13 @@ void LLVMIRBuilder::VisitCall(GateRef gate, const std::vector &inList, LLVMValueRef call = nullptr; if (op == OpCode::ASM_CALL_BARRIER) { - if (!enableOptDirectCall_) { + if (enableOptDirectCall_ && isStwCopyStub_) { + call = LLVMBuildCall(builder_, ASMBarrierCall_, params.data(), params.size(), ""); + } else { callee = LLVMBuildPointerCast(builder_, callee, llvmModule_->GetRawPtrT(), ""); params.insert(params.begin(), callee); + call = LLVMBuildCall(builder_, ASMBarrierIndirectCall_, params.data(), params.size(), ""); } - call = LLVMBuildCall(builder_, ASMBarrierCall_, params.data(), params.size(), ""); } else { LLVMTypeRef funcType = llvmModule_->GenerateFuncType(params, calleeDescriptor); callee = LLVMBuildPointerCast(builder_, callee, LLVMPointerType(funcType, 0), ""); diff --git a/ecmascript/compiler/codegen/llvm/llvm_ir_builder.h b/ecmascript/compiler/codegen/llvm/llvm_ir_builder.h index cef89978e9..f6dcd60709 100644 --- a/ecmascript/compiler/codegen/llvm/llvm_ir_builder.h +++ b/ecmascript/compiler/codegen/llvm/llvm_ir_builder.h @@ -503,6 +503,7 @@ private: bool 
enableOptBranchProfiling_ {true}; bool isStwCopyStub_ {false}; LLVMValueRef ASMBarrierCall_ {nullptr}; + LLVMValueRef ASMBarrierIndirectCall_ {nullptr}; LLVMTargetBuilder* targetBuilder_ {nullptr}; static constexpr std::string_view COLD_ATTR = "cold"; static constexpr std::string_view READONLY_ATTR = "readonly"; diff --git a/ecmascript/compiler/post_schedule.cpp b/ecmascript/compiler/post_schedule.cpp index d05772ca16..e1159122d0 100644 --- a/ecmascript/compiler/post_schedule.cpp +++ b/ecmascript/compiler/post_schedule.cpp @@ -618,9 +618,9 @@ int PostSchedule::SelectBarrier(MemoryAttribute::ShareFlag share, const CallSign { int index = 0; if (!isStwCopyStub_) { - index = CommonStubCSigns::SetValueWithBarrier; - cs = CommonStubCSigns::Get(index); - comment = "cmcgc store barrier\0"; + index = RuntimeStubCSigns::ID_ASMFastWriteBarrier; + cs = RuntimeStubCSigns::Get(index); + comment = "asm store barrier\0"; return index; } switch (share) { diff --git a/ecmascript/compiler/stub_builder.cpp b/ecmascript/compiler/stub_builder.cpp index 2f3bdd3aef..af6077aed2 100644 --- a/ecmascript/compiler/stub_builder.cpp +++ b/ecmascript/compiler/stub_builder.cpp @@ -2061,7 +2061,7 @@ void StubBuilder::CMCSetValueWithBarrier(GateRef glue, GateRef obj, [[maybe_unus BRANCH_UNLIKELY(shouldProcessSATB, &markInBuffer, &exit); Bind(&markInBuffer); { - CallNGCRuntime(glue, RTSTUB_ID(MarkInBuffer), {value}); + CallNGCRuntime(glue, RTSTUB_ID(MarkInBuffer), {glue, obj, offset, value}); Jump(&exit); } Bind(&exit); diff --git a/ecmascript/compiler/trampoline/aarch64/asm_interpreter_call.cpp b/ecmascript/compiler/trampoline/aarch64/asm_interpreter_call.cpp index ff7358cd2f..7633d7c6c4 100644 --- a/ecmascript/compiler/trampoline/aarch64/asm_interpreter_call.cpp +++ b/ecmascript/compiler/trampoline/aarch64/asm_interpreter_call.cpp @@ -17,6 +17,7 @@ #include "ecmascript/js_generator_object.h" #include "ecmascript/message_string.h" +#include "common_components/heap/allocator/region_desc.h" namespace 
panda::ecmascript::aarch64 { using Label = panda::ecmascript::Label; @@ -1438,6 +1439,175 @@ void AsmInterpreterCall::ASMFastSharedWriteBarrier(ExtendedAssembler* assembler, } } +// ASMCMCFastUpdateRSet(GateRef glue, GateRef obj, GateRef offset, GateRef value) +// c calling convention, but preserve all general registers except %x15 +// %x0 - glue +// %x1 - obj +// %x2 - offset +// %x3 - value +void AsmInterpreterCall::ASMCMCFastUpdateRSet(ExtendedAssembler *assembler, + const std::function &doShortcut) +{ + using common::RegionDesc; + Label checkValue; + Label checkRSet; + Label updateRSet; + { + // InlinedRegionMetaData *objRegionData = obj & (~RegionDesc::DEFAULT_REGION_UNIT_MASK) + // RegionType objRegionType = objRegionData->regionType_ + // if (objRegionType < THREAD_LOCAL_REGION || objRegionType > FROM_REGION) { + // // obj is in old, check value region + // goto checkValue + // } + __ And(X15, X1, LogicalImmediate::Create(~RegionDesc::DEFAULT_REGION_UNIT_MASK, RegXSize)); + // X15 is InlinedRegionMetaData for obj. + __ Ldrb(Register(X15, W), MemoryOperand(X15, RegionDesc::REGION_TYPE_IN_INLINED_METADATA_OFFSET)); + // X15 is RegionType from InlinedRegionMetaData.
+ __ Sub(Register(X15, W), Register(X15, W), + Immediate(static_cast(RegionDesc::RegionType::THREAD_LOCAL_REGION))); + // X15 = X15 - 2 + __ Cmp(Register(X15, W), Immediate(static_cast(RegionDesc::RegionType::FROM_REGION) + - static_cast(RegionDesc::RegionType::THREAD_LOCAL_REGION))); + __ B(HI, &checkValue); // if X15 - 2 > 2 => checkValue + doShortcut(assembler); + } + __ Bind(&checkValue); + { + // InlinedRegionMetaData *valueRegionData = value & (~RegionDesc::DEFAULT_REGION_UNIT_MASK) + // RegionType valueRegionType = valueRegionData->regionType_ + // if (valueRegionType >= THREAD_LOCAL_REGION && valueRegionType <= FROM_REGION){ + // // value is in young, check and update rset + // goto checkRSet + // } + __ And(X15, X3, LogicalImmediate::Create(~RegionDesc::DEFAULT_REGION_UNIT_MASK, RegXSize)); + // X15 is InlinedRegionMetaData for value. + __ Ldrb(Register(X15, W), MemoryOperand(X15, RegionDesc::REGION_TYPE_IN_INLINED_METADATA_OFFSET)); + // X15 is RegionType from InlinedRegionMetaData. + __ Sub(Register(X15, W), Register(X15, W), + Immediate(static_cast(RegionDesc::RegionType::THREAD_LOCAL_REGION))); + // X15 = X15 - 2 + __ Cmp(Register(X15, W), Immediate(static_cast(RegionDesc::RegionType::FROM_REGION) + - static_cast(RegionDesc::RegionType::THREAD_LOCAL_REGION))); + __ B(LS, &checkRSet); // if X15 - 2 <= 2 => checkRSet + doShortcut(assembler); + } + __ Bind(&checkRSet); + { + { + __ Stp(X16, X17, MemoryOperand(SP, -DOUBLE_SLOT_SIZE, PREINDEX)); + } + // InlinedRegionMetaData *objRegionData = obj & (~RegionDesc::DEFAULT_REGION_UNIT_MASK) + // RegionRSet *objRegionRSet = objRegionData->regionRSet_ + // uint64[512] cardTable = objRegionRSet->cardTable + __ And(X15, X1, LogicalImmediate::Create(~RegionDesc::DEFAULT_REGION_UNIT_MASK, RegXSize)); + // X15 is InlinedRegionMetaData for obj. + __ Ldr(Register(X15, X), MemoryOperand(X15, RegionDesc::REGION_RSET_IN_INLINED_METADATA_OFFSET)); + // X15 is objRegionRSet. 
+ __ Add(Register(X15, X), Register(X15, X), Immediate(common::RegionRSet::CARD_TABLE_DATA_OFFSET)); + // X15 is cardTable. + + // the logic to get card index: + // [63------------------------------------------18][17------9][8----3][2-0] + // slotOffset: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa bbbbbbbbb cccccc ddd + // Ubfm obj address 9 17 + // index: bbbbbbbbb + __ Ubfm(X17, X1, common::kMarkedBytesPerBitLog2 + common::kBitsPerWordLog2, RegionDesc::UNIT_SIZE_LOG2 - 1); + // X17 => obj % RegionDesc::UNIT_SIZE / common::kMarkedBytesPerBit / common::kBitsPerWord + __ Add(X16, X15, Operand(Register(X17), LSL, 3)); // 3 : left shift 3 bits, x 8 + // X16 => cardTable + X17 x sizeof(uint64_t) + + // the logic to get headMaskBitStart: + // [63------------------------------------------18][17------9][8----3][2-0] + // slotOffset: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa bbbbbbbbb cccccc ddd + // Ubfm obj address 3 8 + // headMaskBitStart: cccccc + __ Ubfm(X15, X1, common::kMarkedBytesPerBitLog2, common::kMarkedBytesPerBitLog2 + common::kBitsPerWordLog2 - 1); + // X15 => obj / common::kMarkedBytesPerBit % common::kBitsPerWord + + __ Mov(Register(X17, X), 1); + __ Lsl(Register(X15, X), Register(X17, X), Register(X15, X)); // X17 is the mask + // X15 => 1 << X15, is the one bit mask for obj + + __ Ldr(X17, MemoryOperand(X16, 0)); + // X17 => *X16, is cardTable[index] + __ Tst(X17, Operand(Register(X15))); + // check if the specific bit is marked or not. 
+ __ B(EQ, &updateRSet); + // if (X17 & X15 == 0) { + // goto updateRSet + // } + { + __ Ldp(X16, X17, MemoryOperand(SP, DOUBLE_SLOT_SIZE, POSTINDEX)); + } + doShortcut(assembler); + } + __ Bind(&updateRSet); + { + // atomic update + // updateRSet: + // ldxr x17, [x16] + // orr x17, x17, x15 + // stxr w17, x17, [x16] + // cbnz x17, updateRSet + __ Ldxr(X17, X16); + __ Orr(Register(X17, X), Register(X17, X), Register(X15, X)); + __ Stxr(Register(X17, W), X17, X16); + __ Cbnz(X17, &updateRSet); + { + __ Ldp(X16, X17, MemoryOperand(SP, DOUBLE_SLOT_SIZE, POSTINDEX)); + } + } +} + +// ASMCMCFastUpdateRSet(GateRef glue, GateRef obj, GateRef offset, GateRef value) +// c calling convention, but preserve all general registers except %x15 +// %x0 - glue +// %x1 - obj +// %x2 - offset +// %x3 - value +void AsmInterpreterCall::ASMCMCFastUpdateRSet(ExtendedAssembler *assembler) +{ + __ BindAssemblerStub(RTSTUB_ID(ASMCMCFastUpdateRSet)); + ASMCMCFastUpdateRSet(assembler, [](ExtendedAssembler *assembler) { + __ Ret(); + }); + __ Ret(); +} + +void AsmInterpreterCall::ASMCMCFastUpdateRSetAndMarkSatb(ExtendedAssembler *assembler) +{ + __ BindAssemblerStub(RTSTUB_ID(ASMCMCFastUpdateRSetAndMarkSatb)); + Label markSatb; + ASMCMCFastUpdateRSet(assembler, [&markSatb](ExtendedAssembler *assembler) { + __ B(&markSatb); + }); + __ Bind(&markSatb); + { + int32_t rtMarkInSatb = static_cast(JSThread::GlueData::GetRTStubEntriesOffset(false)) + + RTSTUB_ID(MarkInBuffer) * FRAME_SLOT_SIZE; + __ Mov(X15, rtMarkInSatb); + __ Ldr(X15, MemoryOperand(X0, Register(X15), UXTX)); + PreserveMostCall(assembler); + } +} + +void AsmInterpreterCall::ASMCMCFastMarkSatb(ExtendedAssembler *assembler) +{ + __ BindAssemblerStub(RTSTUB_ID(ASMCMCFastMarkSatb)); + int32_t rtMarkInSatb = static_cast(JSThread::GlueData::GetRTStubEntriesOffset(false)) + + RTSTUB_ID(MarkInBuffer) * FRAME_SLOT_SIZE; + __ Mov(X15, rtMarkInSatb); + __ Ldr(X15, MemoryOperand(X0, Register(X15), UXTX)); + PreserveMostCall(assembler); +} +
+ +void AsmInterpreterCall::ASMCMCFastDoNothing(ExtendedAssembler *assembler) +{ + __ BindAssemblerStub(RTSTUB_ID(ASMCMCFastDoNothing)); + __ Ret(); +} + // Generate code for generator re-entering asm interpreter // c++ calling convention // Input: %X0 - glue diff --git a/ecmascript/compiler/trampoline/aarch64/common_call.h b/ecmascript/compiler/trampoline/aarch64/common_call.h index 2bb07bff4c..c096e3494e 100644 --- a/ecmascript/compiler/trampoline/aarch64/common_call.h +++ b/ecmascript/compiler/trampoline/aarch64/common_call.h @@ -217,8 +217,19 @@ public: static void ASMFastWriteBarrier(ExtendedAssembler *assembler); + static void ASMCMCFastUpdateRSet(ExtendedAssembler *assembler); + + static void ASMCMCFastUpdateRSetAndMarkSatb(ExtendedAssembler *assembler); + + static void ASMCMCFastMarkSatb(ExtendedAssembler *assembler); + + static void ASMCMCFastDoNothing(ExtendedAssembler *assembler); + static void ASMFastSharedWriteBarrier(ExtendedAssembler *assembler, Label& needCall); private: + static void ASMCMCFastUpdateRSet(ExtendedAssembler *assembler, + const std::function &doShortcut); + static void PushCallThis(ExtendedAssembler *assembler, JSCallMode mode, Label *stackOverflow, FrameTransitionType type); diff --git a/ecmascript/compiler/trampoline/x64/asm_interpreter_call.cpp b/ecmascript/compiler/trampoline/x64/asm_interpreter_call.cpp index aece3f6475..fc4ff0b15c 100644 --- a/ecmascript/compiler/trampoline/x64/asm_interpreter_call.cpp +++ b/ecmascript/compiler/trampoline/x64/asm_interpreter_call.cpp @@ -13,11 +13,11 @@ * limitations under the License. 
*/ - #include "ecmascript/compiler/trampoline/x64/common_call.h" #include "ecmascript/js_generator_object.h" #include "ecmascript/message_string.h" +#include "common_components/heap/allocator/region_desc.h" namespace panda::ecmascript::x64 { #define __ assembler-> @@ -1742,6 +1742,123 @@ void AsmInterpreterCall::ASMFastSharedWriteBarrier(ExtendedAssembler* assembler, } } +// ASMCMCFastUpdateRSet(GateRef glue, GateRef obj, GateRef offset, GateRef value) +// c calling convention, but preserve all general registers except %r11 +// %rdi - glue +// %rsi - obj +// %rdx - offset +// %rcx - value +void AsmInterpreterCall::ASMCMCFastUpdateRSet(ExtendedAssembler *assembler, + const std::function &doShortcut) +{ + using common::RegionDesc; + Label checkValue; + Label checkRSet; + Label updateRSet; + { + __ Movabs(~RegionDesc::DEFAULT_REGION_UNIT_MASK, r11); + __ And(rsi, r11); + __ Movzbl(Operand(r11, RegionDesc::REGION_TYPE_IN_INLINED_METADATA_OFFSET), r11); + + __ Subl(Immediate(static_cast(RegionDesc::RegionType::THREAD_LOCAL_REGION)), r11); + __ Cmpl(Immediate(static_cast(RegionDesc::RegionType::FROM_REGION) - + static_cast(RegionDesc::RegionType::THREAD_LOCAL_REGION)), r11); + __ Ja(&checkValue); + doShortcut(assembler); + } + __ Bind(&checkValue); + { + __ Movabs(~RegionDesc::DEFAULT_REGION_UNIT_MASK, r11); + __ And(rcx, r11); + __ Movzbl(Operand(r11, RegionDesc::REGION_TYPE_IN_INLINED_METADATA_OFFSET), r11); + __ Subl(Immediate(static_cast(RegionDesc::RegionType::THREAD_LOCAL_REGION)), r11); + __ Cmpl(Immediate(static_cast(RegionDesc::RegionType::FROM_REGION) - + static_cast(RegionDesc::RegionType::THREAD_LOCAL_REGION)), r11); + __ Jbe(&checkRSet); + doShortcut(assembler); + } + __ Bind(&checkRSet); + { + __ Push(r12); + + __ Movabs(~RegionDesc::DEFAULT_REGION_UNIT_MASK, r11); + __ And(rsi, r11); + __ Movq(Operand(r11, RegionDesc::REGION_RSET_IN_INLINED_METADATA_OFFSET), r11); + __ Addq(Immediate(common::RegionRSet::CARD_TABLE_DATA_OFFSET), r11); + + __ Movq(rsi, r12);
+ __ Shr(Immediate(common::kBitsPerWordLog2), r12); + constexpr uint32_t cardIndexMask = ((1 << RegionDesc::UNIT_SIZE_LOG2) - 1) >> + (common::kBitsPerWordLog2 + common::kMarkedBytesPerBitLog2) << + common::kBitsPerByteLog2; + __ Andq(Immediate(cardIndexMask), r12); + + __ Addq(r11, r12); + + __ Movq(rsi, r11); + __ Shrq(Immediate(common::kMarkedBytesPerBitLog2), r11); + constexpr uint32_t headMaskBitStartMask = ((1 << common::kBitsPerWordLog2) - 1); + __ Andq(Immediate(headMaskBitStartMask), r11); + __ Btq(r11, Operand(r12, 0)); + __ Jnb(&updateRSet); + __ Pop(r12); + doShortcut(assembler); + } + __ Bind(&updateRSet); + { + __ LockPrefix(); + __ Btsq(r11, Operand(r12, 0)); + __ Pop(r12); + } +} + +// ASMCMCFastUpdateRSet(GateRef glue, GateRef obj, GateRef offset, GateRef value) +// c calling convention, but preserve all general registers except %r11 +// %rdi - glue +// %rsi - obj +// %rdx - offset +// %rcx - value +void AsmInterpreterCall::ASMCMCFastUpdateRSet(ExtendedAssembler *assembler) +{ + __ BindAssemblerStub(RTSTUB_ID(ASMCMCFastUpdateRSet)); + ASMCMCFastUpdateRSet(assembler, [](ExtendedAssembler *assembler) { + __ Ret(); + }); + __ Ret(); +} + +void AsmInterpreterCall::ASMCMCFastUpdateRSetAndMarkSatb(ExtendedAssembler *assembler) +{ + __ BindAssemblerStub(RTSTUB_ID(ASMCMCFastUpdateRSetAndMarkSatb)); + Label markSatb; + ASMCMCFastUpdateRSet(assembler, [&markSatb](ExtendedAssembler *assembler) { + __ Jmp(&markSatb); + }); + __ Bind(&markSatb); + { + int32_t rtMarkInSatb = static_cast(JSThread::GlueData::GetRTStubEntriesOffset(false)) + + RTSTUB_ID(MarkInBuffer) * FRAME_SLOT_SIZE; + __ Movq(Operand(rdi, rtMarkInSatb), r11); + PreserveMostCall(assembler); + } +} + +void AsmInterpreterCall::ASMCMCFastMarkSatb(ExtendedAssembler *assembler) +{ + __ BindAssemblerStub(RTSTUB_ID(ASMCMCFastMarkSatb)); + int32_t rtMarkInSatb = static_cast(JSThread::GlueData::GetRTStubEntriesOffset(false)) + + RTSTUB_ID(MarkInBuffer) * FRAME_SLOT_SIZE; + __ Movq(Operand(rdi,
rtMarkInSatb), r11); + PreserveMostCall(assembler); +} + + +void AsmInterpreterCall::ASMCMCFastDoNothing(ExtendedAssembler *assembler) +{ + __ BindAssemblerStub(RTSTUB_ID(ASMCMCFastDoNothing)); + __ Ret(); +} + void AsmInterpreterCall::PushUndefinedWithArgcAndCheckStack(ExtendedAssembler *assembler, Register glue, Register argc, Register op1, Register op2, Label *stackOverflow) { diff --git a/ecmascript/compiler/trampoline/x64/common_call.h b/ecmascript/compiler/trampoline/x64/common_call.h index 2143c13bc8..8905b78d50 100644 --- a/ecmascript/compiler/trampoline/x64/common_call.h +++ b/ecmascript/compiler/trampoline/x64/common_call.h @@ -185,6 +185,15 @@ public: static void ResumeRspAndRollback(ExtendedAssembler *assembler); static void ASMFastWriteBarrier(ExtendedAssembler *assembler); + + static void ASMCMCFastUpdateRSet(ExtendedAssembler *assembler); + + static void ASMCMCFastUpdateRSetAndMarkSatb(ExtendedAssembler *assembler); + + static void ASMCMCFastMarkSatb(ExtendedAssembler *assembler); + + static void ASMCMCFastDoNothing(ExtendedAssembler *assembler); + private: static void PushFrameState(ExtendedAssembler *assembler, Register prevSpRegister, Register fpRegister, Register callTargetRegister, Register thisRegister, Register methodRegister, Register pcRegister, @@ -223,6 +232,8 @@ private: Label *fastPathEntry, Label *pushCallThis, Label *stackOverflow); static void PreserveMostCall(ExtendedAssembler* assembler); static void ASMFastSharedWriteBarrier(ExtendedAssembler *assembler, Label &needcall); + static void ASMCMCFastUpdateRSet(ExtendedAssembler *assembler, + const std::function &doShortcut); friend class OptimizedCall; friend class BaselineCall; }; diff --git a/ecmascript/ecma_vm.cpp b/ecmascript/ecma_vm.cpp index b8fa05a65b..e270799b56 100644 --- a/ecmascript/ecma_vm.cpp +++ b/ecmascript/ecma_vm.cpp @@ -388,8 +388,11 @@ bool EcmaVM::Initialize() quickFixManager_ = new QuickFixManager(); if (options_.GetEnableAsmInterpreter()) { LoadStubFile(); + // 
Since stubs are loaded after RegisterJSThread, we need to update the SetValueWithBarrier stub of CMC + if (g_isEnableCMCGC) { + UpdateCMCWriteBarrierStub(thread_, thread_->GetCMCGCPhase(), thread_->GetCMCGCReason()); + } } - callTimer_ = new FunctionCallTimer(); strategy_ = new ThroughputJSObjectResizingStrategy(); SetRegisterSymbols(SymbolTable::Create(thread_).GetTaggedValue()); diff --git a/ecmascript/mem/cmc_gc/hooks.cpp b/ecmascript/mem/cmc_gc/hooks.cpp index 18fb9f4ec0..7a4e4766cd 100644 --- a/ecmascript/mem/cmc_gc/hooks.cpp +++ b/ecmascript/mem/cmc_gc/hooks.cpp @@ -25,6 +25,7 @@ #include "ecmascript/mem/tagged_state_word.h" #include "ecmascript/mem/visitor.h" #include "ecmascript/runtime.h" +#include "ecmascript/interpreter/interpreter-inl.h" #include "objects/base_type.h" #include "objects/composite_base_class.h" @@ -274,8 +275,10 @@ void VisitJSThread(void *jsThread, CommonRootVisitor visitor) void SynchronizeGCPhaseToJSThread(void *jsThread, GCPhase gcPhase) { - reinterpret_cast(jsThread)->SetCMCGCPhase(gcPhase); - reinterpret_cast(jsThread)->SetCMCGCReason(Heap::GetHeap().GetGCReason()); + static_cast(jsThread)->SetCMCGCPhase(gcPhase); + GCReason gcReason = Heap::GetHeap().GetGCReason(); + static_cast(jsThread)->SetCMCGCReason(gcReason); + UpdateCMCWriteBarrierStub(jsThread, gcPhase, gcReason); if (panda::ecmascript::g_isEnableCMCGC) { // forcely enable read barrier for read barrier DFX #ifdef ENABLE_CMC_RB_DFX @@ -298,6 +301,45 @@ void MarkThreadLocalJitFortInstalled(void *thread, void *machineCode) ->MarkJitFortMemInstalled(reinterpret_cast(machineCode)); } +void UpdateCMCWriteBarrierStub(void *jsThread, GCPhase gcPhase, GCReason gcReason) +{ + using panda::ecmascript::kungfu::RuntimeStubCSigns; + using panda::ecmascript::CommonStubCSigns; + panda::ecmascript::Address barrierAddr; + auto *thread = static_cast(jsThread); + switch (gcPhase) { + case GC_PHASE_ENUM: + case GC_PHASE_MARK: + if (gcReason == GC_REASON_YOUNG) { + barrierAddr = 
thread->GetRTInterface(RuntimeStubCSigns::ID_ASMCMCFastUpdateRSetAndMarkSatb); + } else { + barrierAddr = thread->GetRTInterface(RuntimeStubCSigns::ID_ASMCMCFastMarkSatb); + } + break; + case GC_PHASE_POST_MARK: + if (gcReason == GC_REASON_YOUNG) { + barrierAddr = thread->GetRTInterface(RuntimeStubCSigns::ID_ASMCMCFastUpdateRSet); + } else { + barrierAddr = thread->GetRTInterface(RuntimeStubCSigns::ID_ASMCMCFastDoNothing); + } + break; + case GC_PHASE_REMARK_SATB: + case GC_PHASE_FINAL_MARK: + barrierAddr = thread->GetRTInterface(RuntimeStubCSigns::ID_ASMCMCFastUpdateRSetAndMarkSatb); + break; + case GC_PHASE_IDLE: + case GC_PHASE_PRECOPY: + case GC_PHASE_COPY: + case GC_PHASE_FIX: + barrierAddr = thread->GetRTInterface(RuntimeStubCSigns::ID_ASMCMCFastUpdateRSet); + break; + default: + LOG_ECMA(FATAL) << "Unknown GC phase " << gcPhase; + UNREACHABLE(); + } + thread->RegisterRTInterface(RuntimeStubCSigns::ID_ASMFastWriteBarrier, barrierAddr); +} + void SweepThreadLocalJitFort() { panda::ecmascript::Runtime* runtime = panda::ecmascript::Runtime::GetInstance(); diff --git a/ecmascript/stubs/runtime_stub_list.h b/ecmascript/stubs/runtime_stub_list.h index b51b764fdd..bac1c37142 100644 --- a/ecmascript/stubs/runtime_stub_list.h +++ b/ecmascript/stubs/runtime_stub_list.h @@ -56,7 +56,12 @@ namespace panda::ecmascript { V(CallContainersArgs2) \ V(CallContainersArgs3) \ V(CallReturnWithArgv) \ - V(ASMFastWriteBarrier) + V(ASMFastWriteBarrier) \ + V(ASMCMCFastUpdateRSet) \ + V(ASMCMCFastUpdateRSetAndMarkSatb) \ + V(ASMCMCFastMarkSatb) \ + V(ASMCMCFastDoNothing) + #define BASELINE_TRAMPOLINE_LIST(V) \ V(CallArg0AndCheckToBaseline) \ diff --git a/ecmascript/stubs/runtime_stubs.cpp b/ecmascript/stubs/runtime_stubs.cpp index 9b13b9f3df..524b482bd5 100644 --- a/ecmascript/stubs/runtime_stubs.cpp +++ b/ecmascript/stubs/runtime_stubs.cpp @@ -5018,10 +5018,12 @@ bool RuntimeStubs::MarkRSetCardTable(BaseObject* obj) return region->MarkRSetCardTable(obj); } -void 
RuntimeStubs::MarkInBuffer(BaseObject* ref) +void RuntimeStubs::MarkInBuffer(uintptr_t argGlue, [[maybe_unused]] BaseObject *obj, [[maybe_unused]] uintptr_t offset, + BaseObject *ref) { - ref = reinterpret_cast(reinterpret_cast(ref) & ~(common::Barrier::TAG_WEAK)); - common::Mutator* mutator = common::Mutator::GetMutator(); + ref = reinterpret_cast(reinterpret_cast(ref) & ~(common::Barrier::TAG_WEAK)); + auto thread = JSThread::GlueToJSThread(argGlue); + common::Mutator *mutator = static_cast(thread->GetThreadHolder()->GetMutator()); mutator->RememberObjectInSatbBuffer(ref); } diff --git a/ecmascript/stubs/runtime_stubs.h b/ecmascript/stubs/runtime_stubs.h index 9305cb5491..1cb5a63406 100644 --- a/ecmascript/stubs/runtime_stubs.h +++ b/ecmascript/stubs/runtime_stubs.h @@ -183,7 +183,7 @@ public: static void TraceLazyDeoptCommitSuccess(uintptr_t argGlue, JSHandle func); static JSTaggedValue GetExternalModuleVar(uintptr_t argGlue, JSFunction *jsFunc, int32_t index); static bool MarkRSetCardTable(BaseObject* obj); - static void MarkInBuffer(BaseObject* ref); + static void MarkInBuffer(uintptr_t argGlue, BaseObject* obj, uintptr_t offset, BaseObject* ref); static void BatchMarkInBuffer(void* src, size_t count); private: -- Gitee