diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go index e4da86db51ad8b2ca143290c75dab28d47484067..45a27428b856dfee8bbf0870f527f247a7ec4325 100644 --- a/src/cmd/compile/internal/ssagen/intrinsics.go +++ b/src/cmd/compile/internal/ssagen/intrinsics.go @@ -380,7 +380,9 @@ func initIntrinsics(cfg *intrinsicBuildConfig) { makeAtomicGuardedIntrinsicARM64common := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter, needReturn bool) intrinsicBuilder { return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - if cfg.goarm64.LSE { + if buildcfg.GOARM64.LSE { + emit(s, n, args, op0, typ, needReturn) + } else if cfg.goarm64.LSE { emit(s, n, args, op1, typ, needReturn) } else { // Target Atomic feature is identified by dynamic detection diff --git a/src/cmd/go/internal/work/gc.go b/src/cmd/go/internal/work/gc.go index 3a173efee88ae41e5ad806d66efc08efeccffd71..3ce12219c0acefe03e8914eed88bb834c25faaa8 100644 --- a/src/cmd/go/internal/work/gc.go +++ b/src/cmd/go/internal/work/gc.go @@ -383,6 +383,9 @@ func asmArgs(a *Action, p *load.Package) []any { if err == nil && g.LSE { args = append(args, "-D", "GOARM64_LSE") } + if err == nil && g.KPAtomicOpt { + args = append(args, "-D", "KPAtomicOpt") + } } return args diff --git a/src/internal/buildcfg/cfg.go b/src/internal/buildcfg/cfg.go index fca09bf8d3384a935582f6847af1389cfa204ba5..e115d7f95a0fcfaecdc7c121da47cb8f10cfe84a 100644 --- a/src/internal/buildcfg/cfg.go +++ b/src/internal/buildcfg/cfg.go @@ -179,6 +179,8 @@ type Goarm64Features struct { // * FEAT_SHA1, which includes the SHA1* instructions. // * FEAT_SHA256, which includes the SHA256* instructions. 
Crypto bool + // KPAtomicOpt enables the Kunpeng atomic optimization; see the kpatomicopt suffix in ParseGoarm64. + KPAtomicOpt bool } func (g Goarm64Features) String() string { @@ -189,17 +191,22 @@ func (g Goarm64Features) String() string { if g.Crypto { arm64Str += ",crypto" } + if g.KPAtomicOpt { + arm64Str += ",kpatomicopt" + } return arm64Str } func ParseGoarm64(v string) (g Goarm64Features, e error) { const ( - lseOpt = ",lse" - cryptoOpt = ",crypto" + lseOpt = ",lse" + cryptoOpt = ",crypto" + kpAtomicOpt = ",kpatomicopt" ) g.LSE = false g.Crypto = false + g.KPAtomicOpt = false // We allow any combination of suffixes, in any order for { if strings.HasSuffix(v, lseOpt) { @@ -214,20 +221,24 @@ func ParseGoarm64(v string) (g Goarm64Features, e error) { continue } + if strings.HasSuffix(v, kpAtomicOpt) { + if os.Getenv("KP_AI_OPT") == "1" { + g.KPAtomicOpt = true + } + v = v[:len(v)-len(kpAtomicOpt)] + continue + } + break } switch v { - case "v8.0": - g.Version = v - case "v8.1", "v8.2", "v8.3", "v8.4", "v8.5", "v8.6", "v8.7", "v8.8", "v8.9", + case "v8.0", "v8.1", "v8.2", "v8.3", "v8.4", "v8.5", "v8.6", "v8.7", "v8.8", "v8.9", "v9.0", "v9.1", "v9.2", "v9.3", "v9.4", "v9.5": g.Version = v - // LSE extension is mandatory starting from 8.1 - g.LSE = true default: - e = fmt.Errorf("invalid GOARM64: must start with v8.{0-9} or v9.{0-5} and may optionally end in %q and/or %q", - lseOpt, cryptoOpt) + e = fmt.Errorf("invalid GOARM64: must start with v8.{0-9} or v9.{0-5} and may optionally end in %q, %q and/or %q", + lseOpt, cryptoOpt, kpAtomicOpt) g.Version = DefaultGOARM64 } diff --git a/src/reflect/asm_arm64.s b/src/reflect/asm_arm64.s index 5e91e62aa145251afc8058150ef64b9f1508f265..23677a9e6d36ffdd53d2ba73b7966130a8129ff8 100644 --- a/src/reflect/asm_arm64.s +++ b/src/reflect/asm_arm64.s @@ -19,6 +19,13 @@ #define LOCAL_RETVALID 40 #define LOCAL_REGARGS 48 +GLOBL ·kpAtomicOpt(SB),(NOPTR|WRAPPER),$1 +#ifndef KPAtomicOpt +DATA ·kpAtomicOpt(SB)/1,$0 +#else +DATA ·kpAtomicOpt(SB)/1,$1 +#endif + // The frame size of the functions below is // 
32 (args of callReflect) + 8 (bool + padding) + 392 (abi.RegArgs) = 432. diff --git a/src/reflect/type.go b/src/reflect/type.go index 0e41a6db992e1cd202dcb484ac0eb2cd56bbee2e..0a25ccf7a1140b7f9c9b7e3c4fedcff0717d000f 100644 --- a/src/reflect/type.go +++ b/src/reflect/type.go @@ -1808,9 +1808,18 @@ func ChanOf(dir ChanDir, t Type) Type { } var funcTypes []Type -var funcTypesMutex sync.Mutex +var funcTypesMutex sync.RWMutex +var kpAtomicOpt bool func initFuncTypes(n int) Type { + if kpAtomicOpt { + funcTypesMutex.RLock() + if n < len(funcTypes) && funcTypes[n] != nil { + defer funcTypesMutex.RUnlock() + return funcTypes[n] + } + funcTypesMutex.RUnlock() + } funcTypesMutex.Lock() defer funcTypesMutex.Unlock() if n >= len(funcTypes) { diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 73d663f7f59e67986a6603dfe9b09c03412f99ca..1f5481acd8d38d9ed0f04dcb4a9c0f059239ab25 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -1092,6 +1092,9 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { if debug.malloc { postMallocgcDebug(x, elemsize, typ) } + + sys.Prefetch(uintptr(unsafe.Add(x, size))) + return x } diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 48001cfdb9463182914fb63efb70106f504f60b0..fecb5f88d53c3a5e384d3fadf9b3b2bcad0023bd 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -184,7 +184,7 @@ func gcinit() { // Initialize GC pacer state. // Use the environment variable GOGC for the initial gcPercent value. // Use the environment variable GOMEMLIMIT for the initial memoryLimit value. 
- gcController.init(readGOGC(), readGOMEMLIMIT()) + gcController.init(readGOGC(), readGOMEMLIMIT(), readGOGCRATIO()) work.startSema = 1 work.markDoneSema = 1 diff --git a/src/runtime/mgclimit.go b/src/runtime/mgclimit.go index ad86fbd65bceb34fcf667c0b7d4c7c482b0b55a4..ae35ee8d91c07cbfcc3cbc4453c95e7449ae82ff 100644 --- a/src/runtime/mgclimit.go +++ b/src/runtime/mgclimit.go @@ -230,7 +230,7 @@ func (l *gcCPULimiterState) updateLocked(now int64) { // Compute total GC time. windowGCTime := assistTime if l.gcEnabled { - windowGCTime += int64(float64(windowTotalTime) * gcBackgroundUtilization) + windowGCTime += int64(float64(windowTotalTime) * gcController.gcRatio) } // Subtract out all idle time from the total time. Do this after computing diff --git a/src/runtime/mgcpacer.go b/src/runtime/mgcpacer.go index 3e80fae4f534647bdc9dee3ac8adbcd8d536c272..7e11293458a4b0499b1bd688d9857d3abad2010e 100644 --- a/src/runtime/mgcpacer.go +++ b/src/runtime/mgcpacer.go @@ -12,30 +12,6 @@ import ( ) const ( - // gcGoalUtilization is the goal CPU utilization for - // marking as a fraction of GOMAXPROCS. - // - // Increasing the goal utilization will shorten GC cycles as the GC - // has more resources behind it, lessening costs from the write barrier, - // but comes at the cost of increasing mutator latency. - gcGoalUtilization = gcBackgroundUtilization - - // gcBackgroundUtilization is the fixed CPU utilization for background - // marking. It must be <= gcGoalUtilization. The difference between - // gcGoalUtilization and gcBackgroundUtilization will be made up by - // mark assists. The scheduler will aim to use within 50% of this - // goal. - // - // As a general rule, there's little reason to set gcBackgroundUtilization - // < gcGoalUtilization. One reason might be in mostly idle applications, - // where goroutines are unlikely to assist at all, so the actual - // utilization will be lower than the goal. 
But this is moot point - // because the idle mark workers already soak up idle CPU resources. - // These two values are still kept separate however because they are - // distinct conceptually, and in previous iterations of the pacer the - // distinction was more important. - gcBackgroundUtilization = 0.25 - // gcCreditSlack is the amount of scan work credit that can // accumulate locally before updating gcController.heapScanWork and, // optionally, gcController.bgScanCredit. Lower values give a more @@ -72,6 +48,15 @@ const ( // to maintain the memory limit. memoryLimitHeapGoalHeadroomPercent = 3 ) +// gcGoalUtilization is the goal CPU utilization for +// marking as a fraction of GOMAXPROCS. +// +// Increasing the goal utilization will shorten GC cycles as the GC +// has more resources behind it, lessening costs from the write barrier, +// but comes at the cost of increasing mutator latency. +var gcGoalUtilization = gcController.gcRatio + + // gcController implements the GC pacing controller that determines // when to trigger concurrent garbage collection and how much marking @@ -88,6 +73,11 @@ const ( var gcController gcControllerState type gcControllerState struct { + // gcRatio is the background GC CPU utilization as a fraction; it takes the place of the gcBackgroundUtilization constant. + // It is initialized from GOGCRATIO/100.0, where GOGCRATIO is clamped to the range [1, 99]. + // The default GOGCRATIO is 25. + gcRatio float64 + // Initialized from GOGC. GOGC=off means no GC. gcPercent atomic.Int32 @@ -366,11 +356,12 @@ type gcControllerState struct { _ cpu.CacheLinePad } -func (c *gcControllerState) init(gcPercent int32, memoryLimit int64) { +func (c *gcControllerState) init(gcPercent int32, memoryLimit int64, gcRatio float64) { c.heapMinimum = defaultHeapMinimum c.triggered = ^uint64(0) c.setGCPercent(gcPercent) c.setMemoryLimit(memoryLimit) + c.setGOGCRatio(gcRatio) c.commit(true) // No sweep phase in the first GC cycle. // N.B. Don't bother calling traceHeapGoal. Tracing is never enabled at // initialization time. 
@@ -398,7 +389,7 @@ func (c *gcControllerState) startCycle(markStartTime int64, procs int, trigger g
 	// dedicated workers so that the utilization is closest to
 	// 25%. For small GOMAXPROCS, this would introduce too much
 	// error, so we add fractional workers in that case.
-	totalUtilizationGoal := float64(procs) * gcBackgroundUtilization
+	totalUtilizationGoal := float64(procs) * gcController.gcRatio
 	dedicatedMarkWorkersNeeded := int64(totalUtilizationGoal + 0.5)
 	utilError := float64(dedicatedMarkWorkersNeeded)/totalUtilizationGoal - 1
 	const maxUtilError = 0.3
@@ -604,7 +595,7 @@ func (c *gcControllerState) endCycle(now int64, procs int, userForced bool) {
 	assistDuration := now - c.markStartTime
 
 	// Assume background mark hit its utilization goal.
-	utilization := gcBackgroundUtilization
+	utilization := gcController.gcRatio
 	// Add assist utilization; avoid divide by zero.
 	if assistDuration > 0 {
 		utilization += float64(c.assistTime.Load()) / float64(assistDuration*int64(procs))
@@ -1344,6 +1335,58 @@ func readGOMEMLIMIT() int64 {
 	return n
 }
 
+// setGOGCRatio stores in as c's background GC CPU fraction and returns the
+// value it replaces.
+//
+// Unless c.test is set, it first calls assertWorldStoppedOrLockHeld on
+// mheap_.lock.
+func (c *gcControllerState) setGOGCRatio(in float64) float64 {
+	if !c.test {
+		assertWorldStoppedOrLockHeld(&mheap_.lock)
+	}
+
+	out := c.gcRatio
+	c.gcRatio = in
+	if c == &gcController {
+		// gcGoalUtilization is a package-level copy of
+		// gcController.gcRatio; refresh it here so it cannot go stale.
+		gcGoalUtilization = in
+	}
+
+	return out
+}
+
+// readGOGCRATIO returns the background GC CPU fraction selected by the
+// GOGCRATIO environment variable.
+//
+// GOGCRATIO is an integer percentage. It is clamped to [1, 99] and returned
+// as a fraction, so the result lies in [0.01, 0.99]. An unset or empty
+// variable yields the default of 0.25. Any value that is not a non-negative
+// integer accepted by atoi32 is printed and then reported through throw.
+func readGOGCRATIO() float64 {
+	p := gogetenv("GOGCRATIO")
+	if p == "" {
+		return 0.25
+	}
+	// Parse a plain integer, not a byte count: parseByteCount is meant for
+	// sizes and presumably tolerates unit suffixes that are meaningless for
+	// a percentage (NOTE(review): confirm against its definition).
+	n, ok := atoi32(p)
+	if !ok || n < 0 {
+		print("GOGCRATIO=", p, "\n")
+		throw("malformed GOGCRATIO; expected a non-negative integer percentage")
+	}
+
+	switch {
+	case n < 1:
+		n = 1
+	case n > 99:
+		n = 99
+	}
+
+	return float64(n) / 100.0
+}
+
 // addIdleMarkWorker attempts to add a new idle mark worker.
 //
 // If this returns true, the caller must become an idle mark worker unless