// gcFlushBgCredit flushes scanWork units of background scan work
// credit. This first satisfies blocked assists on the
// work.assistQueue and then flushes any remaining credit to
// gcController.bgScanCredit.
//
// Write barriers are disallowed because this is used by gcDrain after
// it has ensured that all work is drained and this must preserve that
// condition.
//
//go:nowritebarrierrec
func gcFlushBgCredit(scanWork int64) {
	if work.assistQueue.head == 0 {
		// Fast path; there are no blocked assists. There's a
		// small window here where an assist may add itself to
		// the blocked queue and park. If that happens, we'll
		// just get it on the next flush.
		atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
		return
	}

	scanBytes := int64(float64(scanWork) * gcController.assistBytesPerWork)

	lock(&work.assistQueue.lock)
	gp := work.assistQueue.head.ptr()
	for gp != nil && scanBytes > 0 {
		// Note that gp.gcAssistBytes is negative because gp
		// is in debt. Think carefully about the signs below.
		if scanBytes+gp.gcAssistBytes >= 0 {
			// Satisfy this entire assist debt.
			scanBytes += gp.gcAssistBytes
			gp.gcAssistBytes = 0
			xgp := gp
			gp = gp.schedlink.ptr()
			ready(xgp, 0)
		} else {
			// Partially satisfy this assist.
			gp.gcAssistBytes += scanBytes
			scanBytes = 0
			// As a heuristic, we move this assist to the
			// back of the queue so that large assists
			// can't clog up the assist queue and
			// substantially delay small assists.
			xgp := gp
			gp = gp.schedlink.ptr()
			if gp == nil {
				// gp is the only assist in the queue.
				gp = xgp
			} else {
				xgp.schedlink = 0
				work.assistQueue.tail.ptr().schedlink.set(xgp)
				work.assistQueue.tail.set(xgp)
			}
			break
		}
	}
	work.assistQueue.head.set(gp)
	if gp == nil {
		work.assistQueue.tail.set(nil)
	}

	if scanBytes > 0 {
		// Convert from scan bytes back to work.
		scanWork = int64(float64(scanBytes) * gcController.assistWorkPerByte)
		atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
	}
	unlock(&work.assistQueue.lock)
}
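// The sign convention above is easy to get wrong: debts are negative
// byte counts and credit is positive, so paying a debt in full means
// adding the (negative) debt to the credit. The following is a minimal,
// self-contained sketch of the same accounting, not runtime code; the
// names flushCredit and debts are hypothetical.
func flushCredit(scanBytes int64, debts []int64) (leftover int64) {
	for i := range debts {
		if scanBytes <= 0 {
			break
		}
		if scanBytes+debts[i] >= 0 {
			// Enough credit to pay this debt in full.
			scanBytes += debts[i] // debts[i] is negative, so this subtracts.
			debts[i] = 0
		} else {
			// Only enough credit to pay part of it.
			debts[i] += scanBytes
			scanBytes = 0
		}
	}
	return scanBytes
}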
// gcDrainN blackens grey objects until it has performed roughly
// scanWork units of scan work or the G is preempted. This is
// best-effort, so it may perform less work if it fails to get a work
// buffer. Otherwise, it will perform at least n units of work, but
// may perform more because scanning is always done in whole object
// increments. It returns the amount of scan work performed.
//
//go:nowritebarrier
func gcDrainN(gcw *gcWork, scanWork int64) int64 {
	if !writeBarrier.needed {
		throw("gcDrainN phase incorrect")
	}

	// There may already be scan work on the gcw, which we don't
	// want to claim was done by this call.
	workFlushed := -gcw.scanWork

	gp := getg().m.curg
	for !gp.preempt && workFlushed+gcw.scanWork < scanWork {
		// This might be a good place to add prefetch code...
		// if(wbuf.nobj > 4) {
		//         PREFETCH(wbuf->obj[wbuf.nobj - 3]);
		// }
		b := gcw.tryGet()
		if b == 0 {
			break
		}
		scanobject(b, gcw)

		// Flush background scan work credit.
		if gcw.scanWork >= gcCreditSlack {
			atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
			workFlushed += gcw.scanWork
			gcw.scanWork = 0
		}
	}

	// Unlike gcDrain, there's no need to flush remaining work
	// here because this never flushes to bgScanCredit and
	// gcw.dispose will flush any remaining work to scanWork.

	return workFlushed + gcw.scanWork
}
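// Why workFlushed starts negative: gcw.scanWork may already hold work
// performed by an earlier caller, and gcDrainN must not take credit for
// it. A minimal sketch of the same bookkeeping with plain integers; the
// names drainAccounting, local, and doWork are illustrative, not
// runtime identifiers.
func drainAccounting(local *int64, doWork func() int64, target int64) int64 {
	flushed := -*local // cancel out work this call did not do
	for flushed+*local < target {
		n := doWork() // work performed for one object; 0 means no work left
		if n == 0 {
			break
		}
		*local += n
	}
	return flushed + *local // only the work performed inside this call
}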
// dispose returns any cached pointers to the global queue.
// The buffers are being put on the full queue so that the
// write barriers will not simply reacquire them before the
// GC can inspect them. This helps reduce the mutator's
// ability to hide pointers during the concurrent mark phase.
//
//go:nowritebarrier
func (w *gcWork) dispose() {
	if wbuf := w.wbuf1.ptr(); wbuf != nil {
		if wbuf.nobj == 0 {
			putempty(wbuf, 212)
		} else {
			putfull(wbuf, 214)
		}
		w.wbuf1 = 0

		wbuf = w.wbuf2.ptr()
		if wbuf.nobj == 0 {
			putempty(wbuf, 218)
		} else {
			putfull(wbuf, 220)
		}
		w.wbuf2 = 0
	}
	if w.bytesMarked != 0 {
		// dispose happens relatively infrequently. If this
		// atomic becomes a problem, we should first try to
		// dispose less and if necessary aggregate in a per-P
		// counter.
		atomic.Xadd64(&work.bytesMarked, int64(w.bytesMarked))
		w.bytesMarked = 0
	}
	if w.scanWork != 0 {
		atomic.Xaddint64(&gcController.scanWork, w.scanWork)
		w.scanWork = 0
	}
}
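// The pattern above — accumulate locally, publish to shared counters
// only on dispose — keeps the hot path free of atomics. A standalone
// sketch of that pattern using sync/atomic; the worker and global types
// below are illustrative, not runtime types, and assume
// import "sync/atomic".
type worker struct {
	bytesMarked uint64
	scanWork    int64
}

var global struct {
	bytesMarked uint64
	scanWork    int64
}

func (w *worker) dispose() {
	if w.bytesMarked != 0 {
		atomic.AddUint64(&global.bytesMarked, w.bytesMarked)
		w.bytesMarked = 0
	}
	if w.scanWork != 0 {
		atomic.AddInt64(&global.scanWork, w.scanWork)
		w.scanWork = 0
	}
}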
// gcDrainN blackens grey objects until it has performed roughly
// scanWork units of scan work or the G is preempted. This is
// best-effort, so it may perform less work if it fails to get a work
// buffer. Otherwise, it will perform at least n units of work, but
// may perform more because scanning is always done in whole object
// increments. It returns the amount of scan work performed.
//
// The caller goroutine must be in a preemptible state (e.g.,
// _Gwaiting) to prevent deadlocks during stack scanning. As a
// consequence, this must be called on the system stack.
//
//go:nowritebarrier
//go:systemstack
func gcDrainN(gcw *gcWork, scanWork int64) int64 {
	if !writeBarrier.needed {
		throw("gcDrainN phase incorrect")
	}

	// There may already be scan work on the gcw, which we don't
	// want to claim was done by this call.
	workFlushed := -gcw.scanWork

	gp := getg().m.curg
	for !gp.preempt && workFlushed+gcw.scanWork < scanWork {
		// See gcDrain comment.
		if work.full == 0 {
			gcw.balance()
		}

		// This might be a good place to add prefetch code...
		// if(wbuf.nobj > 4) {
		//         PREFETCH(wbuf->obj[wbuf.nobj - 3]);
		// }
		//
		b := gcw.tryGetFast()
		if b == 0 {
			b = gcw.tryGet()
		}

		if b == 0 {
			// Try to do a root job.
			//
			// TODO: Assists should get credit for this
			// work.
			if work.markrootNext < work.markrootJobs {
				job := atomic.Xadd(&work.markrootNext, +1) - 1
				if job < work.markrootJobs {
					markroot(gcw, job)
					continue
				}
			}
			// No heap or root jobs.
			break
		}
		scanobject(b, gcw)

		// Flush background scan work credit.
		if gcw.scanWork >= gcCreditSlack {
			atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
			workFlushed += gcw.scanWork
			gcw.scanWork = 0
		}
	}

	// Unlike gcDrain, there's no need to flush remaining work
	// here because this never flushes to bgScanCredit and
	// gcw.dispose will flush any remaining work to scanWork.

	return workFlushed + gcw.scanWork
}
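// Root jobs are claimed by atomically bumping a shared index and using
// the previous value as the job number; the post-increment check
// handles the race where several claimants overshoot the job count. A
// minimal sketch of that pattern; claimJob, next, and total are
// illustrative names, and the sketch assumes import "sync/atomic".
func claimJob(next *uint32, total uint32) (job uint32, ok bool) {
	if atomic.LoadUint32(next) >= total {
		return 0, false // fast path: nothing left to claim
	}
	job = atomic.AddUint32(next, 1) - 1 // reserve the next index
	if job >= total {
		return 0, false // lost the race to the last job
	}
	return job, true
}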
// gcFlushBgCredit flushes scanWork units of background scan work
// credit. This first satisfies blocked assists on the
// work.assistQueue and then flushes any remaining credit to
// gcController.bgScanCredit.
//
// Write barriers are disallowed because this is used by gcDrain after
// it has ensured that all work is drained and this must preserve that
// condition.
//
//go:nowritebarrierrec
func gcFlushBgCredit(scanWork int64) {
	if work.assistQueue.head == 0 {
		// Fast path; there are no blocked assists. There's a
		// small window here where an assist may add itself to
		// the blocked queue and park. If that happens, we'll
		// just get it on the next flush.
		atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
		return
	}

	scanBytes := int64(float64(scanWork) * gcController.assistBytesPerWork)

	lock(&work.assistQueue.lock)
	gp := work.assistQueue.head.ptr()
	for gp != nil && scanBytes > 0 {
		// Note that gp.gcAssistBytes is negative because gp
		// is in debt. Think carefully about the signs below.
		if scanBytes+gp.gcAssistBytes >= 0 {
			// Satisfy this entire assist debt.
			scanBytes += gp.gcAssistBytes
			gp.gcAssistBytes = 0
			xgp := gp
			gp = gp.schedlink.ptr()
			// It's important that we *not* put xgp in
			// runnext. Otherwise, it's possible for user
			// code to exploit the GC worker's high
			// scheduler priority to get itself always run
			// before other goroutines and always in the
			// fresh quantum started by GC.
			ready(xgp, 0, false)
		} else {
			// Partially satisfy this assist.
			gp.gcAssistBytes += scanBytes
			scanBytes = 0
			// As a heuristic, we move this assist to the
			// back of the queue so that large assists
			// can't clog up the assist queue and
			// substantially delay small assists.
			xgp := gp
			gp = gp.schedlink.ptr()
			if gp == nil {
				// gp is the only assist in the queue.
				gp = xgp
			} else {
				xgp.schedlink = 0
				work.assistQueue.tail.ptr().schedlink.set(xgp)
				work.assistQueue.tail.set(xgp)
			}
			break
		}
	}
	work.assistQueue.head.set(gp)
	if gp == nil {
		work.assistQueue.tail.set(nil)
	}

	if scanBytes > 0 {
		// Convert from scan bytes back to work.
		scanWork = int64(float64(scanBytes) * gcController.assistWorkPerByte)
		atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
	}
	unlock(&work.assistQueue.lock)
}
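// The move-to-back step above rotates a partially satisfied waiter to
// the tail so one large debt cannot starve the small ones behind it. A
// self-contained sketch of the same heuristic on a slice-based queue;
// rotateToBack and the int64 "debt" entries are illustrative, not
// runtime code.
func rotateToBack(queue []int64) []int64 {
	if len(queue) <= 1 {
		return queue // a single waiter stays where it is
	}
	head := queue[0]
	copy(queue, queue[1:])
	queue[len(queue)-1] = head
	return queue
}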
// gcAssistAlloc1 is the part of gcAssistAlloc that runs on the system
// stack. This is a separate function to make it easier to see that
// we're not capturing anything from the user stack, since the user
// stack may move while we're in this function.
//
// gcAssistAlloc1 indicates whether this assist completed the mark
// phase by setting gp.param to non-nil. This can't be communicated on
// the stack since it may move.
//
//go:systemstack
func gcAssistAlloc1(gp *g, scanWork int64) {
	// Clear the flag indicating that this assist completed the
	// mark phase.
	gp.param = nil

	if atomic.Load(&gcBlackenEnabled) == 0 {
		// The gcBlackenEnabled check in malloc races with the
		// store that clears it but an atomic check in every malloc
		// would be a performance hit.
		// Instead we recheck it here on the non-preemptible system
		// stack to determine if we should perform an assist.

		// GC is done, so ignore any remaining debt.
		gp.gcAssistBytes = 0
		return
	}
	// Track time spent in this assist. Since we're on the
	// system stack, this is non-preemptible, so we can
	// just measure start and end time.
	startTime := nanotime()

	decnwait := atomic.Xadd(&work.nwait, -1)
	if decnwait == work.nproc {
		println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc)
		throw("nwait > work.nprocs")
	}

	// gcDrainN requires the caller to be preemptible.
	casgstatus(gp, _Grunning, _Gwaiting)
	gp.waitreason = "GC assist marking"

	// Drain own cached work first in the hopes that it
	// will be more cache friendly.
	gcw := &getg().m.p.ptr().gcw
	workDone := gcDrainN(gcw, scanWork)
	// If we are near the end of the mark phase,
	// dispose of the gcw.
	if gcBlackenPromptly {
		gcw.dispose()
	}

	casgstatus(gp, _Gwaiting, _Grunning)

	// Record that we did this much scan work.
	//
	// Back out the number of bytes of assist credit that
	// this scan work counts for. The "1+" is a poor man's
	// round-up, to ensure this adds credit even if
	// assistBytesPerWork is very low.
	gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(workDone))

	// If this is the last worker and we ran out of work,
	// signal a completion point.
	incnwait := atomic.Xadd(&work.nwait, +1)
	if incnwait > work.nproc {
		println("runtime: work.nwait=", incnwait, "work.nproc=", work.nproc, "gcBlackenPromptly=", gcBlackenPromptly)
		throw("work.nwait > work.nproc")
	}

	if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
		// This has reached a background completion point. Set
		// gp.param to a non-nil value to indicate this. It
		// doesn't matter what we set it to (it just has to be
		// a valid pointer).
		gp.param = unsafe.Pointer(gp)
	}
	duration := nanotime() - startTime
	_p_ := gp.m.p.ptr()
	_p_.gcAssistTime += duration
	if _p_.gcAssistTime > gcAssistTimeSlack {
		atomic.Xaddint64(&gcController.assistTime, _p_.gcAssistTime)
		_p_.gcAssistTime = 0
	}
}
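// The "1+" above guarantees that a completed assist always makes some
// forward progress on its byte debt, even when assistBytesPerWork is
// small enough that the float-to-int conversion would truncate to
// zero. A standalone sketch of just that conversion; workToCredit is an
// illustrative name, not a runtime function.
func workToCredit(workDone int64, bytesPerWork float64) int64 {
	// Truncation alone could yield 0 for small rates; the +1 acts as
	// a cheap round-up so credit strictly increases.
	return 1 + int64(bytesPerWork*float64(workDone))
}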
// gcAssistAlloc performs GC work to make gp's assist debt positive.
// gp must be the calling user goroutine.
//
// This must be called with preemption enabled.
func gcAssistAlloc(gp *g) {
	// Don't assist in non-preemptible contexts. These are
	// generally fragile and won't allow the assist to block.
	if getg() == gp.m.g0 {
		return
	}
	if mp := getg().m; mp.locks > 0 || mp.preemptoff != "" {
		return
	}

retry:
	// Compute the amount of scan work we need to do to make the
	// balance positive. When the required amount of work is low,
	// we over-assist to build up credit for future allocations
	// and amortize the cost of assisting.
	debtBytes := -gp.gcAssistBytes
	scanWork := int64(gcController.assistWorkPerByte * float64(debtBytes))
	if scanWork < gcOverAssistWork {
		scanWork = gcOverAssistWork
		debtBytes = int64(gcController.assistBytesPerWork * float64(scanWork))
	}

	// Steal as much credit as we can from the background GC's
	// scan credit. This is racy and may drop the background
	// credit below 0 if two mutators steal at the same time. This
	// will just cause steals to fail until credit is accumulated
	// again, so in the long run it doesn't really matter, but we
	// do have to handle the negative credit case.
	bgScanCredit := atomic.Loadint64(&gcController.bgScanCredit)
	stolen := int64(0)
	if bgScanCredit > 0 {
		if bgScanCredit < scanWork {
			stolen = bgScanCredit
			gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(stolen))
		} else {
			stolen = scanWork
			gp.gcAssistBytes += debtBytes
		}
		atomic.Xaddint64(&gcController.bgScanCredit, -stolen)

		scanWork -= stolen

		if scanWork == 0 {
			// We were able to steal all of the credit we
			// needed.
			return
		}
	}

	// Perform assist work.
	systemstack(func() {
		gcAssistAlloc1(gp, scanWork)
		// The user stack may have moved, so this can't touch
		// anything on it until it returns from systemstack.
	})

	completed := gp.param != nil
	gp.param = nil
	if completed {
		gcMarkDone()
	}

	if gp.gcAssistBytes < 0 {
		// We were unable to steal enough credit or perform
		// enough work to pay off the assist debt. We need to
		// do one of these before letting the mutator allocate
		// more to prevent over-allocation.
		//
		// If this is because we were preempted, reschedule
		// and try some more.
		if gp.preempt {
			Gosched()
			goto retry
		}

		// Add this G to an assist queue and park. When the GC
		// has more background credit, it will satisfy queued
		// assists before flushing to the global credit pool.
		//
		// Note that this does *not* get woken up when more
		// work is added to the work list. The theory is that
		// there wasn't enough work to do anyway, so we might
		// as well let background marking take care of the
		// work that is available.
		if !gcParkAssist() {
			goto retry
		}

		// At this point either background GC has satisfied
		// this G's assist debt, or the GC cycle is over.
	}
}
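// Stealing is an optimistic load, clamp, and negative add on a shared
// counter; concurrent stealers may briefly drive it negative, which
// later flushes repay. A standalone sketch of that race-tolerant
// pattern; stealCredit and pool are illustrative names, and the sketch
// assumes import "sync/atomic".
func stealCredit(pool *int64, want int64) (stolen int64) {
	available := atomic.LoadInt64(pool)
	if available <= 0 {
		return 0 // nothing to steal right now
	}
	stolen = want
	if available < want {
		stolen = available // take whatever is there
	}
	// This may race with another stealer and push the pool below
	// zero; that is tolerated and corrects itself on the next flush.
	atomic.AddInt64(pool, -stolen)
	return stolen
}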
// gcDrain scans roots and objects in work buffers, blackening grey
// objects until all roots and work buffers have been drained.
//
// If flags&gcDrainUntilPreempt != 0, gcDrain returns when g.preempt
// is set. This implies gcDrainNoBlock.
//
// If flags&gcDrainIdle != 0, gcDrain returns when there is other work
// to do. This implies gcDrainNoBlock.
//
// If flags&gcDrainNoBlock != 0, gcDrain returns as soon as it is
// unable to get more work. Otherwise, it will block until all
// blocking calls are blocked in gcDrain.
//
// If flags&gcDrainFlushBgCredit != 0, gcDrain flushes scan work
// credit to gcController.bgScanCredit every gcCreditSlack units of
// scan work.
//
//go:nowritebarrier
func gcDrain(gcw *gcWork, flags gcDrainFlags) {
	if !writeBarrier.needed {
		throw("gcDrain phase incorrect")
	}

	gp := getg().m.curg
	preemptible := flags&gcDrainUntilPreempt != 0
	blocking := flags&(gcDrainUntilPreempt|gcDrainIdle|gcDrainNoBlock) == 0
	flushBgCredit := flags&gcDrainFlushBgCredit != 0
	idle := flags&gcDrainIdle != 0

	initScanWork := gcw.scanWork
	// idleCheck is the scan work at which to perform the next
	// idle check with the scheduler.
	idleCheck := initScanWork + idleCheckThreshold

	// Drain root marking jobs.
	if work.markrootNext < work.markrootJobs {
		for !(preemptible && gp.preempt) {
			job := atomic.Xadd(&work.markrootNext, +1) - 1
			if job >= work.markrootJobs {
				break
			}
			markroot(gcw, job)
			if idle && pollWork() {
				goto done
			}
		}
	}

	// Drain heap marking jobs.
	for !(preemptible && gp.preempt) {
		// Try to keep work available on the global queue. We used to
		// check if there were waiting workers, but it's better to
		// just keep work available than to make workers wait. In the
		// worst case, we'll do O(log(_WorkbufSize)) unnecessary
		// balances.
		if work.full == 0 {
			gcw.balance()
		}

		var b uintptr
		if blocking {
			b = gcw.get()
		} else {
			b = gcw.tryGetFast()
			if b == 0 {
				b = gcw.tryGet()
			}
		}
		if b == 0 {
			// work barrier reached or tryGet failed.
			break
		}
		scanobject(b, gcw)

		// Flush background scan work credit to the global
		// account if we've accumulated enough locally so
		// mutator assists can draw on it.
		if gcw.scanWork >= gcCreditSlack {
			atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
			if flushBgCredit {
				gcFlushBgCredit(gcw.scanWork - initScanWork)
				initScanWork = 0
			}
			idleCheck -= gcw.scanWork
			gcw.scanWork = 0

			if idle && idleCheck <= 0 {
				idleCheck += idleCheckThreshold
				if pollWork() {
					break
				}
			}
		}
	}

	// In blocking mode, write barriers are not allowed after this
	// point because we must preserve the condition that the work
	// buffers are empty.

done:
	// Flush remaining scan work credit.
	if gcw.scanWork > 0 {
		atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
		if flushBgCredit {
			gcFlushBgCredit(gcw.scanWork - initScanWork)
		}
		gcw.scanWork = 0
	}
}
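// Credit is flushed in batches of gcCreditSlack units so the shared
// counters are touched rarely, and an idle worker re-checks the
// scheduler roughly every idleCheckThreshold units of work. A
// self-contained sketch of that batching rhythm; drainLoop, scanNext,
// flush, pollOther, slack, and idleThreshold are illustrative names,
// not runtime identifiers.
func drainLoop(scanNext func() int64, flush func(int64), pollOther func() bool,
	slack, idleThreshold int64) {
	var local int64
	idleCheck := idleThreshold
	for {
		n := scanNext() // work done for one object; 0 means no work left
		if n == 0 {
			break
		}
		local += n
		if local >= slack {
			flush(local) // publish a batch to the shared account
			idleCheck -= local
			local = 0
			if idleCheck <= 0 {
				idleCheck += idleThreshold
				if pollOther() {
					break // the scheduler has other work for this P
				}
			}
		}
	}
	if local > 0 {
		flush(local) // don't strand the final partial batch
	}
}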
// gcDrain scans roots and objects in work buffers, blackening grey
// objects until all roots and work buffers have been drained.
//
// If flags&gcDrainUntilPreempt != 0, gcDrain returns when g.preempt
// is set. This implies gcDrainNoBlock.
//
// If flags&gcDrainNoBlock != 0, gcDrain returns as soon as it is
// unable to get more work. Otherwise, it will block until all
// blocking calls are blocked in gcDrain.
//
// If flags&gcDrainFlushBgCredit != 0, gcDrain flushes scan work
// credit to gcController.bgScanCredit every gcCreditSlack units of
// scan work.
//
//go:nowritebarrier
func gcDrain(gcw *gcWork, flags gcDrainFlags) {
	if !writeBarrier.needed {
		throw("gcDrain phase incorrect")
	}

	gp := getg()
	preemptible := flags&gcDrainUntilPreempt != 0
	blocking := flags&(gcDrainUntilPreempt|gcDrainNoBlock) == 0
	flushBgCredit := flags&gcDrainFlushBgCredit != 0

	// Drain root marking jobs.
	if work.markrootNext < work.markrootJobs {
		for blocking || !gp.preempt {
			job := atomic.Xadd(&work.markrootNext, +1) - 1
			if job >= work.markrootJobs {
				break
			}
			// TODO: Pass in gcw.
			markroot(job)
		}
	}

	initScanWork := gcw.scanWork

	// Drain heap marking jobs.
	for !(preemptible && gp.preempt) {
		// If another proc wants a pointer, give it some.
		if work.nwait > 0 && work.full == 0 {
			gcw.balance()
		}

		var b uintptr
		if blocking {
			b = gcw.get()
		} else {
			b = gcw.tryGet()
		}
		if b == 0 {
			// work barrier reached or tryGet failed.
			break
		}

		// If the current wbuf is filled by the scan a new wbuf might be
		// returned that could possibly hold only a single object. This
		// could result in each iteration draining only a single object
		// out of the wbuf passed in + a single object placed
		// into an empty wbuf in scanobject so there could be
		// a performance hit as we keep fetching fresh wbufs.
		scanobject(b, gcw)

		// Flush background scan work credit to the global
		// account if we've accumulated enough locally so
		// mutator assists can draw on it.
		if gcw.scanWork >= gcCreditSlack {
			atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
			if flushBgCredit {
				gcFlushBgCredit(gcw.scanWork - initScanWork)
				initScanWork = 0
			}
			gcw.scanWork = 0
		}
	}

	// In blocking mode, write barriers are not allowed after this
	// point because we must preserve the condition that the work
	// buffers are empty.

	// Flush remaining scan work credit.
	if gcw.scanWork > 0 {
		atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
		if flushBgCredit {
			gcFlushBgCredit(gcw.scanWork - initScanWork)
		}
		gcw.scanWork = 0
	}
}
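// The drain mode is decoded purely from flag bits: any of the "return
// early" flags disables blocking gets. A minimal sketch of that
// decoding with hypothetical flag constants (not the runtime's values
// or names):
type drainFlags uint32

const (
	drainUntilPreempt drainFlags = 1 << iota
	drainNoBlock
	drainFlushCredit
)

func decode(flags drainFlags) (preemptible, blocking, flush bool) {
	preemptible = flags&drainUntilPreempt != 0
	// Blocking only when no early-return flag is set.
	blocking = flags&(drainUntilPreempt|drainNoBlock) == 0
	flush = flags&drainFlushCredit != 0
	return
}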
// gcAssistAlloc performs GC work to make gp's assist debt positive.
// gp must be the calling user goroutine.
//
// This must be called with preemption enabled.
//
//go:nowritebarrier
func gcAssistAlloc(gp *g) {
	// Don't assist in non-preemptible contexts. These are
	// generally fragile and won't allow the assist to block.
	if getg() == gp.m.g0 {
		return
	}
	if mp := getg().m; mp.locks > 0 || mp.preemptoff != "" {
		return
	}

	// Compute the amount of scan work we need to do to make the
	// balance positive. We over-assist to build up credit for
	// future allocations and amortize the cost of assisting.
	debtBytes := -gp.gcAssistBytes + gcOverAssistBytes
	scanWork := int64(gcController.assistWorkPerByte * float64(debtBytes))

retry:
	// Steal as much credit as we can from the background GC's
	// scan credit. This is racy and may drop the background
	// credit below 0 if two mutators steal at the same time. This
	// will just cause steals to fail until credit is accumulated
	// again, so in the long run it doesn't really matter, but we
	// do have to handle the negative credit case.
	bgScanCredit := atomic.Loadint64(&gcController.bgScanCredit)
	stolen := int64(0)
	if bgScanCredit > 0 {
		if bgScanCredit < scanWork {
			stolen = bgScanCredit
			gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(stolen))
		} else {
			stolen = scanWork
			gp.gcAssistBytes += debtBytes
		}
		atomic.Xaddint64(&gcController.bgScanCredit, -stolen)

		scanWork -= stolen

		if scanWork == 0 {
			// We were able to steal all of the credit we
			// needed.
			return
		}
	}

	// Perform assist work.
	completed := false
	systemstack(func() {
		if atomic.Load(&gcBlackenEnabled) == 0 {
			// The gcBlackenEnabled check in malloc races with the
			// store that clears it but an atomic check in every malloc
			// would be a performance hit.
			// Instead we recheck it here on the non-preemptible system
			// stack to determine if we should perform an assist.

			// GC is done, so ignore any remaining debt.
			gp.gcAssistBytes = 0
			return
		}
		// Track time spent in this assist. Since we're on the
		// system stack, this is non-preemptible, so we can
		// just measure start and end time.
		startTime := nanotime()

		decnwait := atomic.Xadd(&work.nwait, -1)
		if decnwait == work.nproc {
			println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc)
			throw("nwait > work.nprocs")
		}

		// Drain own cached work first in the hopes that it
		// will be more cache friendly.
		gcw := &getg().m.p.ptr().gcw
		workDone := gcDrainN(gcw, scanWork)
		// If we are near the end of the mark phase,
		// dispose of the gcw.
		if gcBlackenPromptly {
			gcw.dispose()
		}

		// Record that we did this much scan work.
		//
		// Back out the number of bytes of assist credit that
		// this scan work counts for. The "1+" is a poor man's
		// round-up, to ensure this adds credit even if
		// assistBytesPerWork is very low.
		gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(workDone))

		// If this is the last worker and we ran out of work,
		// signal a completion point.
		incnwait := atomic.Xadd(&work.nwait, +1)
		if incnwait > work.nproc {
			println("runtime: work.nwait=", incnwait, "work.nproc=", work.nproc, "gcBlackenPromptly=", gcBlackenPromptly)
			throw("work.nwait > work.nproc")
		}

		if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
			// This has reached a background completion
			// point.
			completed = true
		}
		duration := nanotime() - startTime
		_p_ := gp.m.p.ptr()
		_p_.gcAssistTime += duration
		if _p_.gcAssistTime > gcAssistTimeSlack {
			atomic.Xaddint64(&gcController.assistTime, _p_.gcAssistTime)
			_p_.gcAssistTime = 0
		}
	})

	if completed {
		gcMarkDone()
	}

	if gp.gcAssistBytes < 0 {
		// We were unable to steal enough credit or perform
		// enough work to pay off the assist debt. We need to
		// do one of these before letting the mutator allocate
		// more to prevent over-allocation.
		//
		// If this is because we were preempted, reschedule
		// and try some more.
		if gp.preempt {
			Gosched()
			goto retry
		}

		// Add this G to an assist queue and park. When the GC
		// has more background credit, it will satisfy queued
		// assists before flushing to the global credit pool.
		//
		// Note that this does *not* get woken up when more
		// work is added to the work list. The theory is that
		// there wasn't enough work to do anyway, so we might
		// as well let background marking take care of the
		// work that is available.
		lock(&work.assistQueue.lock)

		// If the GC cycle is over, just return. This is the
		// likely path if we completed above. We do this
		// under the lock to prevent a GC cycle from ending
		// between this check and queuing the assist.
		if atomic.Load(&gcBlackenEnabled) == 0 {
			unlock(&work.assistQueue.lock)
			return
		}

		oldHead, oldTail := work.assistQueue.head, work.assistQueue.tail
		if oldHead == 0 {
			work.assistQueue.head.set(gp)
		} else {
			oldTail.ptr().schedlink.set(gp)
		}
		work.assistQueue.tail.set(gp)
		gp.schedlink.set(nil)

		// Recheck for background credit now that this G is in
		// the queue, but can still back out. This avoids a
		// race in case background marking has flushed more
		// credit since we checked above.
		if atomic.Loadint64(&gcController.bgScanCredit) > 0 {
			work.assistQueue.head = oldHead
			work.assistQueue.tail = oldTail
			if oldTail != 0 {
				oldTail.ptr().schedlink.set(nil)
			}
			unlock(&work.assistQueue.lock)
			goto retry
		}

		// Park for real.
		goparkunlock(&work.assistQueue.lock, "GC assist wait", traceEvGoBlock, 2)

		// At this point either background GC has satisfied
		// this G's assist debt, or the GC cycle is over.
	}
}
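// Parking above follows a check, enqueue, recheck sequence under the
// queue lock so a credit flush that races with the enqueue cannot be
// lost: if credit showed up after enqueueing, the G backs out and
// retries instead of sleeping. A self-contained sketch of that pattern;
// assistQueue here, parkOrRetry, and the int "waiter" stand-ins are
// illustrative names, not runtime code, and the sketch assumes
// import "sync".
type assistQueue struct {
	mu      sync.Mutex
	waiters []int // stand-ins for parked goroutines
	credit  int64 // background credit flushed by workers
}

// parkOrRetry reports whether the caller should retry immediately
// (credit became available) rather than wait for a wakeup.
func (q *assistQueue) parkOrRetry(id int) (retry bool) {
	q.mu.Lock()
	defer q.mu.Unlock()
	q.waiters = append(q.waiters, id) // enqueue first
	if q.credit > 0 {
		// Credit arrived before we could park; back out and retry.
		q.waiters = q.waiters[:len(q.waiters)-1]
		return true
	}
	// In the runtime the G would now park and release the lock
	// atomically (goparkunlock); this sketch simply leaves it queued.
	return false
}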